From bc86256d2e80e6731a2055175d9a32cf96eb71f8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:51 +0300 Subject: affs: stop setting bm_flags AFFS stores values '1' and '2' in 'bm_flags', and I fail to see any logic when it prefers one or another. AFFS writes '1' only from '->put_super()', while '->sync_fs()' and '->write_super()' store value '2'. So on the first glance, it looks like we want to have '1' if we unmount. However, this does not really happen in these cases: 1. superblock is written via 'write_super()' then we unmount; 2. we re-mount R/O, then unmount. which are quite typical. I could not find good documentation describing this field, except of one random piece of documentation in the internet which says that -1 means that the root block is valid, which is not consistent with what we have in the Linux AFFS driver. Jan Kara commented on this: "I have some vague recollection that on Amiga boolean was usually encoded as: 0 == false, ~0 == -1 == true. But it has been ages..." Thus, my conclusion is that value of '1' is as good as value of '2' and we can just always use '2'. An Jan Kara suggested to go further: "generally bm_flags handling looks strange. If they are 0, we mount fs read only and thus cannot change them. If they are != 0, we write 2 there. So IMHO if you just removed bm_flags setting, nothing will really happen." So this patch removes the bm_flags setting completely. This makes the "clean" argument of the 'affs_commit_super()' function unneeded, so it is also removed. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/affs/super.c b/fs/affs/super.c index 0782653..1d42e46 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -25,13 +25,12 @@ static int affs_statfs(struct dentry *dentry, struct kstatfs *buf); static int affs_remount (struct super_block *sb, int *flags, char *data); static void -affs_commit_super(struct super_block *sb, int wait, int clean) +affs_commit_super(struct super_block *sb, int wait) { struct affs_sb_info *sbi = AFFS_SB(sb); struct buffer_head *bh = sbi->s_root_bh; struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); - tail->bm_flag = cpu_to_be32(clean); secs_to_datestamp(get_seconds(), &tail->disk_change); affs_fix_checksum(sb, bh); mark_buffer_dirty(bh); @@ -46,7 +45,7 @@ affs_put_super(struct super_block *sb) pr_debug("AFFS: put_super()\n"); if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt) - affs_commit_super(sb, 1, 1); + affs_commit_super(sb, 1); kfree(sbi->s_prefix); affs_free_bitmap(sb); @@ -60,7 +59,7 @@ affs_write_super(struct super_block *sb) { lock_super(sb); if (!(sb->s_flags & MS_RDONLY)) - affs_commit_super(sb, 1, 2); + affs_commit_super(sb, 1); sb->s_dirt = 0; unlock_super(sb); @@ -71,7 +70,7 @@ static int affs_sync_fs(struct super_block *sb, int wait) { lock_super(sb); - affs_commit_super(sb, wait, 2); + affs_commit_super(sb, wait); sb->s_dirt = 0; unlock_super(sb); return 0; -- cgit v0.10.2 From c9753b1d20e13c94d15a1c8b252a696744bd22a2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:52 +0300 Subject: affs: remove useless superblock writeout on unmount We do not need to write out the superblock from '->put_super()' because VFS has already called '->sync_fs()' by this time and the superblock has already been written out. Thus, remove the 'affs_commit_super()' infocation from 'affs_put_super()'. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/affs/super.c b/fs/affs/super.c index 1d42e46..12b4f58 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -44,9 +44,6 @@ affs_put_super(struct super_block *sb) struct affs_sb_info *sbi = AFFS_SB(sb); pr_debug("AFFS: put_super()\n"); - if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt) - affs_commit_super(sb, 1); - kfree(sbi->s_prefix); affs_free_bitmap(sb); affs_brelse(sbi->s_root_bh); -- cgit v0.10.2 From 0164b1a32e6849121ea73ef3124a2994951a4713 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:53 +0300 Subject: affs: remove useless superblock writeout on remount We do not need to write out the superblock from '->remount_fs()' because VFS has already called '->sync_fs()' by this time and the superblock has already been written out. Thus, remove the 'affs_write_super()' infocation from 'affs_remount()'. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/affs/super.c b/fs/affs/super.c index 12b4f58..c837e43 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -545,10 +545,9 @@ affs_remount(struct super_block *sb, int *flags, char *data) if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) return 0; - if (*flags & MS_RDONLY) { - affs_write_super(sb); + if (*flags & MS_RDONLY) affs_free_bitmap(sb); - } else + else res = affs_init_bitmap(sb, flags); return res; -- cgit v0.10.2 From e0471c8d8abbc2b07fc82f7b02896d1637909319 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:54 +0300 Subject: affs: re-structure superblock locking a bit AFFS wants to serialize the superblock (the root block in AFFS terms) updates and uses 'lock_super()/unlock_super()' for these purposes. This patch pushes the locking down to the 'affs_commit_super()' from the callers. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/affs/super.c b/fs/affs/super.c index c837e43..4ceec56 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -31,11 +31,13 @@ affs_commit_super(struct super_block *sb, int wait) struct buffer_head *bh = sbi->s_root_bh; struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); + lock_super(sb); secs_to_datestamp(get_seconds(), &tail->disk_change); affs_fix_checksum(sb, bh); mark_buffer_dirty(bh); if (wait) sync_dirty_buffer(bh); + unlock_super(sb); } static void @@ -54,22 +56,17 @@ affs_put_super(struct super_block *sb) static void affs_write_super(struct super_block *sb) { - lock_super(sb); if (!(sb->s_flags & MS_RDONLY)) affs_commit_super(sb, 1); sb->s_dirt = 0; - unlock_super(sb); - pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds()); } static int affs_sync_fs(struct super_block *sb, int wait) { - lock_super(sb); affs_commit_super(sb, wait); sb->s_dirt = 0; - unlock_super(sb); return 0; } -- cgit v0.10.2 From a837107439ea50116e59943556d6902c09e52772 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:55 +0300 Subject: affs: stop using lock_super The VFS's 'lock_super()' and 'unlock_super()' calls are deprecated and unwanted and just wait for a brave knight who'd kill them. This patch makes AFFS stop using them and use the buffer-head's own lock instead. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/affs/super.c b/fs/affs/super.c index 4ceec56..da7498d 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -31,13 +31,14 @@ affs_commit_super(struct super_block *sb, int wait) struct buffer_head *bh = sbi->s_root_bh; struct affs_root_tail *tail = AFFS_ROOT_TAIL(sb, bh); - lock_super(sb); + lock_buffer(bh); secs_to_datestamp(get_seconds(), &tail->disk_change); affs_fix_checksum(sb, bh); + unlock_buffer(bh); + mark_buffer_dirty(bh); if (wait) sync_dirty_buffer(bh); - unlock_super(sb); } static void -- cgit v0.10.2 From a215fef7edfdcd8948037ceb3060b9ae7ebcef8b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:56 +0300 Subject: affs: introduce VFS superblock object back-reference Add an 'sb' VFS superblock back-reference to the 'struct affs_sb_info' data structure - we will need to find the VFS superblock from a 'struct affs_sb_info' object in the next patch, so this change is jut a preparation. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 1fceb32..5a726e9 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -100,6 +100,7 @@ struct affs_sb_info { char *s_prefix; /* Prefix for volumes and assigns. */ char s_volume[32]; /* Volume prefix for absolute symlinks. */ spinlock_t symlink_lock; /* protects the previous two */ + struct super_block *sb; /* the VFS superblock object */ }; #define SF_INTL 0x0001 /* International filesystem. */ diff --git a/fs/affs/super.c b/fs/affs/super.c index da7498d..0496cbb 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -299,6 +299,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) return -ENOMEM; sb->s_fs_info = sbi; + sbi->sb = sb; mutex_init(&sbi->s_bmlock); spin_lock_init(&sbi->symlink_lock); -- cgit v0.10.2 From 3dd847820d138c9d60764b0e920380373285ff10 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 6 Jun 2012 18:56:57 +0300 Subject: affs: get rid of affs_sync_super This patch makes affs stop using the VFS '->write_super()' method along with the 's_dirt' superblock flag, because they are on their way out. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblocks using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no diry superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 5a726e9..3a130e2 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -3,6 +3,7 @@ #include #include #include +#include /* AmigaOS allows file names with up to 30 characters length. * Names longer than that will be silently truncated. If you @@ -101,6 +102,9 @@ struct affs_sb_info { char s_volume[32]; /* Volume prefix for absolute symlinks. */ spinlock_t symlink_lock; /* protects the previous two */ struct super_block *sb; /* the VFS superblock object */ + int work_queued; /* non-zero delayed work is queued */ + struct delayed_work sb_work; /* superblock flush delayed work */ + spinlock_t work_lock; /* protects sb_work and work_queued */ }; #define SF_INTL 0x0001 /* International filesystem. */ @@ -121,6 +125,8 @@ static inline struct affs_sb_info *AFFS_SB(struct super_block *sb) return sb->s_fs_info; } +void affs_mark_sb_dirty(struct super_block *sb); + /* amigaffs.c */ extern int affs_insert_hash(struct inode *inode, struct buffer_head *bh); diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c index 3e26271..6e0be43 100644 --- a/fs/affs/bitmap.c +++ b/fs/affs/bitmap.c @@ -103,7 +103,7 @@ affs_free_block(struct super_block *sb, u32 block) *(__be32 *)bh->b_data = cpu_to_be32(tmp - mask); mark_buffer_dirty(bh); - sb->s_dirt = 1; + affs_mark_sb_dirty(sb); bm->bm_free++; mutex_unlock(&sbi->s_bmlock); @@ -248,7 +248,7 @@ find_bit: *(__be32 *)bh->b_data = cpu_to_be32(tmp + mask); mark_buffer_dirty(bh); - sb->s_dirt = 1; + affs_mark_sb_dirty(sb); mutex_unlock(&sbi->s_bmlock); diff --git a/fs/affs/super.c b/fs/affs/super.c index 0496cbb..c70f1e5 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "affs.h" extern struct timezone sys_tz; @@ -47,6 +48,7 @@ affs_put_super(struct super_block *sb) struct affs_sb_info *sbi = AFFS_SB(sb); pr_debug("AFFS: put_super()\n"); + cancel_delayed_work_sync(&sbi->sb_work); kfree(sbi->s_prefix); affs_free_bitmap(sb); affs_brelse(sbi->s_root_bh); @@ -54,23 +56,45 @@ affs_put_super(struct super_block *sb) sb->s_fs_info = NULL; } -static void -affs_write_super(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - affs_commit_super(sb, 1); - sb->s_dirt = 0; - pr_debug("AFFS: write_super() at %lu, clean=2\n", get_seconds()); -} - static int affs_sync_fs(struct super_block *sb, int wait) { affs_commit_super(sb, wait); - sb->s_dirt = 0; return 0; } +static void flush_superblock(struct work_struct *work) +{ + struct affs_sb_info *sbi; + struct super_block *sb; + + sbi = container_of(work, struct affs_sb_info, sb_work.work); + sb = sbi->sb; + + spin_lock(&sbi->work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->work_lock); + + affs_commit_super(sb, 1); +} + +void affs_mark_sb_dirty(struct super_block *sb) +{ + struct affs_sb_info *sbi = AFFS_SB(sb); + unsigned long delay; + + if (sb->s_flags & MS_RDONLY) + return; + + spin_lock(&sbi->work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->sb_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->work_lock); +} + static struct kmem_cache * affs_inode_cachep; static struct inode *affs_alloc_inode(struct super_block *sb) @@ -132,7 +156,6 @@ static const struct super_operations affs_sops = { .write_inode = affs_write_inode, .evict_inode = affs_evict_inode, .put_super = affs_put_super, - .write_super = affs_write_super, .sync_fs = affs_sync_fs, .statfs = affs_statfs, .remount_fs = affs_remount, @@ -302,6 +325,8 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) sbi->sb = sb; mutex_init(&sbi->s_bmlock); spin_lock_init(&sbi->symlink_lock); + spin_lock_init(&sbi->work_lock); + INIT_DELAYED_WORK(&sbi->sb_work, flush_superblock); if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block, &blocksize,&sbi->s_prefix, @@ -526,6 +551,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) return -EINVAL; } + flush_delayed_work_sync(&sbi->sb_work); replace_mount_options(sb, new_opts); sbi->s_flags = mount_flags; -- cgit v0.10.2 From d187663ef24cd3d033f0cbf2867e70b36a3a90b8 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Thu, 7 Jun 2012 15:45:00 -0700 Subject: fs/direct-io.c: adjust suspicious bit operation READ is 0, so the result of the bit-and operation is 0. Rewrite with == as done elsewhere in the same file. This problem was found using Coccinelle (http://coccinelle.lip6.fr/). Signed-off-by: Julia Lawall Reviewed-by: Jeff Moyer Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro diff --git a/fs/direct-io.c b/fs/direct-io.c index 0c85fae..1faf4cb 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1258,7 +1258,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, */ BUG_ON(retval == -EIOCBQUEUED); if (dio->is_async && retval == 0 && dio->result && - ((rw & READ) || (dio->result == sdio.size))) + ((rw == READ) || (dio->result == sdio.size))) retval = -EIOCBQUEUED; if (retval != -EIOCBQUEUED) -- cgit v0.10.2 From f7a99c5b7c8bd3d3f533c8b38274e33f3da9096e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 00:59:08 -0400 Subject: get rid of ->mnt_longterm it's enough to set ->mnt_ns of internal vfsmounts to something distinct from all struct mnt_namespace out there; then we can just use the check for ->mnt_ns != NULL in the fast path of mntput_no_expire() Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 4046904..44acb5b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2622,7 +2622,7 @@ global_root: if (!slash) error = prepend(buffer, buflen, "/", 1); if (!error) - error = real_mount(vfsmnt)->mnt_ns ? 1 : 2; + error = is_mounted(vfsmnt) ? 1 : 2; goto out; } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index e159e68..5df4775 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -6,18 +6,6 @@ #include #include "internal.h" -static inline void path_get_longterm(struct path *path) -{ - path_get(path); - mnt_make_longterm(path->mnt); -} - -static inline void path_put_longterm(struct path *path) -{ - mnt_make_shortterm(path->mnt); - path_put(path); -} - /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. * It can block. @@ -26,7 +14,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - path_get_longterm(path); + path_get(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_root = fs->root; @@ -34,7 +22,7 @@ void set_fs_root(struct fs_struct *fs, struct path *path) write_seqcount_end(&fs->seq); spin_unlock(&fs->lock); if (old_root.dentry) - path_put_longterm(&old_root); + path_put(&old_root); } /* @@ -45,7 +33,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - path_get_longterm(path); + path_get(path); spin_lock(&fs->lock); write_seqcount_begin(&fs->seq); old_pwd = fs->pwd; @@ -54,7 +42,7 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) spin_unlock(&fs->lock); if (old_pwd.dentry) - path_put_longterm(&old_pwd); + path_put(&old_pwd); } static inline int replace_path(struct path *p, const struct path *old, const struct path *new) @@ -84,7 +72,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) write_seqcount_end(&fs->seq); while (hits--) { count++; - path_get_longterm(new_root); + path_get(new_root); } spin_unlock(&fs->lock); } @@ -92,13 +80,13 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) } while_each_thread(g, p); read_unlock(&tasklist_lock); while (count--) - path_put_longterm(old_root); + path_put(old_root); } void free_fs_struct(struct fs_struct *fs) { - path_put_longterm(&fs->root); - path_put_longterm(&fs->pwd); + path_put(&fs->root); + path_put(&fs->pwd); kmem_cache_free(fs_cachep, fs); } @@ -132,9 +120,9 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) spin_lock(&old->lock); fs->root = old->root; - path_get_longterm(&fs->root); + path_get(&fs->root); fs->pwd = old->pwd; - path_get_longterm(&fs->pwd); + path_get(&fs->pwd); spin_unlock(&old->lock); } return fs; diff --git a/fs/internal.h b/fs/internal.h index 18bc216..d2a23ff6 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -50,8 +50,6 @@ extern int copy_mount_string(const void __user *, char **); extern struct vfsmount *lookup_mnt(struct path *); extern int finish_automount(struct vfsmount *, struct path *); -extern void mnt_make_longterm(struct vfsmount *); -extern void mnt_make_shortterm(struct vfsmount *); extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); diff --git a/fs/mount.h b/fs/mount.h index 4ef36d9..05a2a11 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -22,7 +22,6 @@ struct mount { struct vfsmount mnt; #ifdef CONFIG_SMP struct mnt_pcp __percpu *mnt_pcp; - atomic_t mnt_longterm; /* how many of the refs are longterm */ #else int mnt_count; int mnt_writers; @@ -49,6 +48,8 @@ struct mount { int mnt_ghosts; }; +#define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ + static inline struct mount *real_mount(struct vfsmount *mnt) { return container_of(mnt, struct mount, mnt); @@ -59,6 +60,12 @@ static inline int mnt_has_parent(struct mount *mnt) return mnt != mnt->mnt_parent; } +static inline int is_mounted(struct vfsmount *mnt) +{ + /* neither detached nor internal? */ + return !IS_ERR_OR_NULL(real_mount(mnt)); +} + extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); static inline void get_mnt_ns(struct mnt_namespace *ns) diff --git a/fs/namespace.c b/fs/namespace.c index 1e4a5fe..a524ea4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -621,21 +621,6 @@ static void attach_mnt(struct mount *mnt, struct path *path) list_add_tail(&mnt->mnt_child, &real_mount(path->mnt)->mnt_mounts); } -static inline void __mnt_make_longterm(struct mount *mnt) -{ -#ifdef CONFIG_SMP - atomic_inc(&mnt->mnt_longterm); -#endif -} - -/* needs vfsmount lock for write */ -static inline void __mnt_make_shortterm(struct mount *mnt) -{ -#ifdef CONFIG_SMP - atomic_dec(&mnt->mnt_longterm); -#endif -} - /* * vfsmount lock must be held for write */ @@ -649,10 +634,8 @@ static void commit_tree(struct mount *mnt) BUG_ON(parent == mnt); list_add_tail(&head, &mnt->mnt_list); - list_for_each_entry(m, &head, mnt_list) { + list_for_each_entry(m, &head, mnt_list) m->mnt_ns = n; - __mnt_make_longterm(m); - } list_splice(&head, n->list.prev); @@ -804,7 +787,8 @@ static void mntput_no_expire(struct mount *mnt) put_again: #ifdef CONFIG_SMP br_read_lock(&vfsmount_lock); - if (likely(atomic_read(&mnt->mnt_longterm))) { + if (likely(mnt->mnt_ns)) { + /* shouldn't be the last one */ mnt_add_count(mnt, -1); br_read_unlock(&vfsmount_lock); return; @@ -1074,8 +1058,6 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); - if (p->mnt_ns) - __mnt_make_shortterm(p); p->mnt_ns = NULL; list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { @@ -2209,23 +2191,6 @@ static struct mnt_namespace *alloc_mnt_ns(void) return new_ns; } -void mnt_make_longterm(struct vfsmount *mnt) -{ - __mnt_make_longterm(real_mount(mnt)); -} - -void mnt_make_shortterm(struct vfsmount *m) -{ -#ifdef CONFIG_SMP - struct mount *mnt = real_mount(m); - if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) - return; - br_write_lock(&vfsmount_lock); - atomic_dec(&mnt->mnt_longterm); - br_write_unlock(&vfsmount_lock); -#endif -} - /* * Allocate a new namespace structure and populate it with contents * copied from the namespace of the passed in task structure. @@ -2265,18 +2230,13 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, q = new; while (p) { q->mnt_ns = new_ns; - __mnt_make_longterm(q); if (fs) { if (&p->mnt == fs->root.mnt) { fs->root.mnt = mntget(&q->mnt); - __mnt_make_longterm(q); - mnt_make_shortterm(&p->mnt); rootmnt = &p->mnt; } if (&p->mnt == fs->pwd.mnt) { fs->pwd.mnt = mntget(&q->mnt); - __mnt_make_longterm(q); - mnt_make_shortterm(&p->mnt); pwdmnt = &p->mnt; } } @@ -2320,7 +2280,6 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) if (!IS_ERR(new_ns)) { struct mount *mnt = real_mount(m); mnt->mnt_ns = new_ns; - __mnt_make_longterm(mnt); new_ns->root = mnt; list_add(&new_ns->list, &mnt->mnt_list); } else { @@ -2615,7 +2574,7 @@ struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) * it is a longterm mount, don't release mnt until * we unmount before file sys is unregistered */ - mnt_make_longterm(mnt); + real_mount(mnt)->mnt_ns = MNT_NS_INTERNAL; } return mnt; } @@ -2625,7 +2584,9 @@ void kern_unmount(struct vfsmount *mnt) { /* release long term mount so mount point can be released */ if (!IS_ERR_OR_NULL(mnt)) { - mnt_make_shortterm(mnt); + br_write_lock(&vfsmount_lock); + real_mount(mnt)->mnt_ns = NULL; + br_write_unlock(&vfsmount_lock); mntput(mnt); } } -- cgit v0.10.2 From 6ce6e24e72233073c8ead9419fc5040d44803dae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 01:16:59 -0400 Subject: get rid of magic in proc_namespace.c don't rely on proc_mounts->m being the first field; container_of() is there for purpose. No need to bother with ->private, while we are at it - the same container_of will do nicely. Signed-off-by: Al Viro diff --git a/fs/mount.h b/fs/mount.h index 05a2a11..4f291f9 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -74,10 +74,12 @@ static inline void get_mnt_ns(struct mnt_namespace *ns) } struct proc_mounts { - struct seq_file m; /* must be the first element */ + struct seq_file m; struct mnt_namespace *ns; struct path root; int (*show)(struct seq_file *, struct vfsmount *); }; +#define proc_mounts(p) (container_of((p), struct proc_mounts, m)) + extern const struct seq_operations mounts_op; diff --git a/fs/namespace.c b/fs/namespace.c index a524ea4..8f412ab 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -923,7 +923,7 @@ EXPORT_SYMBOL(replace_mount_options); /* iterator; we want it to have access to namespace_sem, thus here... */ static void *m_start(struct seq_file *m, loff_t *pos) { - struct proc_mounts *p = container_of(m, struct proc_mounts, m); + struct proc_mounts *p = proc_mounts(m); down_read(&namespace_sem); return seq_list_start(&p->ns->list, *pos); @@ -931,7 +931,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) static void *m_next(struct seq_file *m, void *v, loff_t *pos) { - struct proc_mounts *p = container_of(m, struct proc_mounts, m); + struct proc_mounts *p = proc_mounts(m); return seq_list_next(v, &p->ns->list, pos); } @@ -943,7 +943,7 @@ static void m_stop(struct seq_file *m, void *v) static int m_show(struct seq_file *m, void *v) { - struct proc_mounts *p = container_of(m, struct proc_mounts, m); + struct proc_mounts *p = proc_mounts(m); struct mount *r = list_entry(v, struct mount, mnt_list); return p->show(m, &r->mnt); } diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 5e289a7..5fe34c3 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -17,7 +17,7 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) { - struct proc_mounts *p = file->private_data; + struct proc_mounts *p = proc_mounts(file->private_data); struct mnt_namespace *ns = p->ns; unsigned res = POLLIN | POLLRDNORM; @@ -121,7 +121,7 @@ out: static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) { - struct proc_mounts *p = m->private; + struct proc_mounts *p = proc_mounts(m); struct mount *r = real_mount(mnt); struct super_block *sb = mnt->mnt_sb; struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt }; @@ -268,7 +268,6 @@ static int mounts_open_common(struct inode *inode, struct file *file, if (ret) goto err_free; - p->m.private = p; p->ns = ns; p->root = root; p->m.poll_event = ns->event; @@ -288,7 +287,7 @@ static int mounts_open_common(struct inode *inode, struct file *file, static int mounts_release(struct inode *inode, struct file *file) { - struct proc_mounts *p = file->private_data; + struct proc_mounts *p = proc_mounts(file->private_data); path_put(&p->root); put_mnt_ns(p->ns); return seq_release(inode, file); -- cgit v0.10.2 From 63a44583f3a4408b902a3d7ba18b4ab13d1309ab Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 11:49:04 -0400 Subject: qnx6: don't bother with ->i_dentry in inode-freeing callback we'll initialize it in inode_init_always() when we allocate that object again. Signed-off-by: Al Viro diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index e44012d..2049c81 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -622,7 +622,6 @@ static struct inode *qnx6_alloc_inode(struct super_block *sb) static void qnx6_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(qnx6_inode_cachep, QNX6_I(inode)); } -- cgit v0.10.2 From e6f9f8d0296aad7fbaf01de38ccaa1bf654bbda4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 11:50:36 -0400 Subject: cifs: don't bother with ->i_dentry in ->destroy_inode() Signed-off-by: Al Viro diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8b6e344..bcab12c 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -257,7 +257,6 @@ cifs_alloc_inode(struct super_block *sb) static void cifs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); } -- cgit v0.10.2 From 7968ce12e9645c5eb5bb3f4320e43c2e402d580c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 11:51:12 -0400 Subject: adfs: don't bother with ->i_dentry in ->destroy_inode() Signed-off-by: Al Viro diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 06fdcc9..bdaec92 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -246,7 +246,6 @@ static struct inode *adfs_alloc_inode(struct super_block *sb) static void adfs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - INIT_LIST_HEAD(&inode->i_dentry); kmem_cache_free(adfs_inode_cachep, ADFS_I(inode)); } -- cgit v0.10.2 From 049b3c10eecd0a5f3605fa3cd13f638593213ccb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 11:55:20 -0400 Subject: vfs: update documentation on ->i_dentry handling we used to need to clean it in RCU callback freeing an inode; in 3.2 that requirement went away. Unfortunately, it hadn't been reflected in Documentation/filesystems/porting. Signed-off-by: Al Viro diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 8c91d10..ed9fbc2 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -355,12 +355,10 @@ protects *all* the dcache state of a given dentry. via rcu-walk path walk (basically, if the file can have had a path name in the vfs namespace). - i_dentry and i_rcu share storage in a union, and the vfs expects -i_dentry to be reinitialized before it is freed, so an: - - INIT_LIST_HEAD(&inode->i_dentry); - -must be done in the RCU callback. + Even though i_dentry and i_rcu share storage in a union, we will +initialize the former in inode_init_always(), so just leave it alone in +the callback. It used to be necessary to clean it there, but not anymore +(starting at 3.2). -- [recommended] -- cgit v0.10.2 From 3084ee95f08ce353ae26c18c7627c4e9786983ca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:03:04 -0400 Subject: affs: get rid of open-coded list_for_each_entry() Signed-off-by: Al Viro diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 52a6407..1c7fd79 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -126,18 +126,13 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino) { struct inode *inode = dentry->d_inode; void *data = dentry->d_fsdata; - struct list_head *head, *next; spin_lock(&inode->i_lock); - head = &inode->i_dentry; - next = head->next; - while (next != head) { - dentry = list_entry(next, struct dentry, d_alias); + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = data; break; } - next = next->next; } spin_unlock(&inode->i_lock); } -- cgit v0.10.2 From 12447c40394695c9a19920c65fea124bdf3ea034 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:06:09 -0400 Subject: affs: unobfuscate affs_fix_dcache() and add a comment on what it's doing Signed-off-by: Al Viro diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 1c7fd79..843cdc9 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -122,15 +122,13 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh) } static void -affs_fix_dcache(struct dentry *dentry, u32 entry_ino) +affs_fix_dcache(struct inode *inode, u32 entry_ino) { - struct inode *inode = dentry->d_inode; - void *data = dentry->d_fsdata; - + struct dentry *dentry; spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { - dentry->d_fsdata = data; + dentry->d_fsdata = (void *)inode->i_ino; break; } } @@ -172,7 +170,11 @@ affs_remove_link(struct dentry *dentry) } affs_lock_dir(dir); - affs_fix_dcache(dentry, link_ino); + /* + * if there's a dentry for that block, make it + * refer to inode itself. + */ + affs_fix_dcache(inode, link_ino); retval = affs_remove_hash(dir, link_bh); if (retval) { affs_unlock_dir(dir); -- cgit v0.10.2 From a614a092bf28d58c742b9ec43209f3f78c3d9fb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:09:15 -0400 Subject: ocfs2: use list_for_each_entry in ocfs2_find_local_alias() Signed-off-by: Al Viro diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index e5ba348..a40edc1 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -170,13 +170,10 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed) { - struct list_head *p; - struct dentry *dentry = NULL; + struct dentry *dentry; spin_lock(&inode->i_lock); - list_for_each(p, &inode->i_dentry) { - dentry = list_entry(p, struct dentry, d_alias); - + list_for_each_entry(dentry, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, @@ -184,16 +181,13 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, dget_dlock(dentry); spin_unlock(&dentry->d_lock); - break; + spin_unlock(&inode->i_lock); + return dentry; } spin_unlock(&dentry->d_lock); - - dentry = NULL; } - spin_unlock(&inode->i_lock); - - return dentry; + return NULL; } DEFINE_SPINLOCK(dentry_attach_lock); -- cgit v0.10.2 From 9f713878f22e0b2d34d62df0ca55f65166375634 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:19:12 -0400 Subject: ext4: get rid of open-coded d_find_any_alias() Signed-off-by: Al Viro diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index bb6c7d8..4359a4d 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -135,14 +135,7 @@ static int ext4_sync_parent(struct inode *inode) inode = igrab(inode); while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); - dentry = NULL; - spin_lock(&inode->i_lock); - if (!list_empty(&inode->i_dentry)) { - dentry = list_first_entry(&inode->i_dentry, - struct dentry, d_alias); - dget(dentry); - } - spin_unlock(&inode->i_lock); + dentry = d_find_any_alias(inode); if (!dentry) break; next = igrab(dentry->d_parent->d_inode); -- cgit v0.10.2 From b3d9b7a3c752dc4b6976a4ff7b8298887a5b734d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 13:51:19 -0400 Subject: vfs: switch i_dentry/d_alias to hlist Signed-off-by: Al Viro diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index 843cdc9..eb82ee5 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c @@ -125,8 +125,9 @@ static void affs_fix_dcache(struct inode *inode, u32 entry_ino) { struct dentry *dentry; + struct hlist_node *p; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { if (entry_ino == (u32)(long)dentry->d_fsdata) { dentry->d_fsdata = (void *)inode->i_ino; break; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a7d1921..a101572 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6987,7 +6987,7 @@ void btrfs_destroy_inode(struct inode *inode) struct btrfs_ordered_extent *ordered; struct btrfs_root *root = BTRFS_I(inode)->root; - WARN_ON(!list_empty(&inode->i_dentry)); + WARN_ON(!hlist_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); WARN_ON(BTRFS_I(inode)->outstanding_extents); WARN_ON(BTRFS_I(inode)->reserved_extents); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 745da3d..8e8bb49 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -800,7 +800,7 @@ cifs_find_inode(struct inode *inode, void *opaque) return 0; /* if it's not a directory or has no dentries, then flag it */ - if (S_ISDIR(inode->i_mode) && !list_empty(&inode->i_dentry)) + if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) fattr->cf_flags |= CIFS_FATTR_INO_COLLISION; return 1; @@ -825,9 +825,10 @@ static bool inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; + struct hlist_node *p; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&inode->i_lock); return true; diff --git a/fs/dcache.c b/fs/dcache.c index 44acb5b..015586f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -218,7 +218,7 @@ static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); - WARN_ON(!list_empty(&dentry->d_alias)); + WARN_ON(!hlist_unhashed(&dentry->d_alias)); if (dname_external(dentry)) kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); @@ -267,7 +267,7 @@ static void dentry_iput(struct dentry * dentry) struct inode *inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_alias); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -291,7 +291,7 @@ static void dentry_unlink_inode(struct dentry * dentry) { struct inode *inode = dentry->d_inode; dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_alias); dentry_rcuwalk_barrier(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); @@ -699,10 +699,11 @@ EXPORT_SYMBOL(dget_parent); static struct dentry *__d_find_alias(struct inode *inode, int want_discon) { struct dentry *alias, *discon_alias; + struct hlist_node *p; again: discon_alias = NULL; - list_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { spin_lock(&alias->d_lock); if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) { if (IS_ROOT(alias) && @@ -737,7 +738,7 @@ struct dentry *d_find_alias(struct inode *inode) { struct dentry *de = NULL; - if (!list_empty(&inode->i_dentry)) { + if (!hlist_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); de = __d_find_alias(inode, 0); spin_unlock(&inode->i_lock); @@ -753,9 +754,10 @@ EXPORT_SYMBOL(d_find_alias); void d_prune_aliases(struct inode *inode) { struct dentry *dentry; + struct hlist_node *p; restart: spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (!dentry->d_count) { __dget_dlock(dentry); @@ -977,7 +979,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) inode = dentry->d_inode; if (inode) { dentry->d_inode = NULL; - list_del_init(&dentry->d_alias); + hlist_del_init(&dentry->d_alias); if (dentry->d_op && dentry->d_op->d_iput) dentry->d_op->d_iput(dentry, inode); else @@ -1312,7 +1314,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) INIT_HLIST_BL_NODE(&dentry->d_hash); INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); - INIT_LIST_HEAD(&dentry->d_alias); + INIT_HLIST_NODE(&dentry->d_alias); INIT_LIST_HEAD(&dentry->d_u.d_child); d_set_d_op(dentry, dentry->d_sb->s_d_op); @@ -1400,7 +1402,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if (inode) { if (unlikely(IS_AUTOMOUNT(inode))) dentry->d_flags |= DCACHE_NEED_AUTOMOUNT; - list_add(&dentry->d_alias, &inode->i_dentry); + hlist_add_head(&dentry->d_alias, &inode->i_dentry); } dentry->d_inode = inode; dentry_rcuwalk_barrier(dentry); @@ -1425,7 +1427,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) void d_instantiate(struct dentry *entry, struct inode * inode) { - BUG_ON(!list_empty(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_alias)); if (inode) spin_lock(&inode->i_lock); __d_instantiate(entry, inode); @@ -1458,13 +1460,14 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, int len = entry->d_name.len; const char *name = entry->d_name.name; unsigned int hash = entry->d_name.hash; + struct hlist_node *p; if (!inode) { __d_instantiate(entry, NULL); return NULL; } - list_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { /* * Don't need alias->d_lock here, because aliases with * d_parent == entry->d_parent are not subject to name or @@ -1490,7 +1493,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode) { struct dentry *result; - BUG_ON(!list_empty(&entry->d_alias)); + BUG_ON(!hlist_unhashed(&entry->d_alias)); if (inode) spin_lock(&inode->i_lock); @@ -1531,9 +1534,9 @@ static struct dentry * __d_find_any_alias(struct inode *inode) { struct dentry *alias; - if (list_empty(&inode->i_dentry)) + if (hlist_empty(&inode->i_dentry)) return NULL; - alias = list_first_entry(&inode->i_dentry, struct dentry, d_alias); + alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias); __dget(alias); return alias; } @@ -1607,7 +1610,7 @@ struct dentry *d_obtain_alias(struct inode *inode) spin_lock(&tmp->d_lock); tmp->d_inode = inode; tmp->d_flags |= DCACHE_DISCONNECTED; - list_add(&tmp->d_alias, &inode->i_dentry); + hlist_add_head(&tmp->d_alias, &inode->i_dentry); hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); hlist_bl_unlock(&tmp->d_sb->s_anon); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b0201ca..b42063c 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -44,13 +44,14 @@ find_acceptable_alias(struct dentry *result, { struct dentry *dentry, *toput = NULL; struct inode *inode; + struct hlist_node *p; if (acceptable(context, result)) return result; inode = result->d_inode; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { dget(dentry); spin_unlock(&inode->i_lock); if (toput) diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 4359a4d..2a1dcea 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -225,7 +225,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!journal) { ret = __sync_inode(inode, datasync); - if (!ret && !list_empty(&inode->i_dentry)) + if (!ret && !hlist_empty(&inode->i_dentry)) ret = ext4_sync_parent(inode); goto out; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 334e0b1..f7543f7 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -249,7 +249,7 @@ static struct dentry *fuse_d_add_directory(struct dentry *entry, /* This tries to shrink the subtree below alias */ fuse_invalidate_entry(alias); dput(alias); - if (!list_empty(&inode->i_dentry)) + if (!hlist_empty(&inode->i_dentry)) return ERR_PTR(-EBUSY); } else { dput(alias); diff --git a/fs/inode.c b/fs/inode.c index c99163b..775cbab 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -182,7 +182,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) } inode->i_private = NULL; inode->i_mapping = mapping; - INIT_LIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ + INIT_HLIST_HEAD(&inode->i_dentry); /* buggered by rcu freeing */ #ifdef CONFIG_FS_POSIX_ACL inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; #endif diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index 8abfb19..a67990f 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -62,7 +62,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i */ spin_lock(&sb->s_root->d_inode->i_lock); spin_lock(&sb->s_root->d_lock); - list_del_init(&sb->s_root->d_alias); + hlist_del_init(&sb->s_root->d_alias); spin_unlock(&sb->s_root->d_lock); spin_unlock(&sb->s_root->d_inode->i_lock); } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index b39c5c1..6baadb5 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -52,6 +52,7 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) void __fsnotify_update_child_dentry_flags(struct inode *inode) { struct dentry *alias; + struct hlist_node *p; int watched; if (!S_ISDIR(inode->i_mode)) @@ -63,7 +64,7 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode) spin_lock(&inode->i_lock); /* run all of the dentries associated with this inode. Since this is a * directory, there damn well better only be one item on this list */ - list_for_each_entry(alias, &inode->i_dentry, d_alias) { + hlist_for_each_entry(alias, p, &inode->i_dentry, d_alias) { struct dentry *child; /* run all of the children of the original inode and fix their diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index a40edc1..af44882 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -170,10 +170,11 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed) { + struct hlist_node *p; struct dentry *dentry; spin_lock(&inode->i_lock); - list_for_each_entry(dentry, &inode->i_dentry, d_alias) { + hlist_for_each_entry(dentry, p, &inode->i_dentry, d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 094789f..8ca2555 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -128,7 +128,7 @@ struct dentry { struct rcu_head d_rcu; } d_u; struct list_head d_subdirs; /* our children */ - struct list_head d_alias; /* inode alias list */ + struct hlist_node d_alias; /* inode alias list */ }; /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 17fd887..f06db6b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -826,7 +826,7 @@ struct inode { struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; union { - struct list_head i_dentry; + struct hlist_head i_dentry; struct rcu_head i_rcu; }; u64 i_version; -- cgit v0.10.2 From 1d674107ea4b68669e012e654d64369b7f2bb250 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 19:52:19 -0400 Subject: coda: use list_for_each_entry Signed-off-by: Al Viro diff --git a/fs/coda/cache.c b/fs/coda/cache.c index 6901578..958ae0e 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c @@ -89,17 +89,13 @@ int coda_cache_check(struct inode *inode, int mask) /* this won't do any harm: just flag all children */ static void coda_flag_children(struct dentry *parent, int flag) { - struct list_head *child; struct dentry *de; spin_lock(&parent->d_lock); - list_for_each(child, &parent->d_subdirs) - { - de = list_entry(child, struct dentry, d_u.d_child); + list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) { /* don't know what to do with negative dentries */ - if ( ! de->d_inode ) - continue; - coda_flag_inode(de->d_inode, flag); + if (de->d_inode ) + coda_flag_inode(de->d_inode, flag); } spin_unlock(&parent->d_lock); return; -- cgit v0.10.2 From 6d7b5aaed7d887b34f29f900244cdbd17a86637c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 04:15:17 -0400 Subject: namei.c: let follow_link() do put_link() on failure no need for kludgy "set cookie to ERR_PTR(...) because we failed before we did actual ->follow_link() and want to suppress put_link()", no pointless check in put_link() itself. Callers checked if follow_link() has failed anyway; might as well break out of their loops if that happened, without bothering to call put_link() first. [AV: folded fixes from hch] Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 7d69419..6135a14 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -605,7 +605,7 @@ static inline void path_to_nameidata(const struct path *path, static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) { struct inode *inode = link->dentry->d_inode; - if (!IS_ERR(cookie) && inode->i_op->put_link) + if (inode->i_op->put_link) inode->i_op->put_link(link->dentry, nd, cookie); path_put(link); } @@ -613,19 +613,19 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki static __always_inline int follow_link(struct path *link, struct nameidata *nd, void **p) { - int error; struct dentry *dentry = link->dentry; + int error; + char *s; BUG_ON(nd->flags & LOOKUP_RCU); if (link->mnt == nd->path.mnt) mntget(link->mnt); - if (unlikely(current->total_link_count >= 40)) { - *p = ERR_PTR(-ELOOP); /* no ->put_link(), please */ - path_put(&nd->path); - return -ELOOP; - } + error = -ELOOP; + if (unlikely(current->total_link_count >= 40)) + goto out_put_nd_path; + cond_resched(); current->total_link_count++; @@ -633,30 +633,37 @@ follow_link(struct path *link, struct nameidata *nd, void **p) nd_set_link(nd, NULL); error = security_inode_follow_link(link->dentry, nd); - if (error) { - *p = ERR_PTR(error); /* no ->put_link(), please */ - path_put(&nd->path); - return error; - } + if (error) + goto out_put_nd_path; nd->last_type = LAST_BIND; *p = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(*p); - if (!IS_ERR(*p)) { - char *s = nd_get_link(nd); - error = 0; - if (s) - error = __vfs_follow_link(nd, s); - else if (nd->last_type == LAST_BIND) { - nd->flags |= LOOKUP_JUMPED; - nd->inode = nd->path.dentry->d_inode; - if (nd->inode->i_op->follow_link) { - /* stepped on a _really_ weird one */ - path_put(&nd->path); - error = -ELOOP; - } + if (IS_ERR(*p)) + goto out_put_link; + + error = 0; + s = nd_get_link(nd); + if (s) { + error = __vfs_follow_link(nd, s); + } else if (nd->last_type == LAST_BIND) { + nd->flags |= LOOKUP_JUMPED; + nd->inode = nd->path.dentry->d_inode; + if (nd->inode->i_op->follow_link) { + /* stepped on a _really_ weird one */ + path_put(&nd->path); + error = -ELOOP; } } + if (unlikely(error)) + put_link(nd, link, *p); + + return error; + +out_put_nd_path: + path_put(&nd->path); +out_put_link: + path_put(link); return error; } @@ -1383,9 +1390,10 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd) void *cookie; res = follow_link(&link, nd, &cookie); - if (!res) - res = walk_component(nd, path, &nd->last, - nd->last_type, LOOKUP_FOLLOW); + if (res) + break; + res = walk_component(nd, path, &nd->last, + nd->last_type, LOOKUP_FOLLOW); put_link(nd, &link, cookie); } while (res > 0); @@ -1777,8 +1785,9 @@ static int path_lookupat(int dfd, const char *name, struct path link = path; nd->flags |= LOOKUP_PARENT; err = follow_link(&link, nd, &cookie); - if (!err) - err = lookup_last(nd, &path); + if (err) + break; + err = lookup_last(nd, &path); put_link(nd, &link, cookie); } } @@ -2475,9 +2484,8 @@ static struct file *path_openat(int dfd, const char *pathname, nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = follow_link(&link, nd, &cookie); if (unlikely(error)) - filp = ERR_PTR(error); - else - filp = do_last(nd, &path, op, pathname); + goto out_filp; + filp = do_last(nd, &path, op, pathname); put_link(nd, &link, cookie); } out: -- cgit v0.10.2 From 37d7fffc9cafe75ded8a840fa30ba625f99ed7ae Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:12 +0200 Subject: vfs: do_last(): inline lookup_slow() Copy lookup_slow() into do_last(). Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 6135a14..68742e3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2254,9 +2254,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error < 0) goto exit; - error = lookup_slow(nd, &nd->last, path); - if (error < 0) + BUG_ON(nd->inode != dir->d_inode); + + mutex_lock(&dir->d_inode->i_mutex); + dentry = __lookup_hash(&nd->last, dir, nd); + mutex_unlock(&dir->d_inode->i_mutex); + error = PTR_ERR(dentry); + if (IS_ERR(dentry)) goto exit; + path->mnt = nd->path.mnt; + path->dentry = dentry; + error = follow_managed(path, nd->flags); + if (unlikely(error < 0)) + goto exit_dput; + + if (error) + nd->flags |= LOOKUP_JUMPED; inode = path->dentry->d_inode; } -- cgit v0.10.2 From b6183df7b294997a748eeb9991daa126986ead12 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:13 +0200 Subject: vfs: do_last(): separate O_CREAT specific code Check O_CREAT on the slow lookup paths where necessary. This allows the rest to be shared with plain open. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 68742e3..12ed297 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2274,22 +2274,23 @@ static struct file *do_last(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } goto finish_lookup; - } - - /* create side of things */ - /* - * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED has been - * cleared when we got to the last component we are about to look up - */ - error = complete_walk(nd); - if (error) - return ERR_PTR(error); + } else { + /* create side of things */ + /* + * This will *only* deal with leaving RCU mode - LOOKUP_JUMPED + * has been cleared when we got to the last component we are + * about to look up + */ + error = complete_walk(nd); + if (error) + return ERR_PTR(error); - audit_inode(pathname, dir); - error = -EISDIR; - /* trailing slashes? */ - if (nd->last.name[nd->last.len]) - goto exit; + audit_inode(pathname, dir); + error = -EISDIR; + /* trailing slashes? */ + if (nd->last.name[nd->last.len]) + goto exit; + } retry_lookup: mutex_lock(&dir->d_inode->i_mutex); @@ -2305,7 +2306,7 @@ retry_lookup: path->mnt = nd->path.mnt; /* Negative dentry, just create the file */ - if (!dentry->d_inode) { + if (!dentry->d_inode && (open_flag & O_CREAT)) { umode_t mode = op->mode; if (!IS_POSIXACL(dir->d_inode)) mode &= ~current_umask(); -- cgit v0.10.2 From 7157486541bffc0dfec912e21ae639b029dae3d3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:14 +0200 Subject: vfs: do_last(): common slow lookup Make the slow lookup part of O_CREAT and non-O_CREAT opens common. This allows atomic_open to be hooked into the slow lookup part. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 12ed297..285e62e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2250,30 +2250,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, symlink_ok = 1; /* we _can_ be in RCU mode here */ error = lookup_fast(nd, &nd->last, path, &inode); - if (unlikely(error)) { - if (error < 0) - goto exit; - - BUG_ON(nd->inode != dir->d_inode); - - mutex_lock(&dir->d_inode->i_mutex); - dentry = __lookup_hash(&nd->last, dir, nd); - mutex_unlock(&dir->d_inode->i_mutex); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto exit; - path->mnt = nd->path.mnt; - path->dentry = dentry; - error = follow_managed(path, nd->flags); - if (unlikely(error < 0)) - goto exit_dput; + if (likely(!error)) + goto finish_lookup; - if (error) - nd->flags |= LOOKUP_JUMPED; + if (error < 0) + goto exit; - inode = path->dentry->d_inode; - } - goto finish_lookup; + BUG_ON(nd->inode != dir->d_inode); } else { /* create side of things */ /* -- cgit v0.10.2 From d58ffd35c1e595df2cf8ac4803f178c8be95ca7a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:15 +0200 Subject: vfs: add lookup_open() Split out lookup + maybe create from do_last(). This is the part under i_mutex protection. The function is called lookup_open() and returns a filp even though the open part is not used yet. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 285e62e..fad7117 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2197,19 +2197,73 @@ static inline int open_to_namei_flags(int flag) } /* + * Lookup, maybe create and open the last component + * + * Must be called with i_mutex held on parent. + * + * Returns open file or NULL on success, error otherwise. NULL means no open + * was performed, only lookup. + */ +static struct file *lookup_open(struct nameidata *nd, struct path *path, + const struct open_flags *op, + int *want_write, bool *created) +{ + struct dentry *dir = nd->path.dentry; + struct dentry *dentry; + int error; + + *created = false; + dentry = lookup_hash(nd); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + /* Negative dentry, just create the file */ + if (!dentry->d_inode && (op->open_flag & O_CREAT)) { + umode_t mode = op->mode; + if (!IS_POSIXACL(dir->d_inode)) + mode &= ~current_umask(); + /* + * This write is needed to ensure that a + * rw->ro transition does not occur between + * the time when the file is created and when + * a permanent write count is taken through + * the 'struct file' in nameidata_to_filp(). + */ + error = mnt_want_write(nd->path.mnt); + if (error) + goto out_dput; + *want_write = 1; + *created = true; + error = security_path_mknod(&nd->path, dentry, mode, 0); + if (error) + goto out_dput; + error = vfs_create(dir->d_inode, dentry, mode, nd); + if (error) + goto out_dput; + } + path->dentry = dentry; + path->mnt = nd->path.mnt; + return NULL; + +out_dput: + dput(dentry); + return ERR_PTR(error); +} + +/* * Handle the last step of open() */ static struct file *do_last(struct nameidata *nd, struct path *path, const struct open_flags *op, const char *pathname) { struct dentry *dir = nd->path.dentry; - struct dentry *dentry; int open_flag = op->open_flag; int will_truncate = open_flag & O_TRUNC; int want_write = 0; int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; + bool created; int symlink_ok = 0; struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; @@ -2277,53 +2331,24 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); + filp = lookup_open(nd, path, op, &want_write, &created); + mutex_unlock(&dir->d_inode->i_mutex); - dentry = lookup_hash(nd); - error = PTR_ERR(dentry); - if (IS_ERR(dentry)) { - mutex_unlock(&dir->d_inode->i_mutex); - goto exit; - } - - path->dentry = dentry; - path->mnt = nd->path.mnt; + if (IS_ERR(filp)) + goto out; - /* Negative dentry, just create the file */ - if (!dentry->d_inode && (open_flag & O_CREAT)) { - umode_t mode = op->mode; - if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current_umask(); - /* - * This write is needed to ensure that a - * rw->ro transition does not occur between - * the time when the file is created and when - * a permanent write count is taken through - * the 'struct file' in nameidata_to_filp(). - */ - error = mnt_want_write(nd->path.mnt); - if (error) - goto exit_mutex_unlock; - want_write = 1; + if (created) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = 0; acc_mode = MAY_OPEN; - error = security_path_mknod(&nd->path, dentry, mode, 0); - if (error) - goto exit_mutex_unlock; - error = vfs_create(dir->d_inode, dentry, mode, nd); - if (error) - goto exit_mutex_unlock; - mutex_unlock(&dir->d_inode->i_mutex); - dput(nd->path.dentry); - nd->path.dentry = dentry; + path_to_nameidata(path, nd); goto common; } /* * It already exists. */ - mutex_unlock(&dir->d_inode->i_mutex); audit_inode(pathname, path->dentry); error = -EEXIST; @@ -2432,8 +2457,6 @@ out: terminate_walk(nd); return filp; -exit_mutex_unlock: - mutex_unlock(&dir->d_inode->i_mutex); exit_dput: path_put_conditional(path, nd); exit: -- cgit v0.10.2 From 54ef487241e863a6046536ac5b1fcd5d7cde86e5 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:16 +0200 Subject: vfs: lookup_open(): expand lookup_hash() Copy __lookup_hash() into lookup_open(). The next patch will insert the atomic open call just before the real lookup. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index fad7117..ccb0eb1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2209,14 +2209,24 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, int *want_write, bool *created) { struct dentry *dir = nd->path.dentry; + struct inode *dir_inode = dir->d_inode; struct dentry *dentry; int error; + bool need_lookup; *created = false; - dentry = lookup_hash(nd); + dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); if (IS_ERR(dentry)) return ERR_CAST(dentry); + if (need_lookup) { + BUG_ON(dentry->d_inode); + + dentry = lookup_real(dir_inode, dentry, nd); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + } + /* Negative dentry, just create the file */ if (!dentry->d_inode && (op->open_flag & O_CREAT)) { umode_t mode = op->mode; -- cgit v0.10.2 From d18e9008c377dc6a6d2166a6840bf3a23a5867fd Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:17 +0200 Subject: vfs: add i_op->atomic_open() Add a new inode operation which is called on the last component of an open. Using this the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type turned out to be wrong) it may signal this by returning NULL instead of an open struct file pointer. i_op->atomic_open() is only called if the last component is negative or needs lookup. Handling cached positive dentries here doesn't add much value: these can be opened using f_op->open(). If the cached file turns out to be invalid, the open can be retried, this time using ->atomic_open() with a fresh dentry. For now leave the old way of using open intents in lookup and revalidate in place. This will be removed once all the users are converted. David Howells noticed that if ->atomic_open() opens the file but does not create it, handle_truncate() will be called on it even if it is not a regular file. Fix this by checking the file type in this case too. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8e2da1e..8157488 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -62,6 +62,9 @@ ata *); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); + struct file * (*atomic_open)(struct inode *, struct dentry *, + struct opendata *, unsigned open_flag, + umode_t create_mode, bool *created); locking rules: all may block @@ -89,6 +92,7 @@ listxattr: no removexattr: yes fiemap: no update_time: no +atomic_open: yes Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index efd23f4..beb6e69 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,6 +364,9 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); + struct file * (*atomic_open)(struct inode *, struct dentry *, + struct opendata *, unsigned open_flag, + umode_t create_mode, bool *created); }; Again, all methods are called without any locks being held, unless @@ -476,6 +479,14 @@ otherwise noted. an inode. If this is not defined the VFS will update the inode itself and call mark_inode_dirty_sync. + atomic_open: called on the last component of an open. Using this optional + method the filesystem can look up, possibly create and open the file in + one atomic operation. If it cannot perform this (e.g. the file type + turned out to be wrong) it may signal this by returning NULL instead of + an open struct file pointer. This method is only called if the last + component is negative or needs lookup. Cached positive dentries are + still handled by f_op->open(). + The Address Space Object ======================== diff --git a/fs/internal.h b/fs/internal.h index d2a23ff6..7006777 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -85,6 +85,11 @@ extern struct super_block *user_get_super(dev_t); struct nameidata; extern struct file *nameidata_to_filp(struct nameidata *); extern void release_open_intent(struct nameidata *); +struct opendata { + struct dentry *dentry; + struct vfsmount *mnt; + struct file **filp; +}; struct open_flags { int open_flag; umode_t mode; diff --git a/fs/namei.c b/fs/namei.c index ccb0eb1..9e11ae8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2196,6 +2196,176 @@ static inline int open_to_namei_flags(int flag) return flag; } +static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) +{ + int error = security_path_mknod(dir, dentry, mode, 0); + if (error) + return error; + + error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC); + if (error) + return error; + + return security_inode_create(dir->dentry->d_inode, dentry, mode); +} + +static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, + struct path *path, const struct open_flags *op, + int *want_write, bool need_lookup, + bool *created) +{ + struct inode *dir = nd->path.dentry->d_inode; + unsigned open_flag = open_to_namei_flags(op->open_flag); + umode_t mode; + int error; + int acc_mode; + struct opendata od; + struct file *filp; + int create_error = 0; + struct dentry *const DENTRY_NOT_SET = (void *) -1UL; + + BUG_ON(dentry->d_inode); + + /* Don't create child dentry for a dead directory. */ + if (unlikely(IS_DEADDIR(dir))) { + filp = ERR_PTR(-ENOENT); + goto out; + } + + mode = op->mode & S_IALLUGO; + if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) + mode &= ~current_umask(); + + if (open_flag & O_EXCL) { + open_flag &= ~O_TRUNC; + *created = true; + } + + /* + * Checking write permission is tricky, bacuse we don't know if we are + * going to actually need it: O_CREAT opens should work as long as the + * file exists. But checking existence breaks atomicity. The trick is + * to check access and if not granted clear O_CREAT from the flags. + * + * Another problem is returing the "right" error value (e.g. for an + * O_EXCL open we want to return EEXIST not EROFS). + */ + if ((open_flag & (O_CREAT | O_TRUNC)) || + (open_flag & O_ACCMODE) != O_RDONLY) { + error = mnt_want_write(nd->path.mnt); + if (!error) { + *want_write = 1; + } else if (!(open_flag & O_CREAT)) { + /* + * No O_CREATE -> atomicity not a requirement -> fall + * back to lookup + open + */ + goto no_open; + } else if (open_flag & (O_EXCL | O_TRUNC)) { + /* Fall back and fail with the right error */ + create_error = error; + goto no_open; + } else { + /* No side effects, safe to clear O_CREAT */ + create_error = error; + open_flag &= ~O_CREAT; + } + } + + if (open_flag & O_CREAT) { + error = may_o_create(&nd->path, dentry, op->mode); + if (error) { + create_error = error; + if (open_flag & O_EXCL) + goto no_open; + open_flag &= ~O_CREAT; + } + } + + if (nd->flags & LOOKUP_DIRECTORY) + open_flag |= O_DIRECTORY; + + od.dentry = DENTRY_NOT_SET; + od.mnt = nd->path.mnt; + od.filp = &nd->intent.open.file; + filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode, + created); + if (IS_ERR(filp)) { + if (WARN_ON(od.dentry != DENTRY_NOT_SET)) + dput(od.dentry); + + if (create_error && PTR_ERR(filp) == -ENOENT) + filp = ERR_PTR(create_error); + goto out; + } + + acc_mode = op->acc_mode; + if (*created) { + fsnotify_create(dir, dentry); + acc_mode = MAY_OPEN; + } + + if (!filp) { + if (WARN_ON(od.dentry == DENTRY_NOT_SET)) { + filp = ERR_PTR(-EIO); + goto out; + } + if (od.dentry) { + dput(dentry); + dentry = od.dentry; + } + goto looked_up; + } + + /* + * We didn't have the inode before the open, so check open permission + * here. + */ + error = may_open(&filp->f_path, acc_mode, open_flag); + if (error) + goto out_fput; + + error = open_check_o_direct(filp); + if (error) + goto out_fput; + +out: + dput(dentry); + return filp; + +out_fput: + fput(filp); + filp = ERR_PTR(error); + goto out; + +no_open: + if (need_lookup) { + dentry = lookup_real(dir, dentry, nd); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + if (create_error) { + int open_flag = op->open_flag; + + filp = ERR_PTR(create_error); + if ((open_flag & O_EXCL)) { + if (!dentry->d_inode) + goto out; + } else if (!dentry->d_inode) { + goto out; + } else if ((open_flag & O_TRUNC) && + S_ISREG(dentry->d_inode->i_mode)) { + goto out; + } + /* will fail later, go on to get the right error */ + } + } +looked_up: + path->dentry = dentry; + path->mnt = nd->path.mnt; + return NULL; +} + /* * Lookup, maybe create and open the last component * @@ -2219,6 +2389,15 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if (IS_ERR(dentry)) return ERR_CAST(dentry); + /* Cached positive dentry: will open in f_op->open */ + if (!need_lookup && dentry->d_inode) + goto out_no_open; + + if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { + return atomic_open(nd, dentry, path, op, want_write, + need_lookup, created); + } + if (need_lookup) { BUG_ON(dentry->d_inode); @@ -2251,6 +2430,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if (error) goto out_dput; } +out_no_open: path->dentry = dentry; path->mnt = nd->path.mnt; return NULL; @@ -2344,8 +2524,16 @@ retry_lookup: filp = lookup_open(nd, path, op, &want_write, &created); mutex_unlock(&dir->d_inode->i_mutex); - if (IS_ERR(filp)) - goto out; + if (filp) { + if (IS_ERR(filp)) + goto out; + + if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + will_truncate = 0; + + audit_inode(pathname, filp->f_path.dentry); + goto opened; + } if (created) { /* Don't check for write permission, don't truncate */ @@ -2361,6 +2549,16 @@ retry_lookup: */ audit_inode(pathname, path->dentry); + /* + * If atomic_open() acquired write access it is dropped now due to + * possible mount and symlink following (this might be optimized away if + * necessary...) + */ + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = 0; + } + error = -EEXIST; if (open_flag & O_EXCL) goto exit_dput; @@ -2444,6 +2642,7 @@ common: retried = true; goto retry_lookup; } +opened: if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); if (error) { diff --git a/fs/open.c b/fs/open.c index 1540632..13bece4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -811,6 +811,48 @@ out_err: EXPORT_SYMBOL_GPL(lookup_instantiate_filp); /** + * finish_open - finish opening a file + * @od: opaque open data + * @dentry: pointer to dentry + * @open: open callback + * + * This can be used to finish opening a file passed to i_op->atomic_open(). + * + * If the open callback is set to NULL, then the standard f_op->open() + * filesystem callback is substituted. + */ +struct file *finish_open(struct opendata *od, struct dentry *dentry, + int (*open)(struct inode *, struct file *)) +{ + struct file *res; + + mntget(od->mnt); + dget(dentry); + + res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred()); + if (!IS_ERR(res)) + *od->filp = NULL; + + return res; +} +EXPORT_SYMBOL(finish_open); + +/** + * finish_no_open - finish ->atomic_open() without opening the file + * + * @od: opaque open data + * @dentry: dentry or NULL (as returned from ->lookup()) + * + * This can be used to set the result of a successful lookup in ->atomic_open(). + * The filesystem's atomic_open() method shall return NULL after calling this. + */ +void finish_no_open(struct opendata *od, struct dentry *dentry) +{ + od->dentry = dentry; +} +EXPORT_SYMBOL(finish_no_open); + +/** * nameidata_to_filp - convert a nameidata to an open filp. * @nd: pointer to nameidata * @flags: open flags diff --git a/include/linux/fs.h b/include/linux/fs.h index f06db6b..0314635 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -427,6 +427,7 @@ struct kstatfs; struct vm_area_struct; struct vfsmount; struct cred; +struct opendata; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -1693,6 +1694,9 @@ struct inode_operations { int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, struct timespec *, int); + struct file * (*atomic_open)(struct inode *, struct dentry *, + struct opendata *, unsigned open_flag, + umode_t create_mode, bool *created); } ____cacheline_aligned; struct seq_file; @@ -2061,6 +2065,9 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); +extern struct file *finish_open(struct opendata *od, struct dentry *dentry, + int (*open)(struct inode *, struct file *)); +extern void finish_no_open(struct opendata *od, struct dentry *dentry); /* fs/ioctl.c */ -- cgit v0.10.2 From 0dd2b474d0b69d58859399b1df7fdc699ea005d4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:18 +0200 Subject: nfs: implement i_op->atomic_open() Replace NFS4 specific ->lookup implementation with ->atomic_open impelementation and use the generic nfs_lookup for other lookups. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f430057..0d8c712 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -111,11 +111,15 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 -static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); -static int nfs_open_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd); +static struct file *nfs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + bool *); +static int nfs4_create(struct inode *dir, struct dentry *dentry, + umode_t mode, struct nameidata *nd); const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs_open_create, - .lookup = nfs_atomic_lookup, + .create = nfs4_create, + .lookup = nfs_lookup, + .atomic_open = nfs_atomic_open, .link = nfs_link, .unlink = nfs_unlink, .symlink = nfs_symlink, @@ -1403,120 +1407,132 @@ static int do_open(struct inode *inode, struct file *filp) return 0; } -static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx) +static struct file *nfs_finish_open(struct nfs_open_context *ctx, + struct dentry *dentry, + struct opendata *od, unsigned open_flags) { struct file *filp; - int ret = 0; + int err; + + if (ctx->dentry != dentry) { + dput(ctx->dentry); + ctx->dentry = dget(dentry); + } /* If the open_intent is for execute, we have an extra check to make */ if (ctx->mode & FMODE_EXEC) { - ret = nfs_may_open(ctx->dentry->d_inode, - ctx->cred, - nd->intent.open.flags); - if (ret < 0) + err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); + if (err < 0) { + filp = ERR_PTR(err); goto out; + } } - filp = lookup_instantiate_filp(nd, ctx->dentry, do_open); - if (IS_ERR(filp)) - ret = PTR_ERR(filp); - else + + filp = finish_open(od, dentry, do_open); + if (!IS_ERR(filp)) nfs_file_set_open_context(filp, ctx); + out: put_nfs_open_context(ctx); - return ret; + return filp; } -static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned open_flags, + umode_t mode, bool *created) { struct nfs_open_context *ctx; - struct iattr attr; - struct dentry *res = NULL; + struct dentry *res; + struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; - int open_flags; + struct file *filp; int err; - dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", + /* Expect a negative dentry */ + BUG_ON(dentry->d_inode); + + dfprintk(VFS, "NFS: atomic_open(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - /* Check that we are indeed trying to open this file */ - if (!is_atomic_open(nd)) + /* NFS only supports OPEN on regular files */ + if ((open_flags & O_DIRECTORY)) { + err = -ENOENT; + if (!d_unhashed(dentry)) { + /* + * Hashed negative dentry with O_DIRECTORY: dentry was + * revalidated and is fine, no need to perform lookup + * again + */ + goto out_err; + } goto no_open; - - if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { - res = ERR_PTR(-ENAMETOOLONG); - goto out; } - /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash - * the dentry. */ - if (nd->flags & LOOKUP_EXCL) { - d_instantiate(dentry, NULL); - goto out; - } - - open_flags = nd->intent.open.flags; - attr.ia_valid = ATTR_OPEN; - - ctx = create_nfs_open_context(dentry, open_flags); - res = ERR_CAST(ctx); - if (IS_ERR(ctx)) - goto out; + err = -ENAMETOOLONG; + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) + goto out_err; - if (nd->flags & LOOKUP_CREATE) { - attr.ia_mode = nd->intent.open.create_mode; + if (open_flags & O_CREAT) { attr.ia_valid |= ATTR_MODE; - attr.ia_mode &= ~current_umask(); - } else - open_flags &= ~(O_EXCL | O_CREAT); - + attr.ia_mode = mode & ~current_umask(); + } if (open_flags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; attr.ia_size = 0; } - /* Open the file on the server */ + ctx = create_nfs_open_context(dentry, open_flags); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out_err; + nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); + d_drop(dentry); if (IS_ERR(inode)) { nfs_unblock_sillyrename(dentry->d_parent); put_nfs_open_context(ctx); - switch (PTR_ERR(inode)) { - /* Make a negative dentry */ - case -ENOENT: - d_add(dentry, NULL); - res = NULL; - goto out; - /* This turned out not to be a regular file */ - case -EISDIR: - case -ENOTDIR: + err = PTR_ERR(inode); + switch (err) { + case -ENOENT: + d_add(dentry, NULL); + break; + case -EISDIR: + case -ENOTDIR: + goto no_open; + case -ELOOP: + if (!(open_flags & O_NOFOLLOW)) goto no_open; - case -ELOOP: - if (!(nd->intent.open.flags & O_NOFOLLOW)) - goto no_open; + break; /* case -EINVAL: */ - default: - res = ERR_CAST(inode); - goto out; + default: + break; } + goto out_err; } res = d_add_unique(dentry, inode); - nfs_unblock_sillyrename(dentry->d_parent); - if (res != NULL) { - dput(ctx->dentry); - ctx->dentry = dget(res); + if (res != NULL) dentry = res; - } - err = nfs_intent_set_file(nd, ctx); - if (err < 0) { - if (res != NULL) - dput(res); - return ERR_PTR(err); - } -out: + + nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - return res; + + filp = nfs_finish_open(ctx, dentry, od, open_flags); + + dput(res); + return filp; + +out_err: + return ERR_PTR(err); + no_open: - return nfs_lookup(dir, dentry, nd); + res = nfs_lookup(dir, dentry, NULL); + err = PTR_ERR(res); + if (IS_ERR(res)) + goto out_err; + + finish_no_open(od, res); + return NULL; } static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) @@ -1566,8 +1582,8 @@ no_open: return nfs_lookup_revalidate(dentry, nd); } -static int nfs_open_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) +static int nfs4_create(struct inode *dir, struct dentry *dentry, + umode_t mode, struct nameidata *nd) { struct nfs_open_context *ctx = NULL; struct iattr attr; @@ -1591,19 +1607,14 @@ static int nfs_open_create(struct inode *dir, struct dentry *dentry, error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); if (error != 0) goto out_put_ctx; - if (nd) { - error = nfs_intent_set_file(nd, ctx); - if (error < 0) - goto out_err; - } else { - put_nfs_open_context(ctx); - } + + put_nfs_open_context(ctx); + return 0; out_put_ctx: put_nfs_open_context(ctx); out_err_drop: d_drop(dentry); -out_err: return error; } -- cgit v0.10.2 From 8867fe5899010a0c0ac36dadfdacf1072b1c990c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:19 +0200 Subject: nfs: clean up ->create in nfs_rpc_ops Don't pass nfs_open_context() to ->create(). Only the NFS4 implementation needed that and only because it wanted to return an open file using open intents. That task has been replaced by ->atomic_open so it is not necessary anymore to pass the context to the create rpc operation. Despite nfs4_proc_create apparently being okay with a NULL context it Oopses somewhere down the call chain. So allocate a context here. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0d8c712..45015d3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -114,10 +114,8 @@ const struct inode_operations nfs3_dir_inode_operations = { static struct file *nfs_atomic_open(struct inode *, struct dentry *, struct opendata *, unsigned, umode_t, bool *); -static int nfs4_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd); const struct inode_operations nfs4_dir_inode_operations = { - .create = nfs4_create, + .create = nfs_create, .lookup = nfs_lookup, .atomic_open = nfs_atomic_open, .link = nfs_link, @@ -1582,42 +1580,6 @@ no_open: return nfs_lookup_revalidate(dentry, nd); } -static int nfs4_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) -{ - struct nfs_open_context *ctx = NULL; - struct iattr attr; - int error; - int open_flags = O_CREAT|O_EXCL; - - dfprintk(VFS, "NFS: create(%s/%ld), %s\n", - dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); - - attr.ia_mode = mode; - attr.ia_valid = ATTR_MODE; - - if (nd) - open_flags = nd->intent.open.flags; - - ctx = create_nfs_open_context(dentry, open_flags); - error = PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out_err_drop; - - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); - if (error != 0) - goto out_put_ctx; - - put_nfs_open_context(ctx); - - return 0; -out_put_ctx: - put_nfs_open_context(ctx); -out_err_drop: - d_drop(dentry); - return error; -} - #endif /* CONFIG_NFSV4 */ /* @@ -1684,7 +1646,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, if (nd) open_flags = nd->intent.open.flags; - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, NULL); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) goto out_err; return 0; diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2292a0f..3187e24 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -314,7 +314,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data) */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { struct nfs3_createdata *data; umode_t mode = sattr->ia_mode; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 15fc7e4..c157b20 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2806,37 +2806,22 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page, } /* - * Got race? - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this create/open method is prone to inefficiency and race - * conditions due to the lookup, create, and open VFS calls from sys_open() - * placed on the wire. - * - * Given the above sorry state of affairs, I'm simply sending an OPEN. - * The file will be opened again in the subsequent VFS open call - * (nfs4_proc_file_open). - * - * The open for read will just hang around to be used by any process that - * opens the file O_RDONLY. This will all be resolved with the VFS changes. + * This is just for mknod. open(O_CREAT) will always do ->open_context(). */ - static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { - struct dentry *de = dentry; + struct nfs_open_context *ctx; struct nfs4_state *state; - struct rpc_cred *cred = NULL; - fmode_t fmode = 0; int status = 0; - if (ctx != NULL) { - cred = ctx->cred; - de = ctx->dentry; - fmode = ctx->mode; - } + ctx = alloc_nfs_open_context(dentry, FMODE_READ); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + sattr->ia_mode &= ~current_umask(); - state = nfs4_do_open(dir, de, fmode, flags, sattr, cred, NULL); + state = nfs4_do_open(dir, dentry, ctx->mode, flags, sattr, ctx->cred, NULL); d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); @@ -2844,11 +2829,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } d_add(dentry, igrab(state->inode)); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - if (ctx != NULL) - ctx->state = state; - else - nfs4_close_sync(state, fmode); + ctx->state = state; out: + put_nfs_open_context(ctx); return status; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 617c741..4433806 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -259,7 +259,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data) static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags, struct nfs_open_context *ctx) + int flags) { struct nfs_createdata *data; struct rpc_message msg = { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 8aadd90..d3b7c18 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1374,7 +1374,7 @@ struct nfs_rpc_ops { int (*readlink)(struct inode *, struct page *, unsigned int, unsigned int); int (*create) (struct inode *, struct dentry *, - struct iattr *, int, struct nfs_open_context *); + struct iattr *, int); int (*remove) (struct inode *, struct qstr *); void (*unlink_setup) (struct rpc_message *, struct inode *dir); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); -- cgit v0.10.2 From 50de348c3604f7684a89ce64180666d4dd74623f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:20 +0200 Subject: nfs: don't use nd->intent.open.flags Instead check LOOKUP_EXCL in nd->flags, which is basically what the open intent flags were used for. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 45015d3..0432f47 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1538,7 +1538,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) struct dentry *parent = NULL; struct inode *inode; struct inode *dir; - int openflags, ret = 0; + int ret = 0; if (nd->flags & LOOKUP_RCU) return -ECHILD; @@ -1562,9 +1562,8 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) goto no_open_dput; - openflags = nd->intent.open.flags; /* We cannot do exclusive creation on a positive dentry */ - if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + if (nd && nd->flags & LOOKUP_EXCL) goto no_open_dput; /* Let f_op->open() actually open (and revalidate) the file */ @@ -1643,8 +1642,8 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd) - open_flags = nd->intent.open.flags; + if (nd && !(nd->flags & LOOKUP_EXCL)) + open_flags = O_CREAT; error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) -- cgit v0.10.2 From eda72afb9ef9f45941fb09260c0f268ff81ec40d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:21 +0200 Subject: nfs: don't use intents for checking atomic open is_atomic_open() is now only used by nfs4_lookup_revalidate() to check whether it's okay to skip normal revalidation. It does a racy check for mount read-onlyness and falls back to normal revalidation if the open would fail. This makes little sense now that this function isn't used for determining whether to actually open the file or not. The d_mountpoint() check still makes sense since it is an indication that we might be following a mount and so open may not revalidate the dentry. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0432f47..e6d55dc 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1366,24 +1366,6 @@ const struct dentry_operations nfs4_dentry_operations = { .d_release = nfs_d_release, }; -/* - * Use intent information to determine whether we need to substitute - * the NFSv4-style stateful OPEN for the LOOKUP call - */ -static int is_atomic_open(struct nameidata *nd) -{ - if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_OPEN) == 0) - return 0; - /* NFS does not (yet) have a stateful open for directories */ - if (nd->flags & LOOKUP_DIRECTORY) - return 0; - /* Are we trying to write to a read only partition? */ - if (__mnt_is_readonly(nd->path.mnt) && - (nd->intent.open.flags & (O_CREAT|O_TRUNC|O_ACCMODE))) - return 0; - return 1; -} - static fmode_t flags_to_mode(int flags) { fmode_t res = (__force fmode_t)flags & FMODE_EXEC; @@ -1543,10 +1525,12 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (nd->flags & LOOKUP_RCU) return -ECHILD; - inode = dentry->d_inode; - if (!is_atomic_open(nd) || d_mountpoint(dentry)) + if (!(nd->flags & LOOKUP_OPEN) || (nd->flags & LOOKUP_DIRECTORY)) + goto no_open; + if (d_mountpoint(dentry)) goto no_open; + inode = dentry->d_inode; parent = dget_parent(dentry); dir = parent->d_inode; -- cgit v0.10.2 From c8ccbe032feb127a977c66865cb63d72d9a6e08b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:22 +0200 Subject: fuse: implement i_op->atomic_open() Add an ->atomic_open implementation which replaces the atomic open+create operation implemented via ->create. No functionality is changed. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f7543f7..e42442f 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -369,8 +369,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, * If the filesystem doesn't support this, then fall back to separate * 'mknod' + 'open' requests. */ -static int fuse_create_open(struct inode *dir, struct dentry *entry, - umode_t mode, struct nameidata *nd) +static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, + struct opendata *od, unsigned flags, + umode_t mode) { int err; struct inode *inode; @@ -382,14 +383,11 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_entry_out outentry; struct fuse_file *ff; struct file *file; - int flags = nd->intent.open.flags; - - if (fc->no_create) - return -ENOSYS; forget = fuse_alloc_forget(); + err = -ENOMEM; if (!forget) - return -ENOMEM; + goto out_err; req = fuse_get_req(fc); err = PTR_ERR(req); @@ -428,11 +426,8 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, req->out.args[1].value = &outopen; fuse_request_send(fc, req); err = req->out.h.error; - if (err) { - if (err == -ENOSYS) - fc->no_create = 1; + if (err) goto out_free_ff; - } err = -EIO; if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid)) @@ -448,28 +443,78 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(ff, flags); fuse_queue_forget(fc, forget, outentry.nodeid, 1); - return -ENOMEM; + err = -ENOMEM; + goto out_err; } kfree(forget); d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); - file = lookup_instantiate_filp(nd, entry, generic_file_open); + file = finish_open(od, entry, generic_file_open); if (IS_ERR(file)) { fuse_sync_release(ff, flags); - return PTR_ERR(file); + } else { + file->private_data = fuse_file_get(ff); + fuse_finish_open(inode, file); } - file->private_data = fuse_file_get(ff); - fuse_finish_open(inode, file); - return 0; + return file; - out_free_ff: +out_free_ff: fuse_file_free(ff); - out_put_request: +out_put_request: fuse_put_request(fc, req); - out_put_forget_req: +out_put_forget_req: kfree(forget); - return err; +out_err: + return ERR_PTR(err); +} + +static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); +static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct opendata *od, unsigned flags, + umode_t mode, bool *created) +{ + int err; + struct fuse_conn *fc = get_fuse_conn(dir); + struct file *file; + struct dentry *res = NULL; + + if (d_unhashed(entry)) { + res = fuse_lookup(dir, entry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + if (res) + entry = res; + } + + if (!(flags & O_CREAT) || entry->d_inode) + goto no_open; + + /* Only creates */ + *created = true; + + if (fc->no_create) + goto mknod; + + file = fuse_create_open(dir, entry, od, flags, mode); + if (PTR_ERR(file) == -ENOSYS) { + fc->no_create = 1; + goto mknod; + } +out_dput: + dput(res); + return file; + +mknod: + err = fuse_mknod(dir, entry, mode, 0); + if (err) { + file = ERR_PTR(err); + goto out_dput; + } +no_open: + finish_no_open(od, res); + return NULL; } /* @@ -573,12 +618,6 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, struct nameidata *nd) { - if (nd) { - int err = fuse_create_open(dir, entry, mode, nd); - if (err != -ENOSYS) - return err; - /* Fall back on mknod */ - } return fuse_mknod(dir, entry, mode, 0); } @@ -1646,6 +1685,7 @@ static const struct inode_operations fuse_dir_inode_operations = { .link = fuse_link, .setattr = fuse_setattr, .create = fuse_create, + .atomic_open = fuse_atomic_open, .mknod = fuse_mknod, .permission = fuse_permission, .getattr = fuse_getattr, -- cgit v0.10.2 From d2c127197dfc0b2bae62a52e1e0d3e3ff493919e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:23 +0200 Subject: cifs: implement i_op->atomic_open() Add an ->atomic_open implementation which replaces the atomic lookup+open+create operation implemented via ->lookup and ->create operations. Signed-off-by: Miklos Szeredi CC: Steve French Signed-off-by: Al Viro diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index bcab12c..c0c2751 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -777,6 +777,7 @@ struct file_system_type cifs_fs_type = { }; const struct inode_operations cifs_dir_inode_ops = { .create = cifs_create, + .atomic_open = cifs_atomic_open, .lookup = cifs_lookup, .getattr = cifs_getattr, .unlink = cifs_unlink, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 6536535..3a572bf 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -46,6 +46,9 @@ extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); +extern struct file *cifs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + bool *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index ec4e9a2..7a3dcd1 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -133,108 +133,141 @@ cifs_bp_rename_retry: return full_path; } +/* + * Don't allow the separator character in a path component. + * The VFS will not allow "/", but "\" is allowed by posix. + */ +static int +check_name(struct dentry *direntry) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + int i; + + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { + for (i = 0; i < direntry->d_name.len; i++) { + if (direntry->d_name.name[i] == '\\') { + cFYI(1, "Invalid file name"); + return -EINVAL; + } + } + } + return 0; +} + + /* Inode operations in similar order to how they appear in Linux file fs.h */ -int -cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, - struct nameidata *nd) +static int cifs_do_create(struct inode *inode, struct dentry *direntry, + int xid, struct tcon_link *tlink, unsigned oflags, + umode_t mode, __u32 *oplock, __u16 *fileHandle, + bool *created) { int rc = -ENOENT; - int xid; int create_options = CREATE_NOT_DIR; - __u32 oplock = 0; - int oflags; - /* - * BB below access is probably too much for mknod to request - * but we have to do query and setpathinfo so requesting - * less could fail (unless we want to request getatr and setatr - * permissions (only). At least for POSIX we do not have to - * request so much. - */ - int desiredAccess = GENERIC_READ | GENERIC_WRITE; - __u16 fileHandle; - struct cifs_sb_info *cifs_sb; - struct tcon_link *tlink; - struct cifs_tcon *tcon; + int desiredAccess; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_tcon *tcon = tlink_tcon(tlink); char *full_path = NULL; FILE_ALL_INFO *buf = NULL; struct inode *newinode = NULL; - int disposition = FILE_OVERWRITE_IF; - - xid = GetXid(); - - cifs_sb = CIFS_SB(inode->i_sb); - tlink = cifs_sb_tlink(cifs_sb); - if (IS_ERR(tlink)) { - FreeXid(xid); - return PTR_ERR(tlink); - } - tcon = tlink_tcon(tlink); + int disposition; + *oplock = 0; if (tcon->ses->server->oplocks) - oplock = REQ_OPLOCK; - - if (nd) - oflags = nd->intent.open.file->f_flags; - else - oflags = O_RDONLY | O_CREAT; + *oplock = REQ_OPLOCK; full_path = build_path_from_dentry(direntry); if (full_path == NULL) { rc = -ENOMEM; - goto cifs_create_out; + goto out; } if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && + !tcon->broken_posix_open && (CIFS_UNIX_POSIX_PATH_OPS_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))) { rc = cifs_posix_open(full_path, &newinode, - inode->i_sb, mode, oflags, &oplock, &fileHandle, xid); - /* EIO could indicate that (posix open) operation is not - supported, despite what server claimed in capability - negotiation. EREMOTE indicates DFS junction, which is not - handled in posix open */ - - if (rc == 0) { - if (newinode == NULL) /* query inode info */ + inode->i_sb, mode, oflags, oplock, fileHandle, xid); + switch (rc) { + case 0: + if (newinode == NULL) { + /* query inode info */ goto cifs_create_get_file_info; - else /* success, no need to query */ - goto cifs_create_set_dentry; - } else if ((rc != -EIO) && (rc != -EREMOTE) && - (rc != -EOPNOTSUPP) && (rc != -EINVAL)) - goto cifs_create_out; - /* else fallthrough to retry, using older open call, this is - case where server does not support this SMB level, and - falsely claims capability (also get here for DFS case - which should be rare for path not covered on files) */ - } + } - if (nd) { - /* if the file is going to stay open, then we - need to set the desired access properly */ - desiredAccess = 0; - if (OPEN_FMODE(oflags) & FMODE_READ) - desiredAccess |= GENERIC_READ; /* is this too little? */ - if (OPEN_FMODE(oflags) & FMODE_WRITE) - desiredAccess |= GENERIC_WRITE; - - if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) - disposition = FILE_CREATE; - else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) - disposition = FILE_OVERWRITE_IF; - else if ((oflags & O_CREAT) == O_CREAT) - disposition = FILE_OPEN_IF; - else - cFYI(1, "Create flag not set in create function"); + if (!S_ISREG(newinode->i_mode)) { + /* + * The server may allow us to open things like + * FIFOs, but the client isn't set up to deal + * with that. If it's not a regular file, just + * close it and proceed as if it were a normal + * lookup. + */ + CIFSSMBClose(xid, tcon, *fileHandle); + goto cifs_create_get_file_info; + } + /* success, no need to query */ + goto cifs_create_set_dentry; + + case -ENOENT: + goto cifs_create_get_file_info; + + case -EIO: + case -EINVAL: + /* + * EIO could indicate that (posix open) operation is not + * supported, despite what server claimed in capability + * negotiation. + * + * POSIX open in samba versions 3.3.1 and earlier could + * incorrectly fail with invalid parameter. + */ + tcon->broken_posix_open = true; + break; + + case -EREMOTE: + case -EOPNOTSUPP: + /* + * EREMOTE indicates DFS junction, which is not handled + * in posix open. If either that or op not supported + * returned, follow the normal lookup. + */ + break; + + default: + goto out; + } + /* + * fallthrough to retry, using older open call, this is case + * where server does not support this SMB level, and falsely + * claims capability (also get here for DFS case which should be + * rare for path not covered on files) + */ } + desiredAccess = 0; + if (OPEN_FMODE(oflags) & FMODE_READ) + desiredAccess |= GENERIC_READ; /* is this too little? */ + if (OPEN_FMODE(oflags) & FMODE_WRITE) + desiredAccess |= GENERIC_WRITE; + + disposition = FILE_OVERWRITE_IF; + if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + disposition = FILE_CREATE; + else if ((oflags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + disposition = FILE_OVERWRITE_IF; + else if ((oflags & O_CREAT) == O_CREAT) + disposition = FILE_OPEN_IF; + else + cFYI(1, "Create flag not set in create function"); + /* BB add processing to set equivalent of mode - e.g. via CreateX with ACLs */ buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { rc = -ENOMEM; - goto cifs_create_out; + goto out; } /* @@ -250,7 +283,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, if (tcon->ses->capabilities & CAP_NT_SMBS) rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess, create_options, - &fileHandle, &oplock, buf, cifs_sb->local_nls, + fileHandle, oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); else rc = -EIO; /* no NT SMB support fall into legacy open below */ @@ -259,17 +292,17 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, /* old server, retry the open legacy style */ rc = SMBLegacyOpen(xid, tcon, full_path, disposition, desiredAccess, create_options, - &fileHandle, &oplock, buf, cifs_sb->local_nls, + fileHandle, oplock, buf, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); } if (rc) { cFYI(1, "cifs_create returned 0x%x", rc); - goto cifs_create_out; + goto out; } /* If Open reported that we actually created a file then we now have to set the mode if possible */ - if ((tcon->unix_ext) && (oplock & CIFS_CREATE_ACTION)) { + if ((tcon->unix_ext) && (*oplock & CIFS_CREATE_ACTION)) { struct cifs_unix_set_info_args args = { .mode = mode, .ctime = NO_CHANGE_64, @@ -278,6 +311,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, .device = 0, }; + *created = true; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { args.uid = (__u64) current_fsuid(); if (inode->i_mode & S_ISGID) @@ -288,7 +322,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, args.uid = NO_CHANGE_64; args.gid = NO_CHANGE_64; } - CIFSSMBUnixSetFileInfo(xid, tcon, &args, fileHandle, + CIFSSMBUnixSetFileInfo(xid, tcon, &args, *fileHandle, current->tgid); } else { /* BB implement mode setting via Windows security @@ -305,11 +339,11 @@ cifs_create_get_file_info: inode->i_sb, xid); else { rc = cifs_get_inode_info(&newinode, full_path, buf, - inode->i_sb, xid, &fileHandle); + inode->i_sb, xid, fileHandle); if (newinode) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM) newinode->i_mode = mode; - if ((oplock & CIFS_CREATE_ACTION) && + if ((*oplock & CIFS_CREATE_ACTION) && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)) { newinode->i_uid = current_fsuid(); if (inode->i_mode & S_ISGID) @@ -321,37 +355,139 @@ cifs_create_get_file_info: } cifs_create_set_dentry: - if (rc == 0) - d_instantiate(direntry, newinode); - else + if (rc != 0) { cFYI(1, "Create worked, get_inode_info failed rc = %d", rc); + goto out; + } + d_drop(direntry); + d_add(direntry, newinode); - if (newinode && nd) { - struct cifsFileInfo *pfile_info; - struct file *filp; + /* ENOENT for create? How weird... */ + rc = -ENOENT; + if (!newinode) { + CIFSSMBClose(xid, tcon, *fileHandle); + goto out; + } + rc = 0; - filp = lookup_instantiate_filp(nd, direntry, generic_file_open); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - CIFSSMBClose(xid, tcon, fileHandle); - goto cifs_create_out; - } +out: + kfree(buf); + kfree(full_path); + return rc; +} - pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock); - if (pfile_info == NULL) { - fput(filp); - CIFSSMBClose(xid, tcon, fileHandle); - rc = -ENOMEM; - } - } else { +struct file * +cifs_atomic_open(struct inode *inode, struct dentry *direntry, + struct opendata *od, unsigned oflags, umode_t mode, + bool *created) +{ + int rc; + int xid; + struct tcon_link *tlink; + struct cifs_tcon *tcon; + __u16 fileHandle; + __u32 oplock; + struct file *filp; + struct cifsFileInfo *pfile_info; + + /* Posix open is only called (at lookup time) for file create now. For + * opens (rather than creates), because we do not know if it is a file + * or directory yet, and current Samba no longer allows us to do posix + * open on dirs, we could end up wasting an open call on what turns out + * to be a dir. For file opens, we wait to call posix open till + * cifs_open. It could be added to atomic_open in the future but the + * performance tradeoff of the extra network request when EISDIR or + * EACCES is returned would have to be weighed against the 50% reduction + * in network traffic in the other paths. + */ + if (!(oflags & O_CREAT)) { + struct dentry *res = cifs_lookup(inode, direntry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + finish_no_open(od, res); + return NULL; + } + + rc = check_name(direntry); + if (rc) + return ERR_PTR(rc); + + xid = GetXid(); + + cFYI(1, "parent inode = 0x%p name is: %s and dentry = 0x%p", + inode, direntry->d_name.name, direntry); + + tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); + filp = ERR_CAST(tlink); + if (IS_ERR(tlink)) + goto free_xid; + + tcon = tlink_tcon(tlink); + + rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, + &oplock, &fileHandle, created); + + if (rc) { + filp = ERR_PTR(rc); + goto out; + } + + filp = finish_open(od, direntry, generic_file_open); + if (IS_ERR(filp)) { CIFSSMBClose(xid, tcon, fileHandle); + goto out; } -cifs_create_out: - kfree(buf); - kfree(full_path); + pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock); + if (pfile_info == NULL) { + CIFSSMBClose(xid, tcon, fileHandle); + fput(filp); + filp = ERR_PTR(-ENOMEM); + } + +out: + cifs_put_tlink(tlink); +free_xid: + FreeXid(xid); + return filp; +} + +int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, + struct nameidata *nd) +{ + int rc; + int xid = GetXid(); + /* + * BB below access is probably too much for mknod to request + * but we have to do query and setpathinfo so requesting + * less could fail (unless we want to request getatr and setatr + * permissions (only). At least for POSIX we do not have to + * request so much. + */ + unsigned oflags = O_EXCL | O_CREAT | O_RDWR; + struct tcon_link *tlink; + __u16 fileHandle; + __u32 oplock; + bool created = true; + + cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p", + inode, direntry->d_name.name, direntry); + + tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb)); + rc = PTR_ERR(tlink); + if (IS_ERR(tlink)) + goto free_xid; + + rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, + &oplock, &fileHandle, &created); + if (!rc) + CIFSSMBClose(xid, tlink_tcon(tlink), fileHandle); + cifs_put_tlink(tlink); +free_xid: FreeXid(xid); + return rc; } @@ -492,16 +628,11 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ - __u32 oplock; - __u16 fileHandle = 0; - bool posix_open = false; struct cifs_sb_info *cifs_sb; struct tcon_link *tlink; struct cifs_tcon *pTcon; - struct cifsFileInfo *cfile; struct inode *newInode = NULL; char *full_path = NULL; - struct file *filp; xid = GetXid(); @@ -518,31 +649,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } pTcon = tlink_tcon(tlink); - oplock = pTcon->ses->server->oplocks ? REQ_OPLOCK : 0; - - /* - * Don't allow the separator character in a path component. - * The VFS will not allow "/", but "\" is allowed by posix. - */ - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) { - int i; - for (i = 0; i < direntry->d_name.len; i++) - if (direntry->d_name.name[i] == '\\') { - cFYI(1, "Invalid file name"); - rc = -EINVAL; - goto lookup_out; - } - } - - /* - * O_EXCL: optimize away the lookup, but don't hash the dentry. Let - * the VFS handle the create. - */ - if (nd && (nd->flags & LOOKUP_EXCL)) { - d_instantiate(direntry, NULL); - rc = 0; + rc = check_name(direntry); + if (rc) goto lookup_out; - } /* can not grab the rename sem here since it would deadlock in the cases (beginning of sys_rename itself) @@ -560,80 +669,16 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, } cFYI(1, "Full path: %s inode = 0x%p", full_path, direntry->d_inode); - /* Posix open is only called (at lookup time) for file create now. - * For opens (rather than creates), because we do not know if it - * is a file or directory yet, and current Samba no longer allows - * us to do posix open on dirs, we could end up wasting an open call - * on what turns out to be a dir. For file opens, we wait to call posix - * open till cifs_open. It could be added here (lookup) in the future - * but the performance tradeoff of the extra network request when EISDIR - * or EACCES is returned would have to be weighed against the 50% - * reduction in network traffic in the other paths. - */ if (pTcon->unix_ext) { - if (nd && !(nd->flags & LOOKUP_DIRECTORY) && - (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && - (nd->intent.open.file->f_flags & O_CREAT)) { - rc = cifs_posix_open(full_path, &newInode, - parent_dir_inode->i_sb, - nd->intent.open.create_mode, - nd->intent.open.file->f_flags, &oplock, - &fileHandle, xid); - /* - * The check below works around a bug in POSIX - * open in samba versions 3.3.1 and earlier where - * open could incorrectly fail with invalid parameter. - * If either that or op not supported returned, follow - * the normal lookup. - */ - switch (rc) { - case 0: - /* - * The server may allow us to open things like - * FIFOs, but the client isn't set up to deal - * with that. If it's not a regular file, just - * close it and proceed as if it were a normal - * lookup. - */ - if (newInode && !S_ISREG(newInode->i_mode)) { - CIFSSMBClose(xid, pTcon, fileHandle); - break; - } - case -ENOENT: - posix_open = true; - case -EOPNOTSUPP: - break; - default: - pTcon->broken_posix_open = true; - } - } - if (!posix_open) - rc = cifs_get_inode_info_unix(&newInode, full_path, - parent_dir_inode->i_sb, xid); - } else + rc = cifs_get_inode_info_unix(&newInode, full_path, + parent_dir_inode->i_sb, xid); + } else { rc = cifs_get_inode_info(&newInode, full_path, NULL, parent_dir_inode->i_sb, xid, NULL); + } if ((rc == 0) && (newInode != NULL)) { d_add(direntry, newInode); - if (posix_open) { - filp = lookup_instantiate_filp(nd, direntry, - generic_file_open); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); - CIFSSMBClose(xid, pTcon, fileHandle); - goto lookup_out; - } - - cfile = cifs_new_fileinfo(fileHandle, filp, tlink, - oplock); - if (cfile == NULL) { - fput(filp); - CIFSSMBClose(xid, pTcon, fileHandle); - rc = -ENOMEM; - goto lookup_out; - } - } /* since paths are not looked up by component - the parent directories are presumed to be good here */ renew_parental_timestamps(direntry); -- cgit v0.10.2 From 3819219b592159725069eb16a7a46f58e4ecef32 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:24 +0200 Subject: ceph: remove unused arg from ceph_lookup_open() What was the purpose of this? Signed-off-by: Miklos Szeredi CC: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 3e8094b..c4b7832 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -599,7 +599,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, (nd->flags & LOOKUP_OPEN) && !(nd->intent.open.flags & O_CREAT)) { int mode = nd->intent.open.create_mode & ~current->fs->umask; - return ceph_lookup_open(dir, dentry, nd, mode, 1); + return ceph_lookup_open(dir, dentry, nd, mode); } /* can we conclude ENOENT locally? */ @@ -710,7 +710,7 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (nd) { BUG_ON((nd->flags & LOOKUP_OPEN) == 0); - dentry = ceph_lookup_open(dir, dentry, nd, mode, 0); + dentry = ceph_lookup_open(dir, dentry, nd, mode); /* hrm, what should i do here if we get aliased? */ if (IS_ERR(dentry)) return PTR_ERR(dentry); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 988d4f3..4bf9773 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -219,8 +219,7 @@ out: * path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE */ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode, - int locked_dir) + struct nameidata *nd, int mode) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index fc35036..8471db9 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -807,8 +807,7 @@ extern int ceph_copy_from_page_vector(struct page **pages, extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode, - int locked_dir); + struct nameidata *nd, int mode); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ -- cgit v0.10.2 From 2d83bde9a16e18eafdc73a3a1f4a8eb110e49672 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:25 +0200 Subject: ceph: implement i_op->atomic_open() Add an ->atomic_open implementation which replaces the atomic lookup+open+create operation implemented via ->lookup and ->create operations. Signed-off-by: Miklos Szeredi CC: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index c4b7832..75df600 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -594,14 +594,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, if (err < 0) return ERR_PTR(err); - /* open (but not create!) intent? */ - if (nd && - (nd->flags & LOOKUP_OPEN) && - !(nd->intent.open.flags & O_CREAT)) { - int mode = nd->intent.open.create_mode & ~current->fs->umask; - return ceph_lookup_open(dir, dentry, nd, mode); - } - /* can we conclude ENOENT locally? */ if (dentry->d_inode == NULL) { struct ceph_inode_info *ci = ceph_inode(dir); @@ -642,6 +634,47 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, return dentry; } +struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode, + bool *created) +{ + int err; + struct dentry *res = NULL; + struct file *filp; + + if (!(flags & O_CREAT)) { + if (dentry->d_name.len > NAME_MAX) + return ERR_PTR(-ENAMETOOLONG); + + err = ceph_init_dentry(dentry); + if (err < 0) + return ERR_PTR(err); + + return ceph_lookup_open(dir, dentry, od, flags, mode); + } + + if (d_unhashed(dentry)) { + res = ceph_lookup(dir, dentry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + if (res) + dentry = res; + } + + /* We don't deal with positive dentries here */ + if (dentry->d_inode) { + finish_no_open(od, res); + return NULL; + } + + *created = true; + filp = ceph_lookup_open(dir, dentry, od, flags, mode); + dput(res); + + return filp; +} + /* * If we do a create but get no trace back from the MDS, follow up with * a lookup (the VFS expects us to link up the provided dentry). @@ -702,23 +735,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) { - dout("create in dir %p dentry %p name '%.*s'\n", - dir, dentry, dentry->d_name.len, dentry->d_name.name); - - if (ceph_snap(dir) != CEPH_NOSNAP) - return -EROFS; - - if (nd) { - BUG_ON((nd->flags & LOOKUP_OPEN) == 0); - dentry = ceph_lookup_open(dir, dentry, nd, mode); - /* hrm, what should i do here if we get aliased? */ - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - return 0; - } - - /* fall back to mknod */ - return ceph_mknod(dir, dentry, (mode & ~S_IFMT) | S_IFREG, 0); + return ceph_mknod(dir, dentry, mode, 0); } static int ceph_symlink(struct inode *dir, struct dentry *dentry, @@ -1357,6 +1374,7 @@ const struct inode_operations ceph_dir_iops = { .rmdir = ceph_unlink, .rename = ceph_rename, .create = ceph_create, + .atomic_open = ceph_atomic_open, }; const struct dentry_operations ceph_dentry_ops = { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4bf9773..e34dc22 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -213,21 +213,15 @@ out: * may_open() fails, the struct *file gets cleaned up (i.e. * ceph_release gets called). So fear not! */ -/* - * flags - * path_lookup_open -> LOOKUP_OPEN - * path_lookup_create -> LOOKUP_OPEN|LOOKUP_CREATE - */ -struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode) +struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; - struct file *file; + struct file *file = NULL; struct ceph_mds_request *req; struct dentry *ret; int err; - int flags = nd->intent.open.flags; dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n", dentry, dentry->d_name.len, dentry->d_name.name, flags, mode); @@ -253,14 +247,19 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, err = ceph_handle_notrace_create(dir, dentry); if (err) goto out; - file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open); + file = finish_open(od, req->r_dentry, ceph_open); if (IS_ERR(file)) err = PTR_ERR(file); out: ret = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); dout("ceph_lookup_open result=%p\n", ret); - return ret; + + if (IS_ERR(ret)) + return ERR_CAST(ret); + + dput(ret); + return err ? ERR_PTR(err) : file; } int ceph_release(struct inode *inode, struct file *file) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 8471db9..e61e546 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -806,8 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages, loff_t off, size_t len); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); -extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct nameidata *nd, int mode); +extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, + umode_t mode); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ -- cgit v0.10.2 From e43ae79c540270865918dab5ac914c74f43101e2 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:26 +0200 Subject: 9p: implement i_op->atomic_open() Add an ->atomic_open implementation which replaces the atomic open+create operation implemented via ->create. No functionality is changed. Signed-off-by: Miklos Szeredi CC: Eric Van Hensbergen Signed-off-by: Al Viro diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 57ccb75..e8c42ce 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -712,11 +712,14 @@ error: } /** - * v9fs_vfs_create - VFS hook to create files + * v9fs_vfs_create - VFS hook to create a regular file + * + * open(.., O_CREAT) is handled in v9fs_vfs_atomic_open(). This is only called + * for mknod(2). + * * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @mode: create permissions - * @nd: path information * */ @@ -724,76 +727,19 @@ static int v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) { - int err; - u32 perm; - int flags; - struct file *filp; - struct v9fs_inode *v9inode; - struct v9fs_session_info *v9ses; - struct p9_fid *fid, *inode_fid; - - err = 0; - fid = NULL; - v9ses = v9fs_inode2v9ses(dir); - perm = unixmode2p9mode(v9ses, mode); - if (nd) - flags = nd->intent.open.flags; - else - flags = O_RDWR; + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); + u32 perm = unixmode2p9mode(v9ses, mode); + struct p9_fid *fid; - fid = v9fs_create(v9ses, dir, dentry, NULL, perm, - v9fs_uflags2omode(flags, - v9fs_proto_dotu(v9ses))); - if (IS_ERR(fid)) { - err = PTR_ERR(fid); - fid = NULL; - goto error; - } + /* P9_OEXCL? */ + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_ORDWR); + if (IS_ERR(fid)) + return PTR_ERR(fid); v9fs_invalidate_inode_attr(dir); - /* if we are opening a file, assign the open fid to the file */ - if (nd) { - v9inode = V9FS_I(dentry->d_inode); - mutex_lock(&v9inode->v_mutex); - if (v9ses->cache && !v9inode->writeback_fid && - ((flags & O_ACCMODE) != O_RDONLY)) { - /* - * clone a fid and add it to writeback_fid - * we do it during open time instead of - * page dirty time via write_begin/page_mkwrite - * because we want write after unlink usecase - * to work. - */ - inode_fid = v9fs_writeback_fid(dentry); - if (IS_ERR(inode_fid)) { - err = PTR_ERR(inode_fid); - mutex_unlock(&v9inode->v_mutex); - goto error; - } - v9inode->writeback_fid = (void *) inode_fid; - } - mutex_unlock(&v9inode->v_mutex); - filp = lookup_instantiate_filp(nd, dentry, generic_file_open); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); - goto error; - } - - filp->private_data = fid; -#ifdef CONFIG_9P_FSCACHE - if (v9ses->cache) - v9fs_cache_inode_set_cookie(dentry->d_inode, filp); -#endif - } else - p9_client_clunk(fid); + p9_client_clunk(fid); return 0; - -error: - if (fid) - p9_client_clunk(fid); - - return err; } /** @@ -910,6 +856,93 @@ error: return ERR_PTR(result); } +static struct file * +v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode, + bool *created) +{ + int err; + u32 perm; + struct file *filp; + struct v9fs_inode *v9inode; + struct v9fs_session_info *v9ses; + struct p9_fid *fid, *inode_fid; + struct dentry *res = NULL; + + if (d_unhashed(dentry)) { + res = v9fs_vfs_lookup(dir, dentry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + if (res) + dentry = res; + } + + /* Only creates */ + if (!(flags & O_CREAT) || dentry->d_inode) { + finish_no_open(od, res); + return NULL; + } + + err = 0; + fid = NULL; + v9ses = v9fs_inode2v9ses(dir); + perm = unixmode2p9mode(v9ses, mode); + fid = v9fs_create(v9ses, dir, dentry, NULL, perm, + v9fs_uflags2omode(flags, + v9fs_proto_dotu(v9ses))); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + fid = NULL; + goto error; + } + + v9fs_invalidate_inode_attr(dir); + v9inode = V9FS_I(dentry->d_inode); + mutex_lock(&v9inode->v_mutex); + if (v9ses->cache && !v9inode->writeback_fid && + ((flags & O_ACCMODE) != O_RDONLY)) { + /* + * clone a fid and add it to writeback_fid + * we do it during open time instead of + * page dirty time via write_begin/page_mkwrite + * because we want write after unlink usecase + * to work. + */ + inode_fid = v9fs_writeback_fid(dentry); + if (IS_ERR(inode_fid)) { + err = PTR_ERR(inode_fid); + mutex_unlock(&v9inode->v_mutex); + goto error; + } + v9inode->writeback_fid = (void *) inode_fid; + } + mutex_unlock(&v9inode->v_mutex); + filp = finish_open(od, dentry, generic_file_open); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto error; + } + + filp->private_data = fid; +#ifdef CONFIG_9P_FSCACHE + if (v9ses->cache) + v9fs_cache_inode_set_cookie(dentry->d_inode, filp); +#endif + + *created = true; +out: + dput(res); + return filp; + +error: + if (fid) + p9_client_clunk(fid); + + filp = ERR_PTR(err); + goto out; +} + /** * v9fs_vfs_unlink - VFS unlink hook to delete an inode * @i: inode that is being unlinked @@ -1488,6 +1521,7 @@ out: static const struct inode_operations v9fs_dir_inode_operations_dotu = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, + .atomic_open = v9fs_vfs_atomic_open, .symlink = v9fs_vfs_symlink, .link = v9fs_vfs_link, .unlink = v9fs_vfs_unlink, @@ -1502,6 +1536,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotu = { static const struct inode_operations v9fs_dir_inode_operations = { .create = v9fs_vfs_create, .lookup = v9fs_vfs_lookup, + .atomic_open = v9fs_vfs_atomic_open, .unlink = v9fs_vfs_unlink, .mkdir = v9fs_vfs_mkdir, .rmdir = v9fs_vfs_rmdir, diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index e3dd2a1..a354fe2 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -230,7 +230,6 @@ int v9fs_open_to_dotl_flags(int flags) * @dir: directory inode that is being created * @dentry: dentry that is being deleted * @mode: create permissions - * @nd: path information * */ @@ -238,9 +237,16 @@ static int v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, struct nameidata *nd) { + return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0); +} + +static struct file * +v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t omode, + bool *created) +{ int err = 0; gid_t gid; - int flags; umode_t mode; char *name = NULL; struct file *filp; @@ -251,19 +257,25 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, struct p9_fid *dfid, *ofid, *inode_fid; struct v9fs_session_info *v9ses; struct posix_acl *pacl = NULL, *dacl = NULL; + struct dentry *res = NULL; - v9ses = v9fs_inode2v9ses(dir); - if (nd) - flags = nd->intent.open.flags; - else { - /* - * create call without LOOKUP_OPEN is due - * to mknod of regular files. So use mknod - * operation. - */ - return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0); + if (d_unhashed(dentry)) { + res = v9fs_vfs_lookup(dir, dentry, NULL); + if (IS_ERR(res)) + return ERR_CAST(res); + + if (res) + dentry = res; + } + + /* Only creates */ + if (!(flags & O_CREAT) || dentry->d_inode) { + finish_no_open(od, res); + return NULL; } + v9ses = v9fs_inode2v9ses(dir); + name = (char *) dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "name:%s flags:0x%x mode:0x%hx\n", name, flags, omode); @@ -272,7 +284,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); - return err; + goto err_return; } /* clone a fid to use for creation */ @@ -280,7 +292,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); - return err; + goto err_return; } gid = v9fs_get_fsgid_for_create(dir); @@ -345,7 +357,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, } mutex_unlock(&v9inode->v_mutex); /* Since we are opening a file, assign the open fid to the file */ - filp = lookup_instantiate_filp(nd, dentry, generic_file_open); + filp = finish_open(od, dentry, generic_file_open); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto err_clunk_old_fid; @@ -355,7 +367,10 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, filp); #endif - return 0; + *created = true; +out: + dput(res); + return filp; error: if (fid) @@ -364,7 +379,9 @@ err_clunk_old_fid: if (ofid) p9_client_clunk(ofid); v9fs_set_create_acl(NULL, &dacl, &pacl); - return err; +err_return: + filp = ERR_PTR(err); + goto out; } /** @@ -982,6 +999,7 @@ out: const struct inode_operations v9fs_dir_inode_operations_dotl = { .create = v9fs_vfs_create_dotl, + .atomic_open = v9fs_vfs_atomic_open_dotl, .lookup = v9fs_vfs_lookup, .link = v9fs_vfs_link_dotl, .symlink = v9fs_vfs_symlink_dotl, -- cgit v0.10.2 From 015c3bbcd88df2305aae5b017a9c91c08b380aa1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:27 +0200 Subject: vfs: remove open intents from nameidata All users of open intents have been converted to use ->atomic_{open,create}. This patch gets rid of nd->intent.open and related infrastructure. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/internal.h b/fs/internal.h index 7006777..ae69a3b 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -82,13 +82,10 @@ extern struct super_block *user_get_super(dev_t); /* * open.c */ -struct nameidata; -extern struct file *nameidata_to_filp(struct nameidata *); -extern void release_open_intent(struct nameidata *); struct opendata { struct dentry *dentry; struct vfsmount *mnt; - struct file **filp; + struct file *filp; }; struct open_flags { int open_flag; diff --git a/fs/namei.c b/fs/namei.c index 9e11ae8..0ed8762 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -463,22 +463,6 @@ err_root: return -ECHILD; } -/** - * release_open_intent - free up open intent resources - * @nd: pointer to nameidata - */ -void release_open_intent(struct nameidata *nd) -{ - struct file *file = nd->intent.open.file; - - if (file && !IS_ERR(file)) { - if (file->f_path.dentry == NULL) - put_filp(file); - else - fput(file); - } -} - static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) { return dentry->d_op->d_revalidate(dentry, nd); @@ -2210,7 +2194,8 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) } static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, - struct path *path, const struct open_flags *op, + struct path *path, struct opendata *od, + const struct open_flags *op, int *want_write, bool need_lookup, bool *created) { @@ -2219,7 +2204,6 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, umode_t mode; int error; int acc_mode; - struct opendata od; struct file *filp; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; @@ -2285,14 +2269,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; - od.dentry = DENTRY_NOT_SET; - od.mnt = nd->path.mnt; - od.filp = &nd->intent.open.file; - filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode, + od->dentry = DENTRY_NOT_SET; + od->mnt = nd->path.mnt; + filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, created); if (IS_ERR(filp)) { - if (WARN_ON(od.dentry != DENTRY_NOT_SET)) - dput(od.dentry); + if (WARN_ON(od->dentry != DENTRY_NOT_SET)) + dput(od->dentry); if (create_error && PTR_ERR(filp) == -ENOENT) filp = ERR_PTR(create_error); @@ -2306,13 +2289,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } if (!filp) { - if (WARN_ON(od.dentry == DENTRY_NOT_SET)) { + if (WARN_ON(od->dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; } - if (od.dentry) { + if (od->dentry) { dput(dentry); - dentry = od.dentry; + dentry = od->dentry; } goto looked_up; } @@ -2375,6 +2358,7 @@ looked_up: * was performed, only lookup. */ static struct file *lookup_open(struct nameidata *nd, struct path *path, + struct opendata *od, const struct open_flags *op, int *want_write, bool *created) { @@ -2394,7 +2378,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, op, want_write, + return atomic_open(nd, dentry, path, od, op, want_write, need_lookup, created); } @@ -2416,7 +2400,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, * rw->ro transition does not occur between * the time when the file is created and when * a permanent write count is taken through - * the 'struct file' in nameidata_to_filp(). + * the 'struct file' in finish_open(). */ error = mnt_want_write(nd->path.mnt); if (error) @@ -2444,7 +2428,8 @@ out_dput: * Handle the last step of open() */ static struct file *do_last(struct nameidata *nd, struct path *path, - const struct open_flags *op, const char *pathname) + struct opendata *od, const struct open_flags *op, + const char *pathname) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; @@ -2521,7 +2506,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, op, &want_write, &created); + filp = lookup_open(nd, path, od, op, &want_write, &created); mutex_unlock(&dir->d_inode->i_mutex); if (filp) { @@ -2627,7 +2612,8 @@ common: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; - filp = nameidata_to_filp(nd); + od->mnt = nd->path.mnt; + filp = finish_open(od, nd->path.dentry, NULL); if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { BUG_ON(save_parent.dentry != dir); path_put(&nd->path); @@ -2642,6 +2628,11 @@ common: retried = true; goto retry_lookup; } + if (IS_ERR(filp)) + goto out; + error = open_check_o_direct(filp); + if (error) + goto exit_fput; opened: if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); @@ -2671,24 +2662,26 @@ exit_dput: exit: filp = ERR_PTR(error); goto out; +exit_fput: + fput(filp); + goto exit; + } static struct file *path_openat(int dfd, const char *pathname, struct nameidata *nd, const struct open_flags *op, int flags) { struct file *base = NULL; - struct file *filp; + struct opendata od; + struct file *res; struct path path; int error; - filp = get_empty_filp(); - if (!filp) + od.filp = get_empty_filp(); + if (!od.filp) return ERR_PTR(-ENFILE); - filp->f_flags = op->open_flag; - nd->intent.open.file = filp; - nd->intent.open.flags = open_to_namei_flags(op->open_flag); - nd->intent.open.create_mode = op->mode; + od.filp->f_flags = op->open_flag; error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) @@ -2699,14 +2692,14 @@ static struct file *path_openat(int dfd, const char *pathname, if (unlikely(error)) goto out_filp; - filp = do_last(nd, &path, op, pathname); - while (unlikely(!filp)) { /* trailing symlink */ + res = do_last(nd, &path, &od, op, pathname); + while (unlikely(!res)) { /* trailing symlink */ struct path link = path; void *cookie; if (!(nd->flags & LOOKUP_FOLLOW)) { path_put_conditional(&path, nd); path_put(&nd->path); - filp = ERR_PTR(-ELOOP); + res = ERR_PTR(-ELOOP); break; } nd->flags |= LOOKUP_PARENT; @@ -2714,7 +2707,7 @@ static struct file *path_openat(int dfd, const char *pathname, error = follow_link(&link, nd, &cookie); if (unlikely(error)) goto out_filp; - filp = do_last(nd, &path, op, pathname); + res = do_last(nd, &path, &od, op, pathname); put_link(nd, &link, cookie); } out: @@ -2722,17 +2715,20 @@ out: path_put(&nd->root); if (base) fput(base); - release_open_intent(nd); - if (filp == ERR_PTR(-EOPENSTALE)) { + if (od.filp) { + BUG_ON(od.filp->f_path.dentry); + put_filp(od.filp); + } + if (res == ERR_PTR(-EOPENSTALE)) { if (flags & LOOKUP_RCU) - filp = ERR_PTR(-ECHILD); + res = ERR_PTR(-ECHILD); else - filp = ERR_PTR(-ESTALE); + res = ERR_PTR(-ESTALE); } - return filp; + return res; out_filp: - filp = ERR_PTR(error); + res = ERR_PTR(error); goto out; } @@ -2788,7 +2784,6 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path goto out; nd.flags &= ~LOOKUP_PARENT; nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; - nd.intent.open.flags = O_EXCL; /* * Do the final lookup. diff --git a/fs/open.c b/fs/open.c index 13bece4..937f4ec 100644 --- a/fs/open.c +++ b/fs/open.c @@ -771,46 +771,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, } /** - * lookup_instantiate_filp - instantiates the open intent filp - * @nd: pointer to nameidata - * @dentry: pointer to dentry - * @open: open callback - * - * Helper for filesystems that want to use lookup open intents and pass back - * a fully instantiated struct file to the caller. - * This function is meant to be called from within a filesystem's - * lookup method. - * Beware of calling it for non-regular files! Those ->open methods might block - * (e.g. in fifo_open), leaving you with parent locked (and in case of fifo, - * leading to a deadlock, as nobody can open that fifo anymore, because - * another process to open fifo will block on locked parent when doing lookup). - * Note that in case of error, nd->intent.open.file is destroyed, but the - * path information remains valid. - * If the open callback is set to NULL, then the standard f_op->open() - * filesystem callback is substituted. - */ -struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, - int (*open)(struct inode *, struct file *)) -{ - const struct cred *cred = current_cred(); - - if (IS_ERR(nd->intent.open.file)) - goto out; - if (IS_ERR(dentry)) - goto out_err; - nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt), - nd->intent.open.file, - open, cred); -out: - return nd->intent.open.file; -out_err: - release_open_intent(nd); - nd->intent.open.file = ERR_CAST(dentry); - goto out; -} -EXPORT_SYMBOL_GPL(lookup_instantiate_filp); - -/** * finish_open - finish opening a file * @od: opaque open data * @dentry: pointer to dentry @@ -829,9 +789,9 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, mntget(od->mnt); dget(dentry); - res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred()); + res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); if (!IS_ERR(res)) - *od->filp = NULL; + od->filp = NULL; return res; } @@ -852,49 +812,6 @@ void finish_no_open(struct opendata *od, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); -/** - * nameidata_to_filp - convert a nameidata to an open filp. - * @nd: pointer to nameidata - * @flags: open flags - * - * Note that this function destroys the original nameidata - */ -struct file *nameidata_to_filp(struct nameidata *nd) -{ - const struct cred *cred = current_cred(); - struct file *filp; - - /* Pick up the filp from the open intent */ - filp = nd->intent.open.file; - - /* Has the filesystem initialised the file for us? */ - if (filp->f_path.dentry != NULL) { - nd->intent.open.file = NULL; - } else { - struct file *res; - - path_get(&nd->path); - res = do_dentry_open(nd->path.dentry, nd->path.mnt, - filp, NULL, cred); - if (!IS_ERR(res)) { - int error; - - nd->intent.open.file = NULL; - BUG_ON(res != filp); - - error = open_check_o_direct(filp); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } else { - /* Allow nd->intent.open.file to be recycled */ - filp = res; - } - } - return filp; -} - /* * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an * error. diff --git a/include/linux/namei.h b/include/linux/namei.h index ffc0213..23d8598 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -7,12 +7,6 @@ struct vfsmount; -struct open_intent { - int flags; - int create_mode; - struct file *file; -}; - enum { MAX_NESTED_LINKS = 8 }; struct nameidata { @@ -25,11 +19,6 @@ struct nameidata { int last_type; unsigned depth; char *saved_names[MAX_NESTED_LINKS + 1]; - - /* Intent data */ - union { - struct open_intent open; - } intent; }; /* @@ -82,9 +71,6 @@ extern int kern_path_parent(const char *, struct nameidata *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); -extern struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry, - int (*open)(struct inode *, struct file *)); - extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern int follow_down_one(struct path *); -- cgit v0.10.2 From aa4caadb70b782999ce5d150ac2f4b1d18e2fc75 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:28 +0200 Subject: vfs: do_last(): clean up error handling Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 0ed8762..044215a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2634,21 +2634,14 @@ common: if (error) goto exit_fput; opened: - if (!IS_ERR(filp)) { - error = ima_file_check(filp, op->acc_mode); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } - if (!IS_ERR(filp)) { - if (will_truncate) { - error = handle_truncate(filp); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } - } + error = ima_file_check(filp, op->acc_mode); + if (error) + goto exit_fput; + + if (will_truncate) { + error = handle_truncate(filp); + if (error) + goto exit_fput; } out: if (want_write) -- cgit v0.10.2 From e83db167229702da0f48957641e0dbf36b2644aa Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:29 +0200 Subject: vfs: do_last(): clean up labels Reported-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 044215a..ea24376 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2463,13 +2463,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, error = -EISDIR; goto exit; } - goto ok; + goto finish_open; case LAST_BIND: error = complete_walk(nd); if (error) return ERR_PTR(error); audit_inode(pathname, dir); - goto ok; + goto finish_open; } if (!(open_flag & O_CREAT)) { @@ -2526,7 +2526,7 @@ retry_lookup: will_truncate = 0; acc_mode = MAY_OPEN; path_to_nameidata(path, nd); - goto common; + goto finish_open_created; } /* @@ -2598,7 +2598,7 @@ finish_lookup: if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) goto exit; audit_inode(pathname, nd->path.dentry); -ok: +finish_open: if (!S_ISREG(nd->inode->i_mode)) will_truncate = 0; @@ -2608,7 +2608,7 @@ ok: goto exit; want_write = 1; } -common: +finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; -- cgit v0.10.2 From 77d660a8a83036432dc33f092a367d06563d233e Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:30 +0200 Subject: vfs: do_last(): clean up bool Consistently use bool for boolean values in do_last(). Reported-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index ea24376..6bdb8d7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2196,7 +2196,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct opendata *od, const struct open_flags *op, - int *want_write, bool need_lookup, + bool *want_write, bool need_lookup, bool *created) { struct inode *dir = nd->path.dentry->d_inode; @@ -2238,7 +2238,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, (open_flag & O_ACCMODE) != O_RDONLY) { error = mnt_want_write(nd->path.mnt); if (!error) { - *want_write = 1; + *want_write = true; } else if (!(open_flag & O_CREAT)) { /* * No O_CREATE -> atomicity not a requirement -> fall @@ -2360,7 +2360,7 @@ looked_up: static struct file *lookup_open(struct nameidata *nd, struct path *path, struct opendata *od, const struct open_flags *op, - int *want_write, bool *created) + bool *want_write, bool *created) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2405,7 +2405,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, error = mnt_want_write(nd->path.mnt); if (error) goto out_dput; - *want_write = 1; + *want_write = true; *created = true; error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) @@ -2433,13 +2433,13 @@ static struct file *do_last(struct nameidata *nd, struct path *path, { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; - int will_truncate = open_flag & O_TRUNC; - int want_write = 0; + bool will_truncate = (open_flag & O_TRUNC) != 0; + bool want_write = false; int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; bool created; - int symlink_ok = 0; + bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; int error; @@ -2476,7 +2476,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) - symlink_ok = 1; + symlink_ok = true; /* we _can_ be in RCU mode here */ error = lookup_fast(nd, &nd->last, path, &inode); if (likely(!error)) @@ -2514,7 +2514,7 @@ retry_lookup: goto out; if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) - will_truncate = 0; + will_truncate = false; audit_inode(pathname, filp->f_path.dentry); goto opened; @@ -2523,7 +2523,7 @@ retry_lookup: if (created) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; - will_truncate = 0; + will_truncate = false; acc_mode = MAY_OPEN; path_to_nameidata(path, nd); goto finish_open_created; @@ -2541,7 +2541,7 @@ retry_lookup: */ if (want_write) { mnt_drop_write(nd->path.mnt); - want_write = 0; + want_write = false; } error = -EEXIST; @@ -2600,13 +2600,13 @@ finish_lookup: audit_inode(pathname, nd->path.dentry); finish_open: if (!S_ISREG(nd->inode->i_mode)) - will_truncate = 0; + will_truncate = false; if (will_truncate) { error = mnt_want_write(nd->path.mnt); if (error) goto exit; - want_write = 1; + want_write = true; } finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); @@ -2623,7 +2623,7 @@ finish_open_created: save_parent.dentry = NULL; if (want_write) { mnt_drop_write(nd->path.mnt); - want_write = 0; + want_write = false; } retried = true; goto retry_lookup; -- cgit v0.10.2 From f60dc3db6e24b7c36445cf1feb56b34c799074b3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:31 +0200 Subject: vfs: do_last(): clean up retry Move the lookup retry logic to the bottom of the function to make the normal case simpler to read. Reported-by: David Howells Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 6bdb8d7..183a769 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2614,22 +2614,11 @@ finish_open_created: goto exit; od->mnt = nd->path.mnt; filp = finish_open(od, nd->path.dentry, NULL); - if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { - BUG_ON(save_parent.dentry != dir); - path_put(&nd->path); - nd->path = save_parent; - nd->inode = dir->d_inode; - save_parent.mnt = NULL; - save_parent.dentry = NULL; - if (want_write) { - mnt_drop_write(nd->path.mnt); - want_write = false; - } - retried = true; - goto retry_lookup; - } - if (IS_ERR(filp)) + if (IS_ERR(filp)) { + if (filp == ERR_PTR(-EOPENSTALE)) + goto stale_open; goto out; + } error = open_check_o_direct(filp); if (error) goto exit_fput; @@ -2659,6 +2648,23 @@ exit_fput: fput(filp); goto exit; +stale_open: + /* If no saved parent or already retried then can't retry */ + if (!save_parent.dentry || retried) + goto out; + + BUG_ON(save_parent.dentry != dir); + path_put(&nd->path); + nd->path = save_parent; + nd->inode = dir->d_inode; + save_parent.mnt = NULL; + save_parent.dentry = NULL; + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = false; + } + retried = true; + goto retry_lookup; } static struct file *path_openat(int dfd, const char *pathname, -- cgit v0.10.2 From a8277b9baa6268de386529a33061775bc716198b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 5 Jun 2012 15:10:32 +0200 Subject: vfs: move O_DIRECT check to common code Perform open_check_o_direct() in a common place in do_last after opening the file. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 183a769..4bc4bc6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2305,22 +2305,15 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * here. */ error = may_open(&filp->f_path, acc_mode, open_flag); - if (error) - goto out_fput; - - error = open_check_o_direct(filp); - if (error) - goto out_fput; + if (error) { + fput(filp); + filp = ERR_PTR(error); + } out: dput(dentry); return filp; -out_fput: - fput(filp); - filp = ERR_PTR(error); - goto out; - no_open: if (need_lookup) { dentry = lookup_real(dir, dentry, nd); @@ -2619,10 +2612,10 @@ finish_open_created: goto stale_open; goto out; } +opened: error = open_check_o_direct(filp); if (error) goto exit_fput; -opened: error = ima_file_check(filp, op->acc_mode); if (error) goto exit_fput; -- cgit v0.10.2 From 47237687d73cbeae1dd7a133c3fc3d7239094568 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:01:45 -0400 Subject: ->atomic_open() prototype change - pass int * instead of bool * ... and let finish_open() report having opened the file via that sucker. Next step: don't modify od->filp at all. [AV: FILE_CREATE was already used by cifs; Miklos' fix folded] Signed-off-by: Al Viro diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 8157488..af4e45b 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -64,7 +64,7 @@ ata *); void (*update_time)(struct inode *, struct timespec *, int); struct file * (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, - umode_t create_mode, bool *created); + umode_t create_mode, int *opened); locking rules: all may block diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index beb6e69..d712105 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -366,7 +366,7 @@ struct inode_operations { void (*update_time)(struct inode *, struct timespec *, int); struct file * (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, - umode_t create_mode, bool *created); + umode_t create_mode, int *opened); }; Again, all methods are called without any locks being held, unless diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index e8c42ce..de626b3 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -859,7 +859,7 @@ error: static struct file * v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t mode, - bool *created) + int *opened) { int err; u32 perm; @@ -918,7 +918,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, v9inode->writeback_fid = (void *) inode_fid; } mutex_unlock(&v9inode->v_mutex); - filp = finish_open(od, dentry, generic_file_open); + filp = finish_open(od, dentry, generic_file_open, opened); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto error; @@ -930,7 +930,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, v9fs_cache_inode_set_cookie(dentry->d_inode, filp); #endif - *created = true; + *opened |= FILE_CREATED; out: dput(res); return filp; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a354fe2..3db5547 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -243,7 +243,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, static struct file * v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t omode, - bool *created) + int *opened) { int err = 0; gid_t gid; @@ -357,7 +357,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } mutex_unlock(&v9inode->v_mutex); /* Since we are opening a file, assign the open fid to the file */ - filp = finish_open(od, dentry, generic_file_open); + filp = finish_open(od, dentry, generic_file_open, opened); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto err_clunk_old_fid; @@ -367,7 +367,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (v9ses->cache) v9fs_cache_inode_set_cookie(inode, filp); #endif - *created = true; + *opened |= FILE_CREATED; out: dput(res); return filp; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 75df600..81e5e90 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -636,7 +636,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t mode, - bool *created) + int *opened) { int err; struct dentry *res = NULL; @@ -650,7 +650,7 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (err < 0) return ERR_PTR(err); - return ceph_lookup_open(dir, dentry, od, flags, mode); + return ceph_lookup_open(dir, dentry, od, flags, mode, opened); } if (d_unhashed(dentry)) { @@ -668,8 +668,8 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, return NULL; } - *created = true; - filp = ceph_lookup_open(dir, dentry, od, flags, mode); + *opened |= FILE_CREATED; + filp = ceph_lookup_open(dir, dentry, od, flags, mode, opened); dput(res); return filp; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index e34dc22..4c304a9 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -214,7 +214,8 @@ out: * ceph_release gets called). So fear not! */ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode) + struct opendata *od, unsigned flags, umode_t mode, + int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -247,7 +248,7 @@ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, err = ceph_handle_notrace_create(dir, dentry); if (err) goto out; - file = finish_open(od, req->r_dentry, ceph_open); + file = finish_open(od, req->r_dentry, ceph_open, opened); if (IS_ERR(file)) err = PTR_ERR(file); out: diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e61e546..f9a3251 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -808,7 +808,7 @@ extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, - umode_t mode); + umode_t mode, int *opened); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 3a572bf..92a7c3d 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -48,7 +48,7 @@ extern int cifs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); extern struct file *cifs_atomic_open(struct inode *, struct dentry *, struct opendata *, unsigned, umode_t, - bool *); + int *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 7a3dcd1..6cdf23f 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -160,7 +160,7 @@ check_name(struct dentry *direntry) static int cifs_do_create(struct inode *inode, struct dentry *direntry, int xid, struct tcon_link *tlink, unsigned oflags, umode_t mode, __u32 *oplock, __u16 *fileHandle, - bool *created) + int *created) { int rc = -ENOENT; int create_options = CREATE_NOT_DIR; @@ -311,7 +311,7 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, .device = 0, }; - *created = true; + *created |= FILE_CREATED; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) { args.uid = (__u64) current_fsuid(); if (inode->i_mode & S_ISGID) @@ -379,7 +379,7 @@ out: struct file * cifs_atomic_open(struct inode *inode, struct dentry *direntry, struct opendata *od, unsigned oflags, umode_t mode, - bool *created) + int *opened) { int rc; int xid; @@ -426,14 +426,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, tcon = tlink_tcon(tlink); rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, - &oplock, &fileHandle, created); + &oplock, &fileHandle, opened); if (rc) { filp = ERR_PTR(rc); goto out; } - filp = finish_open(od, direntry, generic_file_open); + filp = finish_open(od, direntry, generic_file_open, opened); if (IS_ERR(filp)) { CIFSSMBClose(xid, tcon, fileHandle); goto out; @@ -469,7 +469,7 @@ int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, struct tcon_link *tlink; __u16 fileHandle; __u32 oplock; - bool created = true; + int created = FILE_CREATED; cFYI(1, "cifs_create parent inode = 0x%p name is: %s and dentry = 0x%p", inode, direntry->d_name.name, direntry); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e42442f..345f78e 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -371,7 +371,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, */ static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, struct opendata *od, unsigned flags, - umode_t mode) + umode_t mode, int *opened) { int err; struct inode *inode; @@ -450,7 +450,7 @@ static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); - file = finish_open(od, entry, generic_file_open); + file = finish_open(od, entry, generic_file_open, opened); if (IS_ERR(file)) { fuse_sync_release(ff, flags); } else { @@ -472,7 +472,7 @@ out_err: static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, struct opendata *od, unsigned flags, - umode_t mode, bool *created) + umode_t mode, int *opened) { int err; struct fuse_conn *fc = get_fuse_conn(dir); @@ -492,12 +492,12 @@ static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, goto no_open; /* Only creates */ - *created = true; + *opened |= FILE_CREATED; if (fc->no_create) goto mknod; - file = fuse_create_open(dir, entry, od, flags, mode); + file = fuse_create_open(dir, entry, od, flags, mode, opened); if (PTR_ERR(file) == -ENOSYS) { fc->no_create = 1; goto mknod; diff --git a/fs/namei.c b/fs/namei.c index 4bc4bc6..7a33f07 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2197,7 +2197,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct opendata *od, const struct open_flags *op, bool *want_write, bool need_lookup, - bool *created) + int *opened) { struct inode *dir = nd->path.dentry->d_inode; unsigned open_flag = open_to_namei_flags(op->open_flag); @@ -2222,7 +2222,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (open_flag & O_EXCL) { open_flag &= ~O_TRUNC; - *created = true; + *opened |= FILE_CREATED; } /* @@ -2272,7 +2272,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, od->dentry = DENTRY_NOT_SET; od->mnt = nd->path.mnt; filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, - created); + opened); if (IS_ERR(filp)) { if (WARN_ON(od->dentry != DENTRY_NOT_SET)) dput(od->dentry); @@ -2283,7 +2283,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } acc_mode = op->acc_mode; - if (*created) { + if (*opened & FILE_CREATED) { fsnotify_create(dir, dentry); acc_mode = MAY_OPEN; } @@ -2353,7 +2353,7 @@ looked_up: static struct file *lookup_open(struct nameidata *nd, struct path *path, struct opendata *od, const struct open_flags *op, - bool *want_write, bool *created) + bool *want_write, int *opened) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2361,7 +2361,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, int error; bool need_lookup; - *created = false; + *opened &= ~FILE_CREATED; dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); if (IS_ERR(dentry)) return ERR_CAST(dentry); @@ -2372,7 +2372,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { return atomic_open(nd, dentry, path, od, op, want_write, - need_lookup, created); + need_lookup, opened); } if (need_lookup) { @@ -2399,7 +2399,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, if (error) goto out_dput; *want_write = true; - *created = true; + *opened |= FILE_CREATED; error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) goto out_dput; @@ -2422,7 +2422,7 @@ out_dput: */ static struct file *do_last(struct nameidata *nd, struct path *path, struct opendata *od, const struct open_flags *op, - const char *pathname) + int *opened, const char *pathname) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; @@ -2431,7 +2431,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; - bool created; bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; bool retried = false; @@ -2499,21 +2498,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, od, op, &want_write, &created); + filp = lookup_open(nd, path, od, op, &want_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (filp) { if (IS_ERR(filp)) goto out; - if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + if ((*opened & FILE_CREATED) || + !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) will_truncate = false; audit_inode(pathname, filp->f_path.dentry); goto opened; } - if (created) { + if (*opened & FILE_CREATED) { /* Don't check for write permission, don't truncate */ open_flag &= ~O_TRUNC; will_truncate = false; @@ -2606,7 +2606,7 @@ finish_open_created: if (error) goto exit; od->mnt = nd->path.mnt; - filp = finish_open(od, nd->path.dentry, NULL); + filp = finish_open(od, nd->path.dentry, NULL, opened); if (IS_ERR(filp)) { if (filp == ERR_PTR(-EOPENSTALE)) goto stale_open; @@ -2667,6 +2667,7 @@ static struct file *path_openat(int dfd, const char *pathname, struct opendata od; struct file *res; struct path path; + int opened = 0; int error; od.filp = get_empty_filp(); @@ -2684,7 +2685,7 @@ static struct file *path_openat(int dfd, const char *pathname, if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, pathname); + res = do_last(nd, &path, &od, op, &opened, pathname); while (unlikely(!res)) { /* trailing symlink */ struct path link = path; void *cookie; @@ -2699,7 +2700,7 @@ static struct file *path_openat(int dfd, const char *pathname, error = follow_link(&link, nd, &cookie); if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, pathname); + res = do_last(nd, &path, &od, op, &opened, pathname); put_link(nd, &link, cookie); } out: diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e6d55dc..6deb254 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -113,7 +113,7 @@ const struct inode_operations nfs3_dir_inode_operations = { static struct file *nfs_atomic_open(struct inode *, struct dentry *, struct opendata *, unsigned, umode_t, - bool *); + int *); const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -1389,7 +1389,8 @@ static int do_open(struct inode *inode, struct file *filp) static struct file *nfs_finish_open(struct nfs_open_context *ctx, struct dentry *dentry, - struct opendata *od, unsigned open_flags) + struct opendata *od, unsigned open_flags, + int *opened) { struct file *filp; int err; @@ -1408,7 +1409,7 @@ static struct file *nfs_finish_open(struct nfs_open_context *ctx, } } - filp = finish_open(od, dentry, do_open); + filp = finish_open(od, dentry, do_open, opened); if (!IS_ERR(filp)) nfs_file_set_open_context(filp, ctx); @@ -1419,7 +1420,7 @@ out: static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned open_flags, - umode_t mode, bool *created) + umode_t mode, int *opened) { struct nfs_open_context *ctx; struct dentry *res; @@ -1497,7 +1498,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - filp = nfs_finish_open(ctx, dentry, od, open_flags); + filp = nfs_finish_open(ctx, dentry, od, open_flags, opened); dput(res); return filp; diff --git a/fs/open.c b/fs/open.c index 937f4ec..89589bd 100644 --- a/fs/open.c +++ b/fs/open.c @@ -782,7 +782,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, * filesystem callback is substituted. */ struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *)) + int (*open)(struct inode *, struct file *), + int *opened) { struct file *res; @@ -790,8 +791,10 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, dget(dentry); res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); - if (!IS_ERR(res)) + if (!IS_ERR(res)) { + *opened |= FILE_OPENED; od->filp = NULL; + } return res; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 0314635..a7618cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1696,7 +1696,7 @@ struct inode_operations { int (*update_time)(struct inode *, struct timespec *, int); struct file * (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, - umode_t create_mode, bool *created); + umode_t create_mode, int *opened); } ____cacheline_aligned; struct seq_file; @@ -2065,8 +2065,13 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); +enum { + FILE_CREATED = 1, + FILE_OPENED = 2 +}; extern struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *)); + int (*open)(struct inode *, struct file *), + int *opened); extern void finish_no_open(struct opendata *od, struct dentry *dentry); /* fs/ioctl.c */ -- cgit v0.10.2 From 3d8a00d2099ebc6d5a6e95fadaf861709d9919a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:04:43 -0400 Subject: don't modify od->filp at all make put_filp() conditional on flag set by finish_open() Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 7a33f07..18b9326 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2708,10 +2708,8 @@ out: path_put(&nd->root); if (base) fput(base); - if (od.filp) { - BUG_ON(od.filp->f_path.dentry); + if (!(opened & FILE_OPENED)) put_filp(od.filp); - } if (res == ERR_PTR(-EOPENSTALE)) { if (flags & LOOKUP_RCU) res = ERR_PTR(-ECHILD); diff --git a/fs/open.c b/fs/open.c index 89589bd..c87f982 100644 --- a/fs/open.c +++ b/fs/open.c @@ -786,15 +786,14 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, int *opened) { struct file *res; + BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ mntget(od->mnt); dget(dentry); res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); - if (!IS_ERR(res)) { + if (!IS_ERR(res)) *opened |= FILE_OPENED; - od->filp = NULL; - } return res; } -- cgit v0.10.2 From d95852777bc8ba6b3ad3397d495c5f9dd8ca8383 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:39:14 +0400 Subject: make ->atomic_open() return int Change of calling conventions: old new NULL 1 file 0 ERR_PTR(-ve) -ve Caller *knows* that struct file *; no need to return it. Signed-off-by: Al Viro diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index af4e45b..46a24a6 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -62,7 +62,7 @@ ata *); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); - struct file * (*atomic_open)(struct inode *, struct dentry *, + int (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, umode_t create_mode, int *opened); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index d712105..d0d690b 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,7 +364,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); - struct file * (*atomic_open)(struct inode *, struct dentry *, + int (*atomic_open)(struct inode *, struct dentry *, struct opendata *, unsigned open_flag, umode_t create_mode, int *opened); }; @@ -482,8 +482,8 @@ otherwise noted. atomic_open: called on the last component of an open. Using this optional method the filesystem can look up, possibly create and open the file in one atomic operation. If it cannot perform this (e.g. the file type - turned out to be wrong) it may signal this by returning NULL instead of - an open struct file pointer. This method is only called if the last + turned out to be wrong) it may signal this by returning 1 instead of + usual 0 or -ve . This method is only called if the last component is negative or needs lookup. Cached positive dentries are still handled by f_op->open(). diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index de626b3..62ce8da 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -856,7 +856,7 @@ error: return ERR_PTR(result); } -static struct file * +static int v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t mode, int *opened) @@ -872,7 +872,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, if (d_unhashed(dentry)) { res = v9fs_vfs_lookup(dir, dentry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); if (res) dentry = res; @@ -881,7 +881,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, /* Only creates */ if (!(flags & O_CREAT) || dentry->d_inode) { finish_no_open(od, res); - return NULL; + return 1; } err = 0; @@ -933,13 +933,11 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, *opened |= FILE_CREATED; out: dput(res); - return filp; + return err; error: if (fid) p9_client_clunk(fid); - - filp = ERR_PTR(err); goto out; } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 3db5547..69f0510 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -240,7 +240,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0); } -static struct file * +static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct opendata *od, unsigned flags, umode_t omode, int *opened) @@ -262,7 +262,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (d_unhashed(dentry)) { res = v9fs_vfs_lookup(dir, dentry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); if (res) dentry = res; @@ -271,7 +271,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, /* Only creates */ if (!(flags & O_CREAT) || dentry->d_inode) { finish_no_open(od, res); - return NULL; + return 1; } v9ses = v9fs_inode2v9ses(dir); @@ -284,7 +284,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (IS_ERR(dfid)) { err = PTR_ERR(dfid); p9_debug(P9_DEBUG_VFS, "fid lookup failed %d\n", err); - goto err_return; + goto out; } /* clone a fid to use for creation */ @@ -292,7 +292,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, if (IS_ERR(ofid)) { err = PTR_ERR(ofid); p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err); - goto err_return; + goto out; } gid = v9fs_get_fsgid_for_create(dir); @@ -370,7 +370,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, *opened |= FILE_CREATED; out: dput(res); - return filp; + return err; error: if (fid) @@ -379,8 +379,6 @@ err_clunk_old_fid: if (ofid) p9_client_clunk(ofid); v9fs_set_create_acl(NULL, &dacl, &pacl); -err_return: - filp = ERR_PTR(err); goto out; } diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 81e5e90..d8bfabe 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -634,21 +634,20 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, return dentry; } -struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, - int *opened) +int ceph_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode, + int *opened) { int err; struct dentry *res = NULL; - struct file *filp; if (!(flags & O_CREAT)) { if (dentry->d_name.len > NAME_MAX) - return ERR_PTR(-ENAMETOOLONG); + return -ENAMETOOLONG; err = ceph_init_dentry(dentry); if (err < 0) - return ERR_PTR(err); + return err; return ceph_lookup_open(dir, dentry, od, flags, mode, opened); } @@ -656,7 +655,7 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (d_unhashed(dentry)) { res = ceph_lookup(dir, dentry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); if (res) dentry = res; @@ -665,14 +664,14 @@ struct file *ceph_atomic_open(struct inode *dir, struct dentry *dentry, /* We don't deal with positive dentries here */ if (dentry->d_inode) { finish_no_open(od, res); - return NULL; + return 1; } *opened |= FILE_CREATED; - filp = ceph_lookup_open(dir, dentry, od, flags, mode, opened); + err = ceph_lookup_open(dir, dentry, od, flags, mode, opened); dput(res); - return filp; + return err; } /* diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 4c304a9..b8cc3ee 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -213,9 +213,9 @@ out: * may_open() fails, the struct *file gets cleaned up (i.e. * ceph_release gets called). So fear not! */ -struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, - int *opened) +int ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, umode_t mode, + int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -230,7 +230,7 @@ struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, /* do the open */ req = prepare_open_request(dir->i_sb, flags, mode); if (IS_ERR(req)) - return ERR_CAST(req); + return PTR_ERR(req); req->r_dentry = dget(dentry); req->r_num_caps = 2; if (flags & O_CREAT) { @@ -257,10 +257,10 @@ out: dout("ceph_lookup_open result=%p\n", ret); if (IS_ERR(ret)) - return ERR_CAST(ret); + return PTR_ERR(ret); dput(ret); - return err ? ERR_PTR(err) : file; + return err; } int ceph_release(struct inode *inode, struct file *file) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f9a3251..f7e8e82 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -806,9 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages, loff_t off, size_t len); extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); -extern struct file *ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, - umode_t mode, int *opened); +extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned flags, + umode_t mode, int *opened); extern int ceph_release(struct inode *inode, struct file *filp); /* dir.c */ diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 92a7c3d..58d9aca 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -46,9 +46,9 @@ extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); -extern struct file *cifs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, - int *); +extern int cifs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + int *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 6cdf23f..8ca70b1 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -376,7 +376,7 @@ out: return rc; } -struct file * +int cifs_atomic_open(struct inode *inode, struct dentry *direntry, struct opendata *od, unsigned oflags, umode_t mode, int *opened) @@ -403,15 +403,15 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (!(oflags & O_CREAT)) { struct dentry *res = cifs_lookup(inode, direntry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); finish_no_open(od, res); - return NULL; + return 1; } rc = check_name(direntry); if (rc) - return ERR_PTR(rc); + return rc; xid = GetXid(); @@ -428,13 +428,12 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, rc = cifs_do_create(inode, direntry, xid, tlink, oflags, mode, &oplock, &fileHandle, opened); - if (rc) { - filp = ERR_PTR(rc); + if (rc) goto out; - } filp = finish_open(od, direntry, generic_file_open, opened); if (IS_ERR(filp)) { + rc = PTR_ERR(filp); CIFSSMBClose(xid, tcon, fileHandle); goto out; } @@ -443,14 +442,14 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (pfile_info == NULL) { CIFSSMBClose(xid, tcon, fileHandle); fput(filp); - filp = ERR_PTR(-ENOMEM); + rc = -ENOMEM; } out: cifs_put_tlink(tlink); free_xid: FreeXid(xid); - return filp; + return rc; } int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 345f78e..8a9ca09 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -369,9 +369,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, * If the filesystem doesn't support this, then fall back to separate * 'mknod' + 'open' requests. */ -static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, - struct opendata *od, unsigned flags, - umode_t mode, int *opened) +static int fuse_create_open(struct inode *dir, struct dentry *entry, + struct opendata *od, unsigned flags, + umode_t mode, int *opened) { int err; struct inode *inode; @@ -452,12 +452,14 @@ static struct file *fuse_create_open(struct inode *dir, struct dentry *entry, fuse_invalidate_attr(dir); file = finish_open(od, entry, generic_file_open, opened); if (IS_ERR(file)) { + err = PTR_ERR(file); fuse_sync_release(ff, flags); } else { file->private_data = fuse_file_get(ff); fuse_finish_open(inode, file); + err = 0; } - return file; + return err; out_free_ff: fuse_file_free(ff); @@ -466,23 +468,22 @@ out_put_request: out_put_forget_req: kfree(forget); out_err: - return ERR_PTR(err); + return err; } static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); -static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, - struct opendata *od, unsigned flags, - umode_t mode, int *opened) +static int fuse_atomic_open(struct inode *dir, struct dentry *entry, + struct opendata *od, unsigned flags, + umode_t mode, int *opened) { int err; struct fuse_conn *fc = get_fuse_conn(dir); - struct file *file; struct dentry *res = NULL; if (d_unhashed(entry)) { res = fuse_lookup(dir, entry, NULL); if (IS_ERR(res)) - return ERR_CAST(res); + return PTR_ERR(res); if (res) entry = res; @@ -497,24 +498,22 @@ static struct file *fuse_atomic_open(struct inode *dir, struct dentry *entry, if (fc->no_create) goto mknod; - file = fuse_create_open(dir, entry, od, flags, mode, opened); - if (PTR_ERR(file) == -ENOSYS) { + err = fuse_create_open(dir, entry, od, flags, mode, opened); + if (err == -ENOSYS) { fc->no_create = 1; goto mknod; } out_dput: dput(res); - return file; + return err; mknod: err = fuse_mknod(dir, entry, mode, 0); - if (err) { - file = ERR_PTR(err); + if (err) goto out_dput; - } no_open: finish_no_open(od, res); - return NULL; + return 1; } /* diff --git a/fs/namei.c b/fs/namei.c index 18b9326..f0dae00 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2204,7 +2204,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, umode_t mode; int error; int acc_mode; - struct file *filp; + struct file *filp = NULL; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; @@ -2271,14 +2271,15 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, od->dentry = DENTRY_NOT_SET; od->mnt = nd->path.mnt; - filp = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, + error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, opened); - if (IS_ERR(filp)) { + if (error < 0) { if (WARN_ON(od->dentry != DENTRY_NOT_SET)) dput(od->dentry); - if (create_error && PTR_ERR(filp) == -ENOENT) - filp = ERR_PTR(create_error); + if (create_error && error == -ENOENT) + error = create_error; + filp = ERR_PTR(error); goto out; } @@ -2288,7 +2289,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, acc_mode = MAY_OPEN; } - if (!filp) { + if (error) { /* returned 1, that is */ if (WARN_ON(od->dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; @@ -2304,6 +2305,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. */ + filp = od->filp; error = may_open(&filp->f_path, acc_mode, open_flag); if (error) { fput(filp); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6deb254..b56f4b3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -111,9 +111,9 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 -static struct file *nfs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, - int *); +static int nfs_atomic_open(struct inode *, struct dentry *, + struct opendata *, unsigned, umode_t, + int *); const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, .lookup = nfs_lookup, @@ -1387,10 +1387,10 @@ static int do_open(struct inode *inode, struct file *filp) return 0; } -static struct file *nfs_finish_open(struct nfs_open_context *ctx, - struct dentry *dentry, - struct opendata *od, unsigned open_flags, - int *opened) +static int nfs_finish_open(struct nfs_open_context *ctx, + struct dentry *dentry, + struct opendata *od, unsigned open_flags, + int *opened) { struct file *filp; int err; @@ -1403,30 +1403,31 @@ static struct file *nfs_finish_open(struct nfs_open_context *ctx, /* If the open_intent is for execute, we have an extra check to make */ if (ctx->mode & FMODE_EXEC) { err = nfs_may_open(dentry->d_inode, ctx->cred, open_flags); - if (err < 0) { - filp = ERR_PTR(err); + if (err < 0) goto out; - } } filp = finish_open(od, dentry, do_open, opened); - if (!IS_ERR(filp)) - nfs_file_set_open_context(filp, ctx); + if (IS_ERR(filp)) { + err = PTR_ERR(filp); + goto out; + } + nfs_file_set_open_context(filp, ctx); + err = 0; out: put_nfs_open_context(ctx); - return filp; + return err; } -static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned open_flags, - umode_t mode, int *opened) +static int nfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct opendata *od, unsigned open_flags, + umode_t mode, int *opened) { struct nfs_open_context *ctx; struct dentry *res; struct iattr attr = { .ia_valid = ATTR_OPEN }; struct inode *inode; - struct file *filp; int err; /* Expect a negative dentry */ @@ -1437,21 +1438,19 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, /* NFS only supports OPEN on regular files */ if ((open_flags & O_DIRECTORY)) { - err = -ENOENT; if (!d_unhashed(dentry)) { /* * Hashed negative dentry with O_DIRECTORY: dentry was * revalidated and is fine, no need to perform lookup * again */ - goto out_err; + return -ENOENT; } goto no_open; } - err = -ENAMETOOLONG; if (dentry->d_name.len > NFS_SERVER(dir)->namelen) - goto out_err; + return -ENAMETOOLONG; if (open_flags & O_CREAT) { attr.ia_valid |= ATTR_MODE; @@ -1465,7 +1464,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, ctx = create_nfs_open_context(dentry, open_flags); err = PTR_ERR(ctx); if (IS_ERR(ctx)) - goto out_err; + goto out; nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); @@ -1489,7 +1488,7 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, default: break; } - goto out_err; + goto out; } res = d_add_unique(dentry, inode); if (res != NULL) @@ -1498,22 +1497,20 @@ static struct file *nfs_atomic_open(struct inode *dir, struct dentry *dentry, nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - filp = nfs_finish_open(ctx, dentry, od, open_flags, opened); + err = nfs_finish_open(ctx, dentry, od, open_flags, opened); dput(res); - return filp; - -out_err: - return ERR_PTR(err); +out: + return err; no_open: res = nfs_lookup(dir, dentry, NULL); err = PTR_ERR(res); if (IS_ERR(res)) - goto out_err; + goto out; finish_no_open(od, res); - return NULL; + return 1; } static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) diff --git a/include/linux/fs.h b/include/linux/fs.h index a7618cf..33bda92 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1694,9 +1694,9 @@ struct inode_operations { int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); int (*update_time)(struct inode *, struct timespec *, int); - struct file * (*atomic_open)(struct inode *, struct dentry *, - struct opendata *, unsigned open_flag, - umode_t create_mode, int *opened); + int (*atomic_open)(struct inode *, struct dentry *, + struct opendata *, unsigned open_flag, + umode_t create_mode, int *opened); } ____cacheline_aligned; struct seq_file; -- cgit v0.10.2 From a4a3bdd778715999ddfeefdc52ab76254580fa76 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 05:55:37 -0400 Subject: kill opendata->{mnt,dentry} ->filp->f_path is there for purpose... Signed-off-by: Al Viro diff --git a/fs/internal.h b/fs/internal.h index ae69a3b..09003a0 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -83,8 +83,6 @@ extern struct super_block *user_get_super(dev_t); * open.c */ struct opendata { - struct dentry *dentry; - struct vfsmount *mnt; struct file *filp; }; struct open_flags { diff --git a/fs/namei.c b/fs/namei.c index f0dae00..af83ede 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2269,14 +2269,11 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; - od->dentry = DENTRY_NOT_SET; - od->mnt = nd->path.mnt; + od->filp->f_path.dentry = DENTRY_NOT_SET; + od->filp->f_path.mnt = nd->path.mnt; error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, opened); if (error < 0) { - if (WARN_ON(od->dentry != DENTRY_NOT_SET)) - dput(od->dentry); - if (create_error && error == -ENOENT) error = create_error; filp = ERR_PTR(error); @@ -2290,13 +2287,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } if (error) { /* returned 1, that is */ - if (WARN_ON(od->dentry == DENTRY_NOT_SET)) { + if (WARN_ON(od->filp->f_path.dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; } - if (od->dentry) { + if (od->filp->f_path.dentry) { dput(dentry); - dentry = od->dentry; + dentry = od->filp->f_path.dentry; } goto looked_up; } @@ -2607,7 +2604,7 @@ finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; - od->mnt = nd->path.mnt; + od->filp->f_path.mnt = nd->path.mnt; filp = finish_open(od, nd->path.dentry, NULL, opened); if (IS_ERR(filp)) { if (filp == ERR_PTR(-EOPENSTALE)) diff --git a/fs/open.c b/fs/open.c index c87f982..2b1654d 100644 --- a/fs/open.c +++ b/fs/open.c @@ -788,10 +788,10 @@ struct file *finish_open(struct opendata *od, struct dentry *dentry, struct file *res; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - mntget(od->mnt); + mntget(od->filp->f_path.mnt); dget(dentry); - res = do_dentry_open(dentry, od->mnt, od->filp, open, current_cred()); + res = do_dentry_open(dentry, od->filp->f_path.mnt, od->filp, open, current_cred()); if (!IS_ERR(res)) *opened |= FILE_OPENED; @@ -810,7 +810,7 @@ EXPORT_SYMBOL(finish_open); */ void finish_no_open(struct opendata *od, struct dentry *dentry) { - od->dentry = dentry; + od->filp->f_path.dentry = dentry; } EXPORT_SYMBOL(finish_no_open); -- cgit v0.10.2 From 30d904947459cca2beb69e0110716f5248b31f2a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:40:19 +0400 Subject: kill struct opendata Just pass struct file *. Methods are happier that way... There's no need to return struct file * from finish_open() now, so let it return int. Next: saner prototypes for parts in namei.c Signed-off-by: Al Viro diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 46a24a6..33e5243 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -63,7 +63,7 @@ ata *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, - struct opendata *, unsigned open_flag, + struct file *, unsigned open_flag, umode_t create_mode, int *opened); locking rules: diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index d0d690b..279de21 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -365,7 +365,7 @@ struct inode_operations { int (*removexattr) (struct dentry *, const char *); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, - struct opendata *, unsigned open_flag, + struct file *, unsigned open_flag, umode_t create_mode, int *opened); }; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 62ce8da..2b05651 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -858,12 +858,11 @@ error: static int v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, + struct file *file, unsigned flags, umode_t mode, int *opened) { int err; u32 perm; - struct file *filp; struct v9fs_inode *v9inode; struct v9fs_session_info *v9ses; struct p9_fid *fid, *inode_fid; @@ -880,7 +879,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, /* Only creates */ if (!(flags & O_CREAT) || dentry->d_inode) { - finish_no_open(od, res); + finish_no_open(file, res); return 1; } @@ -918,16 +917,14 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, v9inode->writeback_fid = (void *) inode_fid; } mutex_unlock(&v9inode->v_mutex); - filp = finish_open(od, dentry, generic_file_open, opened); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); + err = finish_open(file, dentry, generic_file_open, opened); + if (err) goto error; - } - filp->private_data = fid; + file->private_data = fid; #ifdef CONFIG_9P_FSCACHE if (v9ses->cache) - v9fs_cache_inode_set_cookie(dentry->d_inode, filp); + v9fs_cache_inode_set_cookie(dentry->d_inode, file); #endif *opened |= FILE_CREATED; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 69f0510..cfaebde 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -242,14 +242,13 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, static int v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t omode, + struct file *file, unsigned flags, umode_t omode, int *opened) { int err = 0; gid_t gid; umode_t mode; char *name = NULL; - struct file *filp; struct p9_qid qid; struct inode *inode; struct p9_fid *fid = NULL; @@ -270,7 +269,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, /* Only creates */ if (!(flags & O_CREAT) || dentry->d_inode) { - finish_no_open(od, res); + finish_no_open(file, res); return 1; } @@ -357,15 +356,13 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } mutex_unlock(&v9inode->v_mutex); /* Since we are opening a file, assign the open fid to the file */ - filp = finish_open(od, dentry, generic_file_open, opened); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); + err = finish_open(file, dentry, generic_file_open, opened); + if (err) goto err_clunk_old_fid; - } - filp->private_data = ofid; + file->private_data = ofid; #ifdef CONFIG_9P_FSCACHE if (v9ses->cache) - v9fs_cache_inode_set_cookie(inode, filp); + v9fs_cache_inode_set_cookie(inode, file); #endif *opened |= FILE_CREATED; out: diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d8bfabe..80c848e 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -635,7 +635,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, } int ceph_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, + struct file *file, unsigned flags, umode_t mode, int *opened) { int err; @@ -649,7 +649,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, if (err < 0) return err; - return ceph_lookup_open(dir, dentry, od, flags, mode, opened); + return ceph_lookup_open(dir, dentry, file, flags, mode, opened); } if (d_unhashed(dentry)) { @@ -663,12 +663,12 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, /* We don't deal with positive dentries here */ if (dentry->d_inode) { - finish_no_open(od, res); + finish_no_open(file, res); return 1; } *opened |= FILE_CREATED; - err = ceph_lookup_open(dir, dentry, od, flags, mode, opened); + err = ceph_lookup_open(dir, dentry, file, flags, mode, opened); dput(res); return err; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index b8cc3ee..1b81d6c 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -214,12 +214,11 @@ out: * ceph_release gets called). So fear not! */ int ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, umode_t mode, + struct file *file, unsigned flags, umode_t mode, int *opened) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; - struct file *file = NULL; struct ceph_mds_request *req; struct dentry *ret; int err; @@ -248,9 +247,7 @@ int ceph_lookup_open(struct inode *dir, struct dentry *dentry, err = ceph_handle_notrace_create(dir, dentry); if (err) goto out; - file = finish_open(od, req->r_dentry, ceph_open, opened); - if (IS_ERR(file)) - err = PTR_ERR(file); + err = finish_open(file, req->r_dentry, ceph_open, opened); out: ret = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index f7e8e82..f4d5522 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -807,7 +807,7 @@ extern int ceph_copy_from_page_vector(struct page **pages, extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); extern int ceph_open(struct inode *inode, struct file *file); extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned flags, + struct file *od, unsigned flags, umode_t mode, int *opened); extern int ceph_release(struct inode *inode, struct file *filp); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 58d9aca..48bb474 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -47,7 +47,7 @@ extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); extern int cifs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, + struct file *, unsigned, umode_t, int *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, struct nameidata *); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 8ca70b1..c00c192 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -378,7 +378,7 @@ out: int cifs_atomic_open(struct inode *inode, struct dentry *direntry, - struct opendata *od, unsigned oflags, umode_t mode, + struct file *file, unsigned oflags, umode_t mode, int *opened) { int rc; @@ -405,7 +405,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (IS_ERR(res)) return PTR_ERR(res); - finish_no_open(od, res); + finish_no_open(file, res); return 1; } @@ -431,9 +431,8 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (rc) goto out; - filp = finish_open(od, direntry, generic_file_open, opened); - if (IS_ERR(filp)) { - rc = PTR_ERR(filp); + rc = finish_open(file, direntry, generic_file_open, opened); + if (rc) { CIFSSMBClose(xid, tcon, fileHandle); goto out; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 8a9ca09..110db54 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -370,7 +370,7 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, * 'mknod' + 'open' requests. */ static int fuse_create_open(struct inode *dir, struct dentry *entry, - struct opendata *od, unsigned flags, + struct file *file, unsigned flags, umode_t mode, int *opened) { int err; @@ -382,7 +382,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, struct fuse_open_out outopen; struct fuse_entry_out outentry; struct fuse_file *ff; - struct file *file; forget = fuse_alloc_forget(); err = -ENOMEM; @@ -450,14 +449,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, d_instantiate(entry, inode); fuse_change_entry_timeout(entry, &outentry); fuse_invalidate_attr(dir); - file = finish_open(od, entry, generic_file_open, opened); - if (IS_ERR(file)) { - err = PTR_ERR(file); + err = finish_open(file, entry, generic_file_open, opened); + if (err) { fuse_sync_release(ff, flags); } else { file->private_data = fuse_file_get(ff); fuse_finish_open(inode, file); - err = 0; } return err; @@ -473,7 +470,7 @@ out_err: static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t); static int fuse_atomic_open(struct inode *dir, struct dentry *entry, - struct opendata *od, unsigned flags, + struct file *file, unsigned flags, umode_t mode, int *opened) { int err; @@ -498,7 +495,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, if (fc->no_create) goto mknod; - err = fuse_create_open(dir, entry, od, flags, mode, opened); + err = fuse_create_open(dir, entry, file, flags, mode, opened); if (err == -ENOSYS) { fc->no_create = 1; goto mknod; @@ -512,7 +509,7 @@ mknod: if (err) goto out_dput; no_open: - finish_no_open(od, res); + finish_no_open(file, res); return 1; } diff --git a/fs/internal.h b/fs/internal.h index 09003a0..8a9f5fa 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -82,9 +82,6 @@ extern struct super_block *user_get_super(dev_t); /* * open.c */ -struct opendata { - struct file *filp; -}; struct open_flags { int open_flag; umode_t mode; diff --git a/fs/namei.c b/fs/namei.c index af83ede..aaff8a8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2194,7 +2194,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) } static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, - struct path *path, struct opendata *od, + struct path *path, struct file *file, const struct open_flags *op, bool *want_write, bool need_lookup, int *opened) @@ -2269,9 +2269,9 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; - od->filp->f_path.dentry = DENTRY_NOT_SET; - od->filp->f_path.mnt = nd->path.mnt; - error = dir->i_op->atomic_open(dir, dentry, od, open_flag, mode, + file->f_path.dentry = DENTRY_NOT_SET; + file->f_path.mnt = nd->path.mnt; + error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode, opened); if (error < 0) { if (create_error && error == -ENOENT) @@ -2287,13 +2287,13 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, } if (error) { /* returned 1, that is */ - if (WARN_ON(od->filp->f_path.dentry == DENTRY_NOT_SET)) { + if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { filp = ERR_PTR(-EIO); goto out; } - if (od->filp->f_path.dentry) { + if (file->f_path.dentry) { dput(dentry); - dentry = od->filp->f_path.dentry; + dentry = file->f_path.dentry; } goto looked_up; } @@ -2302,7 +2302,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. */ - filp = od->filp; + filp = file; error = may_open(&filp->f_path, acc_mode, open_flag); if (error) { fput(filp); @@ -2350,7 +2350,7 @@ looked_up: * was performed, only lookup. */ static struct file *lookup_open(struct nameidata *nd, struct path *path, - struct opendata *od, + struct file *file, const struct open_flags *op, bool *want_write, int *opened) { @@ -2370,7 +2370,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, od, op, want_write, + return atomic_open(nd, dentry, path, file, op, want_write, need_lookup, opened); } @@ -2420,7 +2420,7 @@ out_dput: * Handle the last step of open() */ static struct file *do_last(struct nameidata *nd, struct path *path, - struct opendata *od, const struct open_flags *op, + struct file *file, const struct open_flags *op, int *opened, const char *pathname) { struct dentry *dir = nd->path.dentry; @@ -2497,7 +2497,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, od, op, &want_write, opened); + filp = lookup_open(nd, path, file, op, &want_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (filp) { @@ -2604,13 +2604,15 @@ finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) goto exit; - od->filp->f_path.mnt = nd->path.mnt; - filp = finish_open(od, nd->path.dentry, NULL, opened); - if (IS_ERR(filp)) { - if (filp == ERR_PTR(-EOPENSTALE)) + file->f_path.mnt = nd->path.mnt; + error = finish_open(file, nd->path.dentry, NULL, opened); + if (error) { + filp = ERR_PTR(error); + if (error == -EOPENSTALE) goto stale_open; goto out; } + filp = file; opened: error = open_check_o_direct(filp); if (error) @@ -2663,17 +2665,17 @@ static struct file *path_openat(int dfd, const char *pathname, struct nameidata *nd, const struct open_flags *op, int flags) { struct file *base = NULL; - struct opendata od; + struct file *file; struct file *res; struct path path; int opened = 0; int error; - od.filp = get_empty_filp(); - if (!od.filp) + file = get_empty_filp(); + if (!file) return ERR_PTR(-ENFILE); - od.filp->f_flags = op->open_flag; + file->f_flags = op->open_flag; error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) @@ -2684,7 +2686,7 @@ static struct file *path_openat(int dfd, const char *pathname, if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, &opened, pathname); + res = do_last(nd, &path, file, op, &opened, pathname); while (unlikely(!res)) { /* trailing symlink */ struct path link = path; void *cookie; @@ -2699,7 +2701,7 @@ static struct file *path_openat(int dfd, const char *pathname, error = follow_link(&link, nd, &cookie); if (unlikely(error)) goto out_filp; - res = do_last(nd, &path, &od, op, &opened, pathname); + res = do_last(nd, &path, file, op, &opened, pathname); put_link(nd, &link, cookie); } out: @@ -2708,7 +2710,7 @@ out: if (base) fput(base); if (!(opened & FILE_OPENED)) - put_filp(od.filp); + put_filp(file); if (res == ERR_PTR(-EOPENSTALE)) { if (flags & LOOKUP_RCU) res = ERR_PTR(-ECHILD); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b56f4b3..dafc86c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -112,7 +112,7 @@ const struct inode_operations nfs3_dir_inode_operations = { #ifdef CONFIG_NFS_V4 static int nfs_atomic_open(struct inode *, struct dentry *, - struct opendata *, unsigned, umode_t, + struct file *, unsigned, umode_t, int *); const struct inode_operations nfs4_dir_inode_operations = { .create = nfs_create, @@ -1389,10 +1389,9 @@ static int do_open(struct inode *inode, struct file *filp) static int nfs_finish_open(struct nfs_open_context *ctx, struct dentry *dentry, - struct opendata *od, unsigned open_flags, + struct file *file, unsigned open_flags, int *opened) { - struct file *filp; int err; if (ctx->dentry != dentry) { @@ -1407,13 +1406,10 @@ static int nfs_finish_open(struct nfs_open_context *ctx, goto out; } - filp = finish_open(od, dentry, do_open, opened); - if (IS_ERR(filp)) { - err = PTR_ERR(filp); + err = finish_open(file, dentry, do_open, opened); + if (err) goto out; - } - nfs_file_set_open_context(filp, ctx); - err = 0; + nfs_file_set_open_context(file, ctx); out: put_nfs_open_context(ctx); @@ -1421,7 +1417,7 @@ out: } static int nfs_atomic_open(struct inode *dir, struct dentry *dentry, - struct opendata *od, unsigned open_flags, + struct file *file, unsigned open_flags, umode_t mode, int *opened) { struct nfs_open_context *ctx; @@ -1497,7 +1493,7 @@ static int nfs_atomic_open(struct inode *dir, struct dentry *dentry, nfs_unblock_sillyrename(dentry->d_parent); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - err = nfs_finish_open(ctx, dentry, od, open_flags, opened); + err = nfs_finish_open(ctx, dentry, file, open_flags, opened); dput(res); out: @@ -1509,7 +1505,7 @@ no_open: if (IS_ERR(res)) goto out; - finish_no_open(od, res); + finish_no_open(file, res); return 1; } diff --git a/fs/open.c b/fs/open.c index 2b1654d..fc829d6 100644 --- a/fs/open.c +++ b/fs/open.c @@ -781,21 +781,23 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, * If the open callback is set to NULL, then the standard f_op->open() * filesystem callback is substituted. */ -struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *), - int *opened) +int finish_open(struct file *file, struct dentry *dentry, + int (*open)(struct inode *, struct file *), + int *opened) { struct file *res; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ - mntget(od->filp->f_path.mnt); + mntget(file->f_path.mnt); dget(dentry); - res = do_dentry_open(dentry, od->filp->f_path.mnt, od->filp, open, current_cred()); - if (!IS_ERR(res)) + res = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); + if (!IS_ERR(res)) { *opened |= FILE_OPENED; + return 0; + } - return res; + return PTR_ERR(res); } EXPORT_SYMBOL(finish_open); @@ -808,9 +810,9 @@ EXPORT_SYMBOL(finish_open); * This can be used to set the result of a successful lookup in ->atomic_open(). * The filesystem's atomic_open() method shall return NULL after calling this. */ -void finish_no_open(struct opendata *od, struct dentry *dentry) +void finish_no_open(struct file *file, struct dentry *dentry) { - od->filp->f_path.dentry = dentry; + file->f_path.dentry = dentry; } EXPORT_SYMBOL(finish_no_open); diff --git a/include/linux/fs.h b/include/linux/fs.h index 33bda92..1dcc75c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -427,7 +427,6 @@ struct kstatfs; struct vm_area_struct; struct vfsmount; struct cred; -struct opendata; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -1695,7 +1694,7 @@ struct inode_operations { u64 len); int (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, - struct opendata *, unsigned open_flag, + struct file *, unsigned open_flag, umode_t create_mode, int *opened); } ____cacheline_aligned; @@ -2069,10 +2068,10 @@ enum { FILE_CREATED = 1, FILE_OPENED = 2 }; -extern struct file *finish_open(struct opendata *od, struct dentry *dentry, - int (*open)(struct inode *, struct file *), - int *opened); -extern void finish_no_open(struct opendata *od, struct dentry *dentry); +extern int finish_open(struct file *file, struct dentry *dentry, + int (*open)(struct inode *, struct file *), + int *opened); +extern void finish_no_open(struct file *file, struct dentry *dentry); /* fs/ioctl.c */ -- cgit v0.10.2 From 2675a4eb6a9f1240098721c8a84ede28abd9d7b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:41:10 +0400 Subject: fs/namei.c: get do_last() and friends return int Same conventions as for ->atomic_open(). Trimmed the forest of labels a bit, while we are at it... Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index aaff8a8..16256d9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2193,18 +2193,17 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) return security_inode_create(dir->dentry->d_inode, dentry, mode); } -static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, - struct path *path, struct file *file, - const struct open_flags *op, - bool *want_write, bool need_lookup, - int *opened) +static int atomic_open(struct nameidata *nd, struct dentry *dentry, + struct path *path, struct file *file, + const struct open_flags *op, + bool *want_write, bool need_lookup, + int *opened) { struct inode *dir = nd->path.dentry->d_inode; unsigned open_flag = open_to_namei_flags(op->open_flag); umode_t mode; int error; int acc_mode; - struct file *filp = NULL; int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; @@ -2212,7 +2211,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, /* Don't create child dentry for a dead directory. */ if (unlikely(IS_DEADDIR(dir))) { - filp = ERR_PTR(-ENOENT); + error = -ENOENT; goto out; } @@ -2276,7 +2275,6 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (error < 0) { if (create_error && error == -ENOENT) error = create_error; - filp = ERR_PTR(error); goto out; } @@ -2288,7 +2286,7 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, if (error) { /* returned 1, that is */ if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { - filp = ERR_PTR(-EIO); + error = -EIO; goto out; } if (file->f_path.dentry) { @@ -2302,27 +2300,24 @@ static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry, * We didn't have the inode before the open, so check open permission * here. */ - filp = file; - error = may_open(&filp->f_path, acc_mode, open_flag); - if (error) { - fput(filp); - filp = ERR_PTR(error); - } + error = may_open(&file->f_path, acc_mode, open_flag); + if (error) + fput(file); out: dput(dentry); - return filp; + return error; no_open: if (need_lookup) { dentry = lookup_real(dir, dentry, nd); if (IS_ERR(dentry)) - return ERR_CAST(dentry); + return PTR_ERR(dentry); if (create_error) { int open_flag = op->open_flag; - filp = ERR_PTR(create_error); + error = create_error; if ((open_flag & O_EXCL)) { if (!dentry->d_inode) goto out; @@ -2338,7 +2333,7 @@ no_open: looked_up: path->dentry = dentry; path->mnt = nd->path.mnt; - return NULL; + return 1; } /* @@ -2349,10 +2344,10 @@ looked_up: * Returns open file or NULL on success, error otherwise. NULL means no open * was performed, only lookup. */ -static struct file *lookup_open(struct nameidata *nd, struct path *path, - struct file *file, - const struct open_flags *op, - bool *want_write, int *opened) +static int lookup_open(struct nameidata *nd, struct path *path, + struct file *file, + const struct open_flags *op, + bool *want_write, int *opened) { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; @@ -2363,7 +2358,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, *opened &= ~FILE_CREATED; dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); if (IS_ERR(dentry)) - return ERR_CAST(dentry); + return PTR_ERR(dentry); /* Cached positive dentry: will open in f_op->open */ if (!need_lookup && dentry->d_inode) @@ -2379,7 +2374,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, dentry = lookup_real(dir_inode, dentry, nd); if (IS_ERR(dentry)) - return ERR_CAST(dentry); + return PTR_ERR(dentry); } /* Negative dentry, just create the file */ @@ -2409,26 +2404,25 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path, out_no_open: path->dentry = dentry; path->mnt = nd->path.mnt; - return NULL; + return 1; out_dput: dput(dentry); - return ERR_PTR(error); + return error; } /* * Handle the last step of open() */ -static struct file *do_last(struct nameidata *nd, struct path *path, - struct file *file, const struct open_flags *op, - int *opened, const char *pathname) +static int do_last(struct nameidata *nd, struct path *path, + struct file *file, const struct open_flags *op, + int *opened, const char *pathname) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; bool want_write = false; int acc_mode = op->acc_mode; - struct file *filp; struct inode *inode; bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; @@ -2443,22 +2437,22 @@ static struct file *do_last(struct nameidata *nd, struct path *path, case LAST_DOT: error = handle_dots(nd, nd->last_type); if (error) - return ERR_PTR(error); + return error; /* fallthrough */ case LAST_ROOT: error = complete_walk(nd); if (error) - return ERR_PTR(error); + return error; audit_inode(pathname, nd->path.dentry); if (open_flag & O_CREAT) { error = -EISDIR; - goto exit; + goto out; } goto finish_open; case LAST_BIND: error = complete_walk(nd); if (error) - return ERR_PTR(error); + return error; audit_inode(pathname, dir); goto finish_open; } @@ -2474,7 +2468,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, goto finish_lookup; if (error < 0) - goto exit; + goto out; BUG_ON(nd->inode != dir->d_inode); } else { @@ -2486,29 +2480,29 @@ static struct file *do_last(struct nameidata *nd, struct path *path, */ error = complete_walk(nd); if (error) - return ERR_PTR(error); + return error; audit_inode(pathname, dir); error = -EISDIR; /* trailing slashes? */ if (nd->last.name[nd->last.len]) - goto exit; + goto out; } retry_lookup: mutex_lock(&dir->d_inode->i_mutex); - filp = lookup_open(nd, path, file, op, &want_write, opened); + error = lookup_open(nd, path, file, op, &want_write, opened); mutex_unlock(&dir->d_inode->i_mutex); - if (filp) { - if (IS_ERR(filp)) + if (error <= 0) { + if (error) goto out; if ((*opened & FILE_CREATED) || - !S_ISREG(filp->f_path.dentry->d_inode->i_mode)) + !S_ISREG(file->f_path.dentry->d_inode->i_mode)) will_truncate = false; - audit_inode(pathname, filp->f_path.dentry); + audit_inode(pathname, file->f_path.dentry); goto opened; } @@ -2554,18 +2548,18 @@ finish_lookup: error = -ENOENT; if (!inode) { path_to_nameidata(path, nd); - goto exit; + goto out; } if (should_follow_link(inode, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { error = -ECHILD; - goto exit; + goto out; } } BUG_ON(inode != path->dentry->d_inode); - return NULL; + return 1; } if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { @@ -2581,14 +2575,14 @@ finish_lookup: error = complete_walk(nd); if (error) { path_put(&save_parent); - return ERR_PTR(error); + return error; } error = -EISDIR; if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) - goto exit; + goto out; error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) - goto exit; + goto out; audit_inode(pathname, nd->path.dentry); finish_open: if (!S_ISREG(nd->inode->i_mode)) @@ -2597,32 +2591,30 @@ finish_open: if (will_truncate) { error = mnt_want_write(nd->path.mnt); if (error) - goto exit; + goto out; want_write = true; } finish_open_created: error = may_open(&nd->path, acc_mode, open_flag); if (error) - goto exit; + goto out; file->f_path.mnt = nd->path.mnt; error = finish_open(file, nd->path.dentry, NULL, opened); if (error) { - filp = ERR_PTR(error); if (error == -EOPENSTALE) goto stale_open; goto out; } - filp = file; opened: - error = open_check_o_direct(filp); + error = open_check_o_direct(file); if (error) goto exit_fput; - error = ima_file_check(filp, op->acc_mode); + error = ima_file_check(file, op->acc_mode); if (error) goto exit_fput; if (will_truncate) { - error = handle_truncate(filp); + error = handle_truncate(file); if (error) goto exit_fput; } @@ -2631,16 +2623,14 @@ out: mnt_drop_write(nd->path.mnt); path_put(&save_parent); terminate_walk(nd); - return filp; + return error; exit_dput: path_put_conditional(path, nd); -exit: - filp = ERR_PTR(error); goto out; exit_fput: - fput(filp); - goto exit; + fput(file); + goto out; stale_open: /* If no saved parent or already retried then can't retry */ @@ -2666,7 +2656,6 @@ static struct file *path_openat(int dfd, const char *pathname, { struct file *base = NULL; struct file *file; - struct file *res; struct path path; int opened = 0; int error; @@ -2679,29 +2668,29 @@ static struct file *path_openat(int dfd, const char *pathname, error = path_init(dfd, pathname, flags | LOOKUP_PARENT, nd, &base); if (unlikely(error)) - goto out_filp; + goto out; current->total_link_count = 0; error = link_path_walk(pathname, nd); if (unlikely(error)) - goto out_filp; + goto out; - res = do_last(nd, &path, file, op, &opened, pathname); - while (unlikely(!res)) { /* trailing symlink */ + error = do_last(nd, &path, file, op, &opened, pathname); + while (unlikely(error > 0)) { /* trailing symlink */ struct path link = path; void *cookie; if (!(nd->flags & LOOKUP_FOLLOW)) { path_put_conditional(&path, nd); path_put(&nd->path); - res = ERR_PTR(-ELOOP); + error = -ELOOP; break; } nd->flags |= LOOKUP_PARENT; nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); error = follow_link(&link, nd, &cookie); if (unlikely(error)) - goto out_filp; - res = do_last(nd, &path, file, op, &opened, pathname); + break; + error = do_last(nd, &path, file, op, &opened, pathname); put_link(nd, &link, cookie); } out: @@ -2709,19 +2698,20 @@ out: path_put(&nd->root); if (base) fput(base); - if (!(opened & FILE_OPENED)) + if (!(opened & FILE_OPENED)) { + BUG_ON(!error); put_filp(file); - if (res == ERR_PTR(-EOPENSTALE)) { - if (flags & LOOKUP_RCU) - res = ERR_PTR(-ECHILD); - else - res = ERR_PTR(-ESTALE); } - return res; - -out_filp: - res = ERR_PTR(error); - goto out; + if (unlikely(error)) { + if (error == -EOPENSTALE) { + if (flags & LOOKUP_RCU) + error = -ECHILD; + else + error = -ESTALE; + } + file = ERR_PTR(error); + } + return file; } struct file *do_filp_open(int dfd, const char *pathname, -- cgit v0.10.2 From e45198a6ac24bd2c4ad4a43b670c2f1a23dd2df3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 06:48:09 -0400 Subject: make finish_no_open() return int namely, 1 ;-) That's what we want to return from ->atomic_open() instances after finish_no_open(). Signed-off-by: Al Viro diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 2b05651..eae476f 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -878,10 +878,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, } /* Only creates */ - if (!(flags & O_CREAT) || dentry->d_inode) { - finish_no_open(file, res); - return 1; - } + if (!(flags & O_CREAT) || dentry->d_inode) + return finish_no_open(file, res); err = 0; fid = NULL; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index cfaebde..1ee10c8 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -268,10 +268,8 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, } /* Only creates */ - if (!(flags & O_CREAT) || dentry->d_inode) { - finish_no_open(file, res); - return 1; - } + if (!(flags & O_CREAT) || dentry->d_inode) + return finish_no_open(file, res); v9ses = v9fs_inode2v9ses(dir); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 80c848e..d42eee1 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -662,10 +662,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, } /* We don't deal with positive dentries here */ - if (dentry->d_inode) { - finish_no_open(file, res); - return 1; - } + if (dentry->d_inode) + return finish_no_open(file, res); *opened |= FILE_CREATED; err = ceph_lookup_open(dir, dentry, file, flags, mode, opened); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index c00c192..e8c53c8 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -405,8 +405,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (IS_ERR(res)) return PTR_ERR(res); - finish_no_open(file, res); - return 1; + return finish_no_open(file, res); } rc = check_name(direntry); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 110db54..ccdab3a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -509,8 +509,7 @@ mknod: if (err) goto out_dput; no_open: - finish_no_open(file, res); - return 1; + return finish_no_open(file, res); } /* diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index dafc86c..f167c7a 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1505,8 +1505,7 @@ no_open: if (IS_ERR(res)) goto out; - finish_no_open(file, res); - return 1; + return finish_no_open(file, res); } static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) diff --git a/fs/open.c b/fs/open.c index fc829d6..d51c1b7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -810,9 +810,10 @@ EXPORT_SYMBOL(finish_open); * This can be used to set the result of a successful lookup in ->atomic_open(). * The filesystem's atomic_open() method shall return NULL after calling this. */ -void finish_no_open(struct file *file, struct dentry *dentry) +int finish_no_open(struct file *file, struct dentry *dentry) { file->f_path.dentry = dentry; + return 1; } EXPORT_SYMBOL(finish_no_open); diff --git a/include/linux/fs.h b/include/linux/fs.h index 1dcc75c..17ee20d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2071,7 +2071,7 @@ enum { extern int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *), int *opened); -extern void finish_no_open(struct file *file, struct dentry *dentry); +extern int finish_no_open(struct file *file, struct dentry *dentry); /* fs/ioctl.c */ -- cgit v0.10.2 From 96b7e579addd3cdc806c1667bf5b6b126070827c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 14:22:04 -0400 Subject: switch do_dentry_open() to returning int Signed-off-by: Al Viro diff --git a/fs/open.c b/fs/open.c index d51c1b7..1241c59 100644 --- a/fs/open.c +++ b/fs/open.c @@ -667,10 +667,10 @@ int open_check_o_direct(struct file *f) return 0; } -static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) +static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, + struct file *f, + int (*open)(struct inode *, struct file *), + const struct cred *cred) { static const struct file_operations empty_fops = {}; struct inode *inode; @@ -699,7 +699,7 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, if (unlikely(f->f_mode & FMODE_PATH)) { f->f_op = &empty_fops; - return f; + return 0; } f->f_op = fops_get(inode->i_fop); @@ -726,7 +726,7 @@ static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); - return f; + return 0; cleanup_all: fops_put(f->f_op); @@ -749,7 +749,7 @@ cleanup_all: cleanup_file: dput(dentry); mntput(mnt); - return ERR_PTR(error); + return error; } static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, @@ -757,17 +757,19 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int (*open)(struct inode *, struct file *), const struct cred *cred) { - struct file *res = do_dentry_open(dentry, mnt, f, open, cred); - if (!IS_ERR(res)) { - int error = open_check_o_direct(f); + int error; + error = do_dentry_open(dentry, mnt, f, open, cred); + if (!error) { + error = open_check_o_direct(f); if (error) { - fput(res); - res = ERR_PTR(error); + fput(f); + f = ERR_PTR(error); } - } else { + } else { put_filp(f); + f = ERR_PTR(error); } - return res; + return f; } /** @@ -785,19 +787,17 @@ int finish_open(struct file *file, struct dentry *dentry, int (*open)(struct inode *, struct file *), int *opened) { - struct file *res; + int error; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ mntget(file->f_path.mnt); dget(dentry); - res = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); - if (!IS_ERR(res)) { + error = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); + if (!error) *opened |= FILE_OPENED; - return 0; - } - return PTR_ERR(res); + return error; } EXPORT_SYMBOL(finish_open); -- cgit v0.10.2 From 2a027e7a1873812240cbdac0f55c4734ff0042a5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 14:24:38 -0400 Subject: fold __dentry_open() into its sole caller Signed-off-by: Al Viro diff --git a/fs/open.c b/fs/open.c index 1241c59..28fbacb 100644 --- a/fs/open.c +++ b/fs/open.c @@ -752,26 +752,6 @@ cleanup_file: return error; } -static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) -{ - int error; - error = do_dentry_open(dentry, mnt, f, open, cred); - if (!error) { - error = open_check_o_direct(f); - if (error) { - fput(f); - f = ERR_PTR(error); - } - } else { - put_filp(f); - f = ERR_PTR(error); - } - return f; -} - /** * finish_open - finish opening a file * @od: opaque open data @@ -841,7 +821,18 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, } f->f_flags = flags; - return __dentry_open(dentry, mnt, f, NULL, cred); + error = do_dentry_open(dentry, mnt, f, NULL, cred); + if (!error) { + error = open_check_o_direct(f); + if (error) { + fput(f); + f = ERR_PTR(error); + } + } else { + put_filp(f); + f = ERR_PTR(error); + } + return f; } EXPORT_SYMBOL(dentry_open); -- cgit v0.10.2 From 02e5180d991f203441687cecd0b7e6a2ba0a34d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 14:32:45 -0400 Subject: do_dentry_open(): take initialization of file->f_path to caller ... and get rid of a couple of arguments and a pointless reassignment in finish_open() case. Signed-off-by: Al Viro diff --git a/fs/open.c b/fs/open.c index 28fbacb..124ccb1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -667,8 +667,7 @@ int open_check_o_direct(struct file *f) return 0; } -static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, +static int do_dentry_open(struct file *f, int (*open)(struct inode *, struct file *), const struct cred *cred) { @@ -682,9 +681,9 @@ static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, if (unlikely(f->f_flags & O_PATH)) f->f_mode = FMODE_PATH; - inode = dentry->d_inode; + inode = f->f_path.dentry->d_inode; if (f->f_mode & FMODE_WRITE) { - error = __get_file_write_access(inode, mnt); + error = __get_file_write_access(inode, f->f_path.mnt); if (error) goto cleanup_file; if (!special_file(inode->i_mode)) @@ -692,8 +691,6 @@ static int do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, } f->f_mapping = inode->i_mapping; - f->f_path.dentry = dentry; - f->f_path.mnt = mnt; f->f_pos = 0; file_sb_list_add(f, inode->i_sb); @@ -740,15 +737,14 @@ cleanup_all: * here, so just reset the state. */ file_reset_write(f); - mnt_drop_write(mnt); + mnt_drop_write(f->f_path.mnt); } } file_sb_list_del(f); - f->f_path.dentry = NULL; - f->f_path.mnt = NULL; cleanup_file: - dput(dentry); - mntput(mnt); + path_put(&f->f_path); + f->f_path.mnt = NULL; + f->f_path.dentry = NULL; return error; } @@ -771,9 +767,9 @@ int finish_open(struct file *file, struct dentry *dentry, BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ mntget(file->f_path.mnt); - dget(dentry); + file->f_path.dentry = dget(dentry); - error = do_dentry_open(dentry, file->f_path.mnt, file, open, current_cred()); + error = do_dentry_open(file, open, current_cred()); if (!error) *opened |= FILE_OPENED; @@ -821,7 +817,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, } f->f_flags = flags; - error = do_dentry_open(dentry, mnt, f, NULL, cred); + f->f_path.mnt = mnt; + f->f_path.dentry = dentry; + error = do_dentry_open(f, NULL, cred); if (!error) { error = open_check_o_direct(f); if (error) { -- cgit v0.10.2 From 93420b40bb19433c3bc01c37c6c908ae7ce13228 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 15:18:15 -0400 Subject: switch nfs_lookup_check_intent() away from nameidata just pass the flags Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f167c7a..48485f1 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1037,10 +1037,10 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) * component of the path and none of them is set before that last * component. */ -static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, +static inline unsigned int nfs_lookup_check_intent(unsigned int flags, unsigned int mask) { - return nd->flags & mask; + return flags & mask; } /* @@ -1051,7 +1051,7 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) { if (NFS_PROTO(dir)->version == 2) return 0; - return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL); + return nd && nfs_lookup_check_intent(nd->flags, LOOKUP_EXCL); } /* @@ -1074,7 +1074,7 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) if (nd->flags & LOOKUP_REVAL) goto out_force; /* This is an open(2) */ - if (nfs_lookup_check_intent(nd, LOOKUP_OPEN) != 0 && + if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && !(server->flags & NFS_MOUNT_NOCTO) && (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) @@ -1098,7 +1098,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { /* Don't revalidate a negative dentry if we're creating a new file */ - if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) + if (nd != NULL && nfs_lookup_check_intent(nd->flags, LOOKUP_CREATE) != 0) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; -- cgit v0.10.2 From facc3530fb5c89a40bc83045422add392b8db4a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 15:33:51 -0400 Subject: nfs_lookup_verify_inode() - nd is *always* non-NULL here Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 48485f1..ad5aef4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1069,19 +1069,16 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) if (IS_AUTOMOUNT(inode)) return 0; - if (nd != NULL) { - /* VFS wants an on-the-wire revalidation */ - if (nd->flags & LOOKUP_REVAL) - goto out_force; - /* This is an open(2) */ - if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && - !(server->flags & NFS_MOUNT_NOCTO) && - (S_ISREG(inode->i_mode) || - S_ISDIR(inode->i_mode))) - goto out_force; - return 0; - } - return nfs_revalidate_inode(server, inode); + /* VFS wants an on-the-wire revalidation */ + if (nd->flags & LOOKUP_REVAL) + goto out_force; + /* This is an open(2) */ + if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && + !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || + S_ISDIR(inode->i_mode))) + goto out_force; + return 0; out_force: return __nfs_revalidate_inode(server, inode); } -- cgit v0.10.2 From fa3c56bbda6c2ac2a57d96ba501dbe85cccd312b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 15:36:40 -0400 Subject: fs/nfs/dir.c: switch to passing nd->flags instead of nd wherever possible Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ad5aef4..71a1994 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1031,27 +1031,14 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) } /* - * Return the intent data that applies to this particular path component - * - * Note that the current set of intents only apply to the very last - * component of the path and none of them is set before that last - * component. - */ -static inline unsigned int nfs_lookup_check_intent(unsigned int flags, - unsigned int mask) -{ - return flags & mask; -} - -/* * Use intent information to check whether or not we're going to do * an O_EXCL create using this path component. */ -static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags) { if (NFS_PROTO(dir)->version == 2) return 0; - return nd && nfs_lookup_check_intent(nd->flags, LOOKUP_EXCL); + return flags & LOOKUP_EXCL; } /* @@ -1063,20 +1050,18 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) * */ static inline -int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) +int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) { struct nfs_server *server = NFS_SERVER(inode); if (IS_AUTOMOUNT(inode)) return 0; /* VFS wants an on-the-wire revalidation */ - if (nd->flags & LOOKUP_REVAL) + if (flags & LOOKUP_REVAL) goto out_force; /* This is an open(2) */ - if (nfs_lookup_check_intent(nd->flags, LOOKUP_OPEN) != 0 && - !(server->flags & NFS_MOUNT_NOCTO) && - (S_ISREG(inode->i_mode) || - S_ISDIR(inode->i_mode))) + if ((flags & LOOKUP_OPEN) && !(server->flags & NFS_MOUNT_NOCTO) && + (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) goto out_force; return 0; out_force: @@ -1092,10 +1077,10 @@ out_force: */ static inline int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { /* Don't revalidate a negative dentry if we're creating a new file */ - if (nd != NULL && nfs_lookup_check_intent(nd->flags, LOOKUP_CREATE) != 0) + if (flags & LOOKUP_CREATE) return 0; if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) return 1; @@ -1115,6 +1100,7 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, */ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { + unsigned int flags = nd->flags; struct inode *dir; struct inode *inode; struct dentry *parent; @@ -1122,7 +1108,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) struct nfs_fattr *fattr = NULL; int error; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; parent = dget_parent(dentry); @@ -1131,7 +1117,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) inode = dentry->d_inode; if (!inode) { - if (nfs_neg_need_reval(dir, dentry, nd)) + if (nfs_neg_need_reval(dir, dentry, flags)) goto out_bad; goto out_valid_noent; } @@ -1147,8 +1133,8 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) goto out_set_verifier; /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, nd)) + if (!nfs_is_exclusive_create(dir, flags) && nfs_check_verifier(dir, dentry)) { + if (nfs_lookup_verify_inode(inode, flags)) goto out_zap_parent; goto out_valid; } @@ -1306,7 +1292,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. */ - if (nfs_is_exclusive_create(dir, nd)) { + if (nd && nfs_is_exclusive_create(dir, nd->flags)) { d_instantiate(dentry, NULL); res = NULL; goto out; @@ -1507,15 +1493,16 @@ no_open: static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { + unsigned int flags = nd->flags; struct dentry *parent = NULL; struct inode *inode; struct inode *dir; int ret = 0; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; - if (!(nd->flags & LOOKUP_OPEN) || (nd->flags & LOOKUP_DIRECTORY)) + if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) goto no_open; if (d_mountpoint(dentry)) goto no_open; @@ -1528,7 +1515,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) * optimize away revalidation of negative dentries. */ if (inode == NULL) { - if (!nfs_neg_need_reval(dir, dentry, nd)) + if (!nfs_neg_need_reval(dir, dentry, flags)) ret = 1; goto out; } @@ -1537,7 +1524,7 @@ static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) if (!S_ISREG(inode->i_mode)) goto no_open_dput; /* We cannot do exclusive creation on a positive dentry */ - if (nd && nd->flags & LOOKUP_EXCL) + if (flags & LOOKUP_EXCL) goto no_open_dput; /* Let f_op->open() actually open (and revalidate) the file */ -- cgit v0.10.2 From 0b728e1911cbe6e24020727c3870628b9653f32a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 16:03:43 -0400 Subject: stop passing nameidata * to ->d_revalidate() Just the lookup flags. Die, bastard, die... Signed-off-by: Al Viro diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 33e5243..52a0573 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -9,7 +9,7 @@ be able to use diff(1). --------------------------- dentry_operations -------------------------- prototypes: - int (*d_revalidate)(struct dentry *, struct nameidata *); + int (*d_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index ed9fbc2..56750b7 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -431,3 +431,8 @@ release it yourself. d_alloc_root() is gone, along with a lot of bugs caused by code misusing it. Replacement: d_make_root(inode). The difference is, d_make_root() drops the reference to inode if dentry allocation fails. + +-- +[mandatory] + The witch is dead! Well, 1/3 of it, anyway. ->d_revalidate() does *not* +take struct nameidata anymore; just the flags. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 279de21..b9a406b 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -902,7 +902,7 @@ the VFS uses a default. As of kernel 2.6.22, the following members are defined: struct dentry_operations { - int (*d_revalidate)(struct dentry *, struct nameidata *); + int (*d_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, @@ -921,11 +921,11 @@ struct dentry_operations { dcache. Most filesystems leave this as NULL, because all their dentries in the dcache are valid - d_revalidate may be called in rcu-walk mode (nd->flags & LOOKUP_RCU). + d_revalidate may be called in rcu-walk mode (flags & LOOKUP_RCU). If in rcu-walk mode, the filesystem must revalidate the dentry without blocking or storing to the dentry, d_parent and d_inode should not be - used without care (because they can go NULL), instead nd->inode should - be used. + used without care (because they can change and, in d_inode case, even + become NULL under us). If a situation is encountered that rcu-walk cannot handle, return -ECHILD and it will be called again in ref-walk mode. diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index d529437..64600b5 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -100,13 +100,13 @@ static void v9fs_dentry_release(struct dentry *dentry) } } -static int v9fs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { struct p9_fid *fid; struct inode *inode; struct v9fs_inode *v9inode; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index e22dc4b..65c54ab 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -23,7 +23,7 @@ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); -static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd); +static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); static int afs_d_delete(const struct dentry *dentry); static void afs_d_release(struct dentry *dentry); static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, @@ -598,7 +598,7 @@ success: * - NOTE! the hit can be a negative hit too, so we can't assume we have an * inode */ -static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int afs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct afs_vnode *vnode, *dir; struct afs_fid uninitialized_var(fid); @@ -607,7 +607,7 @@ static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) void *dir_version; int ret; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; vnode = AFS_FS_I(dentry->d_inode); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index d42eee1..8898eef 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1042,12 +1042,12 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) /* * Check if cached dentry can be trusted. */ -static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags) { int valid = 0; struct inode *dir; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, @@ -1094,7 +1094,7 @@ static void ceph_d_release(struct dentry *dentry) } static int ceph_snapdir_d_revalidate(struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { /* * Eventually, we'll want to revalidate snapped metadata diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index e8c53c8..b97ff48 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -700,9 +700,9 @@ lookup_out: } static int -cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) +cifs_d_revalidate(struct dentry *direntry, unsigned int flags) { - if (nd && (nd->flags & LOOKUP_RCU)) + if (flags & LOOKUP_RCU) return -ECHILD; if (direntry->d_inode) { @@ -731,7 +731,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) * This may be nfsd (or something), anyway, we can't see the * intent of this. So, since this can be for creation, drop it. */ - if (!nd) + if (!flags) return 0; /* @@ -739,7 +739,7 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) * case sensitive name which is specified by user if this is * for creation. */ - if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled) diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 1775158..7f8f1a7 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -46,7 +46,7 @@ static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, static int coda_readdir(struct file *file, void *buf, filldir_t filldir); /* dentry ops */ -static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd); +static int coda_dentry_revalidate(struct dentry *de, unsigned int flags); static int coda_dentry_delete(const struct dentry *); /* support routines */ @@ -536,12 +536,12 @@ out: } /* called when a cache lookup succeeds */ -static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) +static int coda_dentry_revalidate(struct dentry *de, unsigned int flags) { struct inode *inode; struct coda_inode_info *cii; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = de->d_inode; diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c index 534c1d4..1b5d9af 100644 --- a/fs/ecryptfs/dentry.c +++ b/fs/ecryptfs/dentry.c @@ -32,7 +32,7 @@ /** * ecryptfs_d_revalidate - revalidate an ecryptfs dentry * @dentry: The ecryptfs dentry - * @nd: The associated nameidata + * @flags: lookup flags * * Called when the VFS needs to revalidate a dentry. This * is called whenever a name lookup finds a dentry in the @@ -42,32 +42,20 @@ * Returns 1 if valid, 0 otherwise. * */ -static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags) { struct dentry *lower_dentry; struct vfsmount *lower_mnt; - struct dentry *dentry_save = NULL; - struct vfsmount *vfsmount_save = NULL; int rc = 1; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; lower_dentry = ecryptfs_dentry_to_lower(dentry); lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry); if (!lower_dentry->d_op || !lower_dentry->d_op->d_revalidate) goto out; - if (nd) { - dentry_save = nd->path.dentry; - vfsmount_save = nd->path.mnt; - nd->path.dentry = lower_dentry; - nd->path.mnt = lower_mnt; - } - rc = lower_dentry->d_op->d_revalidate(lower_dentry, nd); - if (nd) { - nd->path.dentry = dentry_save; - nd->path.mnt = vfsmount_save; - } + rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags); if (dentry->d_inode) { struct inode *lower_inode = ecryptfs_inode_to_lower(dentry->d_inode); diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 98ae804..0bbdf39 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -41,9 +41,9 @@ static int vfat_revalidate_shortname(struct dentry *dentry) return ret; } -static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +static int vfat_revalidate(struct dentry *dentry, unsigned int flags) { - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; /* This is not negative dentry. Always valid. */ @@ -52,9 +52,9 @@ static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) return vfat_revalidate_shortname(dentry); } -static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) +static int vfat_revalidate_ci(struct dentry *dentry, unsigned int flags) { - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; /* @@ -74,7 +74,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) * This may be nfsd (or something), anyway, we can't see the * intent of this. So, since this can be for creation, drop it. */ - if (!nd) + if (!flags) return 0; /* @@ -82,7 +82,7 @@ static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) * case sensitive name which is specified by user if this is * for creation. */ - if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; return vfat_revalidate_shortname(dentry); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index ccdab3a..eba30bd 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -154,7 +154,7 @@ u64 fuse_get_attr_version(struct fuse_conn *fc) * the lookup once more. If the lookup results in the same inode, * then refresh the attributes, timeouts and mark the dentry valid. */ -static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) +static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags) { struct inode *inode; @@ -174,7 +174,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, struct nameidata *nd) if (!inode) return 0; - if (nd && (nd->flags & LOOKUP_RCU)) + if (flags & LOOKUP_RCU) return -ECHILD; fc = get_fuse_conn(inode); diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index 0da8da2..4fddb3c 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c @@ -25,7 +25,7 @@ /** * gfs2_drevalidate - Check directory lookup consistency * @dentry: the mapping to check - * @nd: + * @flags: lookup flags * * Check to make sure the lookup necessary to arrive at this inode from its * parent is still good. @@ -33,7 +33,7 @@ * Returns: 1 if the dentry is ok, 0 if it isn't */ -static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) +static int gfs2_drevalidate(struct dentry *dentry, unsigned int flags) { struct dentry *parent; struct gfs2_sbd *sdp; @@ -44,7 +44,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) int error; int had_lock = 0; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; parent = dget_parent(dentry); diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c index 19cf291..91b91fd 100644 --- a/fs/hfs/sysdep.c +++ b/fs/hfs/sysdep.c @@ -13,12 +13,12 @@ /* dentry case-handling: just lowercase everything */ -static int hfs_revalidate_dentry(struct dentry *dentry, struct nameidata *nd) +static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags) { struct inode *inode; int diff; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 07c91ca..f37977f 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1570,7 +1570,7 @@ out: return result; } -static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) +static int jfs_ci_revalidate(struct dentry *dentry, unsigned int flags) { /* * This is not negative dentry. Always valid. @@ -1589,7 +1589,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) * This may be nfsd (or something), anyway, we can't see the * intent of this. So, since this can be for creation, drop it. */ - if (!nd) + if (!flags) return 0; /* @@ -1597,7 +1597,7 @@ static int jfs_ci_revalidate(struct dentry *dentry, struct nameidata *nd) * case sensitive name which is specified by user if this is * for creation. */ - if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) return 0; return 1; } diff --git a/fs/namei.c b/fs/namei.c index 16256d9..1a5707a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -465,7 +465,7 @@ err_root: static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) { - return dentry->d_op->d_revalidate(dentry, nd); + return dentry->d_op->d_revalidate(dentry, nd ? nd->flags : 0); } /** diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index aeed93a..32607f7 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -72,7 +72,7 @@ const struct inode_operations ncp_dir_inode_operations = /* * Dentry operations routines */ -static int ncp_lookup_validate(struct dentry *, struct nameidata *); +static int ncp_lookup_validate(struct dentry *, unsigned int); static int ncp_hash_dentry(const struct dentry *, const struct inode *, struct qstr *); static int ncp_compare_dentry(const struct dentry *, const struct inode *, @@ -290,7 +290,7 @@ leave_me:; static int -ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd) +ncp_lookup_validate(struct dentry *dentry, unsigned int flags) { struct ncp_server *server; struct dentry *parent; @@ -302,7 +302,7 @@ ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd) if (dentry == dentry->d_sb->s_root) return 1; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; parent = dget_parent(dentry); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 71a1994..656f52e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1098,9 +1098,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, * If the parent directory is seen to have changed, we throw out the * cached dentry and do a new lookup. */ -static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - unsigned int flags = nd->flags; struct inode *dir; struct inode *inode; struct dentry *parent; @@ -1339,7 +1338,7 @@ out: } #ifdef CONFIG_NFS_V4 -static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *); +static int nfs4_lookup_revalidate(struct dentry *, unsigned int); const struct dentry_operations nfs4_dentry_operations = { .d_revalidate = nfs4_lookup_revalidate, @@ -1491,9 +1490,8 @@ no_open: return finish_no_open(file, res); } -static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags) { - unsigned int flags = nd->flags; struct dentry *parent = NULL; struct inode *inode; struct inode *dir; @@ -1537,7 +1535,7 @@ out: no_open_dput: dput(parent); no_open: - return nfs_lookup_revalidate(dentry, nd); + return nfs_lookup_revalidate(dentry, flags); } #endif /* CONFIG_NFSV4 */ diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index af44882..8db4b58 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c @@ -49,14 +49,13 @@ void ocfs2_dentry_attach_gen(struct dentry *dentry) } -static int ocfs2_dentry_revalidate(struct dentry *dentry, - struct nameidata *nd) +static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; int ret = 0; /* if all else fails, just return false */ struct ocfs2_super *osb; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; diff --git a/fs/proc/base.c b/fs/proc/base.c index 437195f..bf749cc 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1601,13 +1601,13 @@ int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * made this apply to all per process world readable and executable * directories. */ -int pid_revalidate(struct dentry *dentry, struct nameidata *nd) +int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; const struct cred *cred; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; @@ -1781,7 +1781,7 @@ static int proc_fd_link(struct dentry *dentry, struct path *path) return proc_fd_info(dentry->d_inode, path, NULL); } -static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) +static int tid_fd_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; @@ -1789,7 +1789,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) struct files_struct *files; const struct cred *cred; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; inode = dentry->d_inode; @@ -1868,7 +1868,7 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, NULL)) + if (tid_fd_revalidate(dentry, 0)) error = NULL; out: @@ -2003,7 +2003,7 @@ static int dname_to_vma_addr(struct dentry *dentry, return 0; } -static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) +static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; bool exact_vma_exists = false; @@ -2013,7 +2013,7 @@ static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) struct inode *inode; int status = 0; - if (nd && nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; if (!capable(CAP_SYS_ADMIN)) { @@ -2371,7 +2371,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, d_set_d_op(dentry, &tid_fd_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (tid_fd_revalidate(dentry, NULL)) + if (tid_fd_revalidate(dentry, 0)) error = NULL; out: @@ -2430,7 +2430,7 @@ static struct dentry *proc_pident_instantiate(struct inode *dir, d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; @@ -3237,7 +3237,7 @@ static struct dentry *proc_pid_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; @@ -3508,7 +3508,7 @@ static struct dentry *proc_task_instantiate(struct inode *dir, d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index eca4aca..e0c2a48 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -142,7 +142,7 @@ typedef struct dentry *instantiate_t(struct inode *, struct dentry *, int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, const char *name, int len, instantiate_t instantiate, struct task_struct *task, const void *ptr); -int pid_revalidate(struct dentry *dentry, struct nameidata *nd); +int pid_revalidate(struct dentry *dentry, unsigned int flags); struct inode *proc_pid_make_inode(struct super_block * sb, struct task_struct *task); extern const struct dentry_operations pid_dentry_operations; int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 0d9e23a..40ceb40 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -56,7 +56,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); /* Close the race of the process dying before we return the dentry */ - if (pid_revalidate(dentry, NULL)) + if (pid_revalidate(dentry, 0)) error = NULL; out: return error; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3476bca..fda69fa 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -794,9 +794,9 @@ static const struct inode_operations proc_sys_dir_operations = { .getattr = proc_sys_getattr, }; -static int proc_sys_revalidate(struct dentry *dentry, struct nameidata *nd) +static int proc_sys_revalidate(struct dentry *dentry, unsigned int flags) { - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; return !PROC_I(dentry->d_inode)->sysctl->unregistering; } diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 46fc1c2..e6ad8d7 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -942,7 +942,7 @@ int reiserfs_permission(struct inode *inode, int mask) return generic_permission(inode, mask); } -static int xattr_hide_revalidate(struct dentry *dentry, struct nameidata *nd) +static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags) { return -EPERM; } diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index e6bb9b2..038e74b 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -303,12 +303,12 @@ static int sysfs_dentry_delete(const struct dentry *dentry) return !!(sd->s_flags & SYSFS_FLAG_REMOVED); } -static int sysfs_dentry_revalidate(struct dentry *dentry, struct nameidata *nd) +static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) { struct sysfs_dirent *sd; int is_dir; - if (nd->flags & LOOKUP_RCU) + if (flags & LOOKUP_RCU) return -ECHILD; sd = dentry->d_fsdata; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8ca2555..caa34e5 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -144,7 +144,7 @@ enum dentry_d_lock_class }; struct dentry_operations { - int (*d_revalidate)(struct dentry *, struct nameidata *); + int (*d_revalidate)(struct dentry *, unsigned int); int (*d_hash)(const struct dentry *, const struct inode *, struct qstr *); int (*d_compare)(const struct dentry *, const struct inode *, -- cgit v0.10.2 From 4ce16ef3fed92c627b4b0136c02c85c81ee105e0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 16:10:59 -0400 Subject: fs/namei.c: don't pass nameidata to d_revalidate() since the method wrapped by it doesn't need that anymore... Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 1a5707a..91c637b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -463,9 +463,9 @@ err_root: return -ECHILD; } -static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd) +static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { - return dentry->d_op->d_revalidate(dentry, nd ? nd->flags : 0); + return dentry->d_op->d_revalidate(dentry, flags); } /** @@ -511,7 +511,7 @@ static int complete_walk(struct nameidata *nd) return 0; /* Note: we do not d_invalidate() */ - status = d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd->flags); if (status > 0) return 0; @@ -1050,7 +1050,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, if (d_need_lookup(dentry)) { *need_lookup = true; } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, nd); + error = d_revalidate(dentry, nd ? nd->flags : 0); if (unlikely(error <= 0)) { if (error < 0) { dput(dentry); @@ -1158,7 +1158,7 @@ static int lookup_fast(struct nameidata *nd, struct qstr *name, if (unlikely(d_need_lookup(dentry))) goto unlazy; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { - status = d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { if (status != -ECHILD) need_reval = 0; @@ -1188,7 +1188,7 @@ unlazy: } if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval) - status = d_revalidate(dentry, nd); + status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { if (status < 0) { dput(dentry); -- cgit v0.10.2 From 201f956e43d4542723514e024d948011dd766d43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Jun 2012 12:42:10 +0400 Subject: fs/namei.c: don't pass namedata to lookup_dcache() just the flags... Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 91c637b..2e943ab 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1039,7 +1039,7 @@ static void follow_dotdot(struct nameidata *nd) * dir->d_inode->i_mutex must be held */ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, - struct nameidata *nd, bool *need_lookup) + unsigned int flags, bool *need_lookup) { struct dentry *dentry; int error; @@ -1050,7 +1050,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, if (d_need_lookup(dentry)) { *need_lookup = true; } else if (dentry->d_flags & DCACHE_OP_REVALIDATE) { - error = d_revalidate(dentry, nd ? nd->flags : 0); + error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { if (error < 0) { dput(dentry); @@ -1104,7 +1104,7 @@ static struct dentry *__lookup_hash(struct qstr *name, bool need_lookup; struct dentry *dentry; - dentry = lookup_dcache(name, base, nd, &need_lookup); + dentry = lookup_dcache(name, base, nd ? nd->flags : 0, &need_lookup); if (!need_lookup) return dentry; @@ -2356,7 +2356,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, bool need_lookup; *opened &= ~FILE_CREATED; - dentry = lookup_dcache(&nd->last, dir, nd, &need_lookup); + dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup); if (IS_ERR(dentry)) return PTR_ERR(dentry); -- cgit v0.10.2 From 00cd8dd3bf95f2cc8435b4cac01d9995635c6d0b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 17:13:09 -0400 Subject: stop passing nameidata to ->lookup() Just the flags; only NFS cares even about that, but there are legitimate uses for such argument. And getting rid of that completely would require splitting ->lookup() into a couple of methods (at least), so let's leave that alone for now... Signed-off-by: Al Viro diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 52a0573..33f2c8f 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -38,8 +38,7 @@ d_manage: no no yes (ref-walk) maybe --------------------------- inode_operations --------------------------- prototypes: int (*create) (struct inode *,struct dentry *,umode_t, struct nameidata *); - struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameid -ata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 56750b7..690f573 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -434,5 +434,5 @@ d_make_root() drops the reference to inode if dentry allocation fails. -- [mandatory] - The witch is dead! Well, 1/3 of it, anyway. ->d_revalidate() does *not* -take struct nameidata anymore; just the flags. + The witch is dead! Well, 2/3 of it, anyway. ->d_revalidate() and +->lookup() do *not* take struct nameidata anymore; just the flags. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index b9a406b..ee78635 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -342,7 +342,7 @@ filesystem. As of kernel 2.6.22, the following members are defined: struct inode_operations { int (*create) (struct inode *,struct dentry *, umode_t, struct nameidata *); - struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index e78956c..34c59f1 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -144,7 +144,7 @@ extern void v9fs_session_close(struct v9fs_session_info *v9ses); extern void v9fs_session_cancel(struct v9fs_session_info *v9ses); extern void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses); extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nameidata); + unsigned int flags); extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index eae476f..bb0d762 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -785,7 +785,7 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode */ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nameidata) + unsigned int flags) { struct dentry *res; struct super_block *sb; @@ -795,8 +795,8 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, char *name; int result = 0; - p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p nameidata: %p\n", - dir, dentry->d_name.name, dentry, nameidata); + p9_debug(P9_DEBUG_VFS, "dir: %p dentry: (%s) %p flags: %x\n", + dir, dentry->d_name.name, dentry, flags); if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); @@ -869,7 +869,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry, struct dentry *res = NULL; if (d_unhashed(dentry)) { - res = v9fs_vfs_lookup(dir, dentry, NULL); + res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 1ee10c8..b97619f 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -259,7 +259,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry, struct dentry *res = NULL; if (d_unhashed(dentry)) { - res = v9fs_vfs_lookup(dir, dentry, NULL); + res = v9fs_vfs_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 3d83075a..b3be2e7 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -266,7 +266,7 @@ const struct dentry_operations adfs_dentry_operations = { }; static struct dentry * -adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +adfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode = NULL; struct object_info obj; diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 3a130e2..49e4e34 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -153,7 +153,7 @@ extern void affs_free_bitmap(struct super_block *sb); /* namei.c */ extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); -extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *); +extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int); extern int affs_unlink(struct inode *dir, struct dentry *dentry); extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *); extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 4780694..7f9721b 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -211,7 +211,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry) } struct dentry * -affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct super_block *sb = dir->i_sb; struct buffer_head *bh; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 65c54ab..ffb33e3 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -20,7 +20,7 @@ #include "internal.h" static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd); + unsigned int flags); static int afs_dir_open(struct inode *inode, struct file *file); static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); @@ -516,7 +516,7 @@ out: * look up an entry in a directory */ static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct afs_vnode *vnode; struct afs_fid fid; diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 298cf89..9682c33 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -22,7 +22,7 @@ static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd); + unsigned int flags); static int afs_mntpt_open(struct inode *inode, struct file *file); static void afs_mntpt_expiry_timed_out(struct work_struct *work); @@ -104,7 +104,7 @@ out: */ static struct dentry *afs_mntpt_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { _enter("%p,%p{%p{%s},%s}", dir, diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 75e5f1c..e7396cf 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -32,7 +32,7 @@ static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); #endif static int autofs4_dir_open(struct inode *inode, struct file *file); -static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); +static struct dentry *autofs4_lookup(struct inode *,struct dentry *, unsigned int); static struct vfsmount *autofs4_d_automount(struct path *); static int autofs4_d_manage(struct dentry *, bool); static void autofs4_dentry_release(struct dentry *); @@ -458,7 +458,7 @@ int autofs4_d_manage(struct dentry *dentry, bool rcu_walk) } /* Lookups in the root directory */ -static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct autofs_sb_info *sbi; struct autofs_info *ino; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 1b35d6b..d27e73c 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -179,7 +179,7 @@ static int bad_inode_create (struct inode *dir, struct dentry *dentry, } static struct dentry *bad_inode_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { return ERR_PTR(-EIO); } diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index e18da23..cf7f3c6 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -34,7 +34,7 @@ static int befs_readdir(struct file *, void *, filldir_t); static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); static int befs_readpage(struct file *file, struct page *page); static sector_t befs_bmap(struct address_space *mapping, sector_t block); -static struct dentry *befs_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int); static struct inode *befs_iget(struct super_block *, unsigned long); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_destroy_inode(struct inode *inode); @@ -159,7 +159,7 @@ befs_get_block(struct inode *inode, sector_t block, } static struct dentry * -befs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode = NULL; struct super_block *sb = dir->i_sb; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index d12c796..3f1cd3b 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -133,7 +133,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } static struct dentry *bfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = NULL; struct buffer_head *bh; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a101572..e5f1f81 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4247,7 +4247,7 @@ static void btrfs_dentry_release(struct dentry *dentry) } static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *ret; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 8898eef..74b2f3c 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -576,7 +576,7 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) * the MDS so that it gets our 'caps wanted' value in a single op. */ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; @@ -653,7 +653,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, } if (d_unhashed(dentry)) { - res = ceph_lookup(dir, dentry, NULL); + res = ceph_lookup(dir, dentry, 0); if (IS_ERR(res)) return PTR_ERR(res); @@ -678,7 +678,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, */ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) { - struct dentry *result = ceph_lookup(dir, dentry, NULL); + struct dentry *result = ceph_lookup(dir, dentry, 0); if (result && !IS_ERR(result)) { /* diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 48bb474..1abd31f 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -50,7 +50,7 @@ extern int cifs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); extern struct dentry *cifs_lookup(struct inode *, struct dentry *, - struct nameidata *); + unsigned int); extern int cifs_unlink(struct inode *dir, struct dentry *dentry); extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index b97ff48..2d732b9 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -401,7 +401,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, * in network traffic in the other paths. */ if (!(oflags & O_CREAT)) { - struct dentry *res = cifs_lookup(inode, direntry, NULL); + struct dentry *res = cifs_lookup(inode, direntry, 0); if (IS_ERR(res)) return PTR_ERR(res); @@ -621,7 +621,7 @@ mknod_out: struct dentry * cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, - struct nameidata *nd) + unsigned int flags) { int xid; int rc = 0; /* to get around spurious gcc warning, set to zero here */ diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 7f8f1a7..da35e96 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -31,7 +31,7 @@ /* dir inode-ops */ static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd); -static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, struct nameidata *nd); +static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags); static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, struct dentry *entry); static int coda_unlink(struct inode *dir_inode, struct dentry *entry); @@ -94,7 +94,7 @@ const struct file_operations coda_dir_operations = { /* inode operations for directories */ /* access routines: lookup, readlink, permission */ -static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struct nameidata *nd) +static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsigned int flags) { struct super_block *sb = dir->i_sb; const char *name = entry->d_name.name; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7e6c52d..7414ae2 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -442,7 +442,7 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den static struct dentry * configfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata; struct configfs_dirent * sd; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index d013c46..28cca01 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -417,7 +417,7 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * Lookup and fill in the inode data.. */ -static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry * cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { unsigned int offset = 0; struct inode *inode = NULL; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index a07441a..4ab50c3 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -374,7 +374,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, */ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode, struct dentry *ecryptfs_dentry, - struct nameidata *ecryptfs_nd) + unsigned int flags) { char *encrypted_and_encoded_name = NULL; size_t encrypted_and_encoded_name_size; diff --git a/fs/efs/efs.h b/fs/efs/efs.h index d8305b5..5528926 100644 --- a/fs/efs/efs.h +++ b/fs/efs/efs.h @@ -129,7 +129,7 @@ extern struct inode *efs_iget(struct super_block *, unsigned long); extern efs_block_t efs_map_block(struct inode *, efs_block_t); extern int efs_get_block(struct inode *, sector_t, struct buffer_head *, int); -extern struct dentry *efs_lookup(struct inode *, struct dentry *, struct nameidata *); +extern struct dentry *efs_lookup(struct inode *, struct dentry *, unsigned int); extern struct dentry *efs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); extern struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 832b10d..96f66d2 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -58,7 +58,8 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len) return(0); } -struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { +struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ efs_ino_t inodenum; struct inode *inode = NULL; diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index fc7161d..909ed6e 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -46,7 +46,7 @@ static inline int exofs_add_nondir(struct dentry *dentry, struct inode *inode) } static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode; ino_t ino; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index f663a67..b3e6778 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -55,7 +55,7 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) * Methods themselves. */ -static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode; ino_t ino; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index eeb63df..86d25f3 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1011,7 +1011,7 @@ errout: return NULL; } -static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode; struct ext3_dir_entry_2 * de; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5845cd9..4fba3cd 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1312,7 +1312,7 @@ errout: return NULL; } -static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; struct ext4_dir_entry_2 *de; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index c5938c9..47c608b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -201,7 +201,7 @@ static const struct dentry_operations msdos_dentry_operations = { /***** Get inode using directory and name */ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 0bbdf39..4415257 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -714,7 +714,7 @@ static int vfat_d_anon_disconn(struct dentry *dentry) } static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 3360f1e..bd447e8 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -48,7 +48,7 @@ #define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_CACHE_SIZE / (sbp)->s_blocksize)) -static struct dentry * vxfs_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry * vxfs_lookup(struct inode *, struct dentry *, unsigned int); static int vxfs_readdir(struct file *, void *, filldir_t); const struct inode_operations vxfs_dir_inode_ops = { @@ -203,7 +203,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp) * in the return pointer. */ static struct dentry * -vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd) +vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags) { struct inode *ip = NULL; ino_t ino; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index eba30bd..385235a 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -316,7 +316,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name, } static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry, - struct nameidata *nd) + unsigned int flags) { int err; struct fuse_entry_out outarg; @@ -478,7 +478,7 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry, struct dentry *res = NULL; if (d_unhashed(entry)) { - res = fuse_lookup(dir, entry, NULL); + res = fuse_lookup(dir, entry, 0); if (IS_ERR(res)) return PTR_ERR(res); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index a9ba244..19e443b 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -775,7 +775,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, */ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0); if (inode && !IS_ERR(inode)) { diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 62fc14e..617b1ed 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -18,7 +18,7 @@ * hfs_lookup() */ static struct dentry *hfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { hfs_cat_rec rec; struct hfs_find_data fd; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 761ec06..451c972 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -489,7 +489,7 @@ out: } static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = NULL; hfs_cat_rec rec; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 26b53fb..90c2f78 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -25,7 +25,7 @@ static inline void hfsplus_instantiate(struct dentry *dentry, /* Find the entry inside dir named dentry->d_name */ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = NULL; struct hfs_find_data fd; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 82b69ee..7009265 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -168,7 +168,7 @@ const struct dentry_operations hfsplus_dentry_operations = { }; static struct dentry *hfsplus_file_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { struct hfs_find_data fd; struct super_block *sb = dir->i_sb; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 2afa5bb..0ea0052 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -595,7 +595,7 @@ int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode; char *name; diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index b8472f8..78e12b2 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -189,7 +189,7 @@ out: * to tell read_inode to read fnode or not. */ -struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { const unsigned char *name = dentry->d_name.name; unsigned len = dentry->d_name.len; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index c07ef1f..ac1ead1 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -220,7 +220,7 @@ extern const struct dentry_operations hpfs_dentry_operations; /* dir.c */ -struct dentry *hpfs_lookup(struct inode *, struct dentry *, struct nameidata *); +struct dentry *hpfs_lookup(struct inode *, struct dentry *, unsigned int); extern const struct file_operations hpfs_dir_ops; /* dnode.c */ diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index d4f93b5..e5c0653 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -138,7 +138,7 @@ static int file_removed(struct dentry *dentry, const char *file) } static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *proc_dentry, *parent; struct qstr *name = &dentry->d_name; diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 0e73f63..3620ad1 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -114,7 +114,7 @@ extern int isofs_name_translate(struct iso_directory_record *, char *, struct in int get_joliet_filename(struct iso_directory_record *, unsigned char *, struct inode *); int get_acorn_filename(struct iso_directory_record *, char *, struct inode *); -extern struct dentry *isofs_lookup(struct inode *, struct dentry *, struct nameidata *); +extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int flags); extern struct buffer_head *isofs_bread(struct inode *, sector_t); extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long); diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 1e2946f..c167028 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -163,7 +163,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, return 0; } -struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { int found; unsigned long uninitialized_var(block); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index b560188..6a60167 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -27,7 +27,7 @@ static int jffs2_readdir (struct file *, void *, filldir_t); static int jffs2_create (struct inode *,struct dentry *,umode_t, struct nameidata *); static struct dentry *jffs2_lookup (struct inode *,struct dentry *, - struct nameidata *); + unsigned int); static int jffs2_link (struct dentry *,struct inode *,struct dentry *); static int jffs2_unlink (struct inode *,struct dentry *); static int jffs2_symlink (struct inode *,struct dentry *,const char *); @@ -74,7 +74,7 @@ const struct inode_operations jffs2_dir_inode_operations = nice and simple */ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, - struct nameidata *nd) + unsigned int flags) { struct jffs2_inode_info *dir_f; struct jffs2_full_dirent *fd = NULL, *fd_list; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index f37977f..34fe855 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1436,7 +1436,7 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, return rc; } -static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, struct nameidata *nd) +static struct dentry *jfs_lookup(struct inode *dip, struct dentry *dentry, unsigned int flags) { struct btstack btstack; ino_t inum; diff --git a/fs/libfs.c b/fs/libfs.c index f86ec27..ebd03f6 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -53,7 +53,7 @@ static int simple_delete_dentry(const struct dentry *dentry) * Lookup the data. This is trivial - if the dentry didn't already * exist, we know it is negative. Set d_op to delete negative dentries. */ -struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { static const struct dentry_operations simple_dentry_operations = { .d_delete = simple_delete_dentry, diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index bea5d1b..8a3dcc6 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -349,7 +349,7 @@ static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name) } static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct page *page; struct logfs_disk_dentry *dd; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 2d0ee17..1f24524 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -18,7 +18,7 @@ static int add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode = NULL; ino_t ino; diff --git a/fs/namei.c b/fs/namei.c index 2e943ab..175e81b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1090,7 +1090,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } - old = dir->i_op->lookup(dir, dentry, nd); + old = dir->i_op->lookup(dir, dentry, nd ? nd->flags : 0); if (unlikely(old)) { dput(dentry); dentry = old; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 32607f7..a0cff22 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -31,7 +31,7 @@ static void ncp_do_readdir(struct file *, void *, filldir_t, static int ncp_readdir(struct file *, void *, filldir_t); static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *); -static struct dentry *ncp_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int); static int ncp_unlink(struct inode *, struct dentry *); static int ncp_mkdir(struct inode *, struct dentry *, umode_t); static int ncp_rmdir(struct inode *, struct dentry *); @@ -836,7 +836,7 @@ out: return result; } -static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct ncp_server *server = NCP_SERVER(dir); struct inode *inode = NULL; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 656f52e..8f21205 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -46,7 +46,7 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); -static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); static int nfs_mkdir(struct inode *, struct dentry *, umode_t); static int nfs_rmdir(struct inode *, struct dentry *); @@ -1270,7 +1270,7 @@ const struct dentry_operations nfs_dentry_operations = { .d_release = nfs_d_release, }; -static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *res; struct dentry *parent; @@ -1291,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru * If we're doing an exclusive create, optimize away the lookup * but don't hash the dentry. */ - if (nd && nfs_is_exclusive_create(dir, nd->flags)) { + if (nfs_is_exclusive_create(dir, flags)) { d_instantiate(dentry, NULL); res = NULL; goto out; @@ -1482,7 +1482,7 @@ out: return err; no_open: - res = nfs_lookup(dir, dentry, NULL); + res = nfs_lookup(dir, dentry, 0); err = PTR_ERR(res); if (IS_ERR(res)) goto out; diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index b728479..5e5f779 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -63,7 +63,7 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) */ static struct dentry * -nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode; ino_t ino; diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 358273e..436f360 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -101,7 +101,7 @@ * Locking: Caller must hold i_mutex on the directory. */ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, - struct nameidata *nd) + unsigned int flags) { ntfs_volume *vol = NTFS_SB(dir_ino->i_sb); struct inode *dent_inode; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 9f39c64..fd71f6e 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -98,7 +98,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb, #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { int status; u64 blkno; diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index f00576e..3d25487 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -291,7 +291,7 @@ static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, } static struct dentry *omfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct buffer_head *bh; struct inode *inode = NULL; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index bc49c97..4a34779 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -170,13 +170,13 @@ static const struct file_operations openprom_operations = { .llseek = generic_file_llseek, }; -static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, unsigned int); static const struct inode_operations openprom_inode_operations = { .lookup = openpromfs_lookup, }; -static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *openpromfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct op_inode_info *ent_oi, *oi = OP_I(dir); struct device_node *dp, *child; diff --git a/fs/proc/base.c b/fs/proc/base.c index bf749cc..8eaa5ea 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1956,7 +1956,7 @@ out_no_task: } static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { return proc_lookupfd_common(dir, dentry, proc_fd_instantiate); } @@ -2145,7 +2145,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, } static struct dentry *proc_map_files_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; @@ -2380,7 +2380,7 @@ static struct dentry *proc_fdinfo_instantiate(struct inode *dir, static struct dentry *proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); } @@ -2630,7 +2630,7 @@ static const struct file_operations proc_attr_dir_operations = { }; static struct dentry *proc_attr_dir_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); @@ -3114,7 +3114,8 @@ static const struct file_operations proc_tgid_base_operations = { .llseek = default_llseek, }; -static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ return proc_pident_lookup(dir, dentry, tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } @@ -3243,7 +3244,7 @@ out: return error; } -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *result; struct task_struct *task; @@ -3470,7 +3471,8 @@ static int proc_tid_base_readdir(struct file * filp, tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); } -static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ +static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ return proc_pident_lookup(dir, dentry, tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); } @@ -3514,7 +3516,7 @@ out: return error; } -static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct dentry *result = ERR_PTR(-ENOENT); struct task_struct *task; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 2edf34f..b3647fe 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -446,7 +446,7 @@ out_unlock: } struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { return proc_lookup_de(PDE(dir), dir, dentry); } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index e0c2a48..e1167a1 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -106,7 +106,7 @@ void pde_users_dec(struct proc_dir_entry *pde); extern spinlock_t proc_subdir_lock; -struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *); +struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int); int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir); unsigned long task_vsize(struct mm_struct *); unsigned long task_statm(struct mm_struct *, @@ -132,7 +132,7 @@ int proc_remount(struct super_block *sb, int *flags, char *data); * of the /proc/ subdirectories. */ int proc_readdir(struct file *, void *, filldir_t); -struct dentry *proc_lookup(struct inode *, struct dentry *, struct nameidata *); +struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 40ceb40..b178ed7 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -140,7 +140,7 @@ const struct file_operations proc_ns_dir_operations = { }; static struct dentry *proc_ns_dir_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { struct dentry *error; struct task_struct *task = get_proc_task(dir); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 06e1cc1..fe72cd0 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -119,7 +119,7 @@ static struct net *get_proc_task_net(struct inode *dir) } static struct dentry *proc_tgid_net_lookup(struct inode *dir, - struct dentry *dentry, struct nameidata *nd) + struct dentry *dentry, unsigned int flags) { struct dentry *de; struct net *net; diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fda69fa..dfafeb2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -433,7 +433,7 @@ static struct ctl_table_header *grab_header(struct inode *inode) } static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct ctl_table_header *head = grab_header(dir); struct ctl_table_header *h = NULL; diff --git a/fs/proc/root.c b/fs/proc/root.c index 7c30fce..568b202 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -200,13 +200,12 @@ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct return 0; } -static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags) { - if (!proc_lookup(dir, dentry, nd)) { + if (!proc_lookup(dir, dentry, flags)) return NULL; - } - return proc_pid_lookup(dir, dentry, nd); + return proc_pid_lookup(dir, dentry, flags); } static int proc_root_readdir(struct file * filp, diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index a512c0b..d024505 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c @@ -95,7 +95,7 @@ static struct buffer_head *qnx4_find_entry(int len, struct inode *dir, return NULL; } -struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { int ino; struct qnx4_inode_entry *de; diff --git a/fs/qnx4/qnx4.h b/fs/qnx4/qnx4.h index 244d462..34e2d32 100644 --- a/fs/qnx4/qnx4.h +++ b/fs/qnx4/qnx4.h @@ -23,7 +23,7 @@ struct qnx4_inode_info { }; extern struct inode *qnx4_iget(struct super_block *, unsigned long); -extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd); +extern struct dentry *qnx4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags); extern unsigned long qnx4_count_free_blocks(struct super_block *sb); extern unsigned long qnx4_block_map(struct inode *inode, long iblock); diff --git a/fs/qnx6/namei.c b/fs/qnx6/namei.c index 8a97289..0561326 100644 --- a/fs/qnx6/namei.c +++ b/fs/qnx6/namei.c @@ -13,7 +13,7 @@ #include "qnx6.h" struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { unsigned ino; struct page *page; diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h index 6c5e02a..b00fcc9 100644 --- a/fs/qnx6/qnx6.h +++ b/fs/qnx6/qnx6.h @@ -45,7 +45,7 @@ struct qnx6_inode_info { extern struct inode *qnx6_iget(struct super_block *sb, unsigned ino); extern struct dentry *qnx6_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd); + unsigned int flags); #ifdef CONFIG_QNX6FS_DEBUG extern void qnx6_superblock_debug(struct qnx6_super_block *, diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 84e8a69..1d9cf24 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -322,7 +322,7 @@ static int reiserfs_find_entry(struct inode *dir, const char *name, int namelen, } static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { int retval; int lock_depth; diff --git a/fs/romfs/super.c b/fs/romfs/super.c index e64f6b5..77c5f21 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -210,7 +210,7 @@ out: * look up an entry in a directory */ static struct dentry *romfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { unsigned long offset, maxoff; struct inode *inode; diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c index abcc58f..7834a51 100644 --- a/fs/squashfs/namei.c +++ b/fs/squashfs/namei.c @@ -134,7 +134,7 @@ out: static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { const unsigned char *name = dentry->d_name.name; int len = dentry->d_name.len; diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 038e74b..efd373e 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -764,7 +764,7 @@ int sysfs_create_dir(struct kobject * kobj) } static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *ret = NULL; struct dentry *parent = dentry->d_parent; diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index d7466e2..a8c4359 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -43,7 +43,7 @@ const struct dentry_operations sysv_dentry_operations = { .d_hash = sysv_hash, }; -static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, struct nameidata *nd) +static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags) { struct inode * inode = NULL; ino_t ino; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index a6d42ef..845b2df 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -184,7 +184,7 @@ static int dbg_check_name(const struct ubifs_info *c, } static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { int err; union ubifs_key key; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 1802417..929cc20 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -251,7 +251,7 @@ out_ok: } static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct inode *inode = NULL; struct fileIdentDesc cfi; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index a2281ca..bc77fa1 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -46,7 +46,7 @@ static inline int ufs_add_nondir(struct dentry *dentry, struct inode *inode) return err; } -static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsigned int flags) { struct inode * inode = NULL; ino_t ino; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1a25fd8..b41cfba 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -197,7 +197,7 @@ STATIC struct dentry * xfs_vn_lookup( struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct xfs_inode *cip; struct xfs_name name; @@ -222,7 +222,7 @@ STATIC struct dentry * xfs_vn_ci_lookup( struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct xfs_inode *ip; struct xfs_name xname; diff --git a/include/linux/fs.h b/include/linux/fs.h index 17ee20d..7a71709 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1666,7 +1666,7 @@ struct file_operations { }; struct inode_operations { - struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); void * (*follow_link) (struct dentry *, struct nameidata *); int (*permission) (struct inode *, int); struct posix_acl * (*get_acl)(struct inode *, int); @@ -2571,7 +2571,7 @@ extern int simple_write_end(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); -extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); +extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index b303dfc..0cd1314 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -822,7 +822,7 @@ EXPORT_SYMBOL_GPL(cgroup_unlock); */ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); -static struct dentry *cgroup_lookup(struct inode *, struct dentry *, struct nameidata *); +static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); static int cgroup_populate_dir(struct cgroup *cgrp); static const struct inode_operations cgroup_dir_inode_operations; @@ -2570,7 +2570,7 @@ static const struct inode_operations cgroup_dir_inode_operations = { .rename = cgroup_rename, }; -static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *cgroup_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { if (dentry->d_name.len > NAME_MAX) return ERR_PTR(-ENAMETOOLONG); -- cgit v0.10.2 From 72bd866a01fc62ccbc466f3eb7599b14c937e96b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 17:17:17 -0400 Subject: fs/namei.c: don't pass nameidata to __lookup_hash() and lookup_real() Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 175e81b..fc01090 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1080,7 +1080,7 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir, * dir->d_inode->i_mutex must be held */ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, - struct nameidata *nd) + unsigned int flags) { struct dentry *old; @@ -1090,7 +1090,7 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } - old = dir->i_op->lookup(dir, dentry, nd ? nd->flags : 0); + old = dir->i_op->lookup(dir, dentry, flags); if (unlikely(old)) { dput(dentry); dentry = old; @@ -1099,16 +1099,16 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry, } static struct dentry *__lookup_hash(struct qstr *name, - struct dentry *base, struct nameidata *nd) + struct dentry *base, unsigned int flags) { bool need_lookup; struct dentry *dentry; - dentry = lookup_dcache(name, base, nd ? nd->flags : 0, &need_lookup); + dentry = lookup_dcache(name, base, flags, &need_lookup); if (!need_lookup) return dentry; - return lookup_real(base->d_inode, dentry, nd); + return lookup_real(base->d_inode, dentry, flags); } /* @@ -1227,7 +1227,7 @@ static int lookup_slow(struct nameidata *nd, struct qstr *name, BUG_ON(nd->inode != parent->d_inode); mutex_lock(&parent->d_inode->i_mutex); - dentry = __lookup_hash(name, parent, nd); + dentry = __lookup_hash(name, parent, nd->flags); mutex_unlock(&parent->d_inode->i_mutex); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -1859,7 +1859,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, */ static struct dentry *lookup_hash(struct nameidata *nd) { - return __lookup_hash(&nd->last, nd->path.dentry, nd); + return __lookup_hash(&nd->last, nd->path.dentry, nd->flags); } /** @@ -1906,7 +1906,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) if (err) return ERR_PTR(err); - return __lookup_hash(&this, base, NULL); + return __lookup_hash(&this, base, 0); } int user_path_at_empty(int dfd, const char __user *name, unsigned flags, @@ -2310,7 +2310,7 @@ out: no_open: if (need_lookup) { - dentry = lookup_real(dir, dentry, nd); + dentry = lookup_real(dir, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -2372,7 +2372,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, if (need_lookup) { BUG_ON(dentry->d_inode); - dentry = lookup_real(dir_inode, dentry, nd); + dentry = lookup_real(dir_inode, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); } -- cgit v0.10.2 From ebfc3b49a7ac25920cb5be5445f602e51d2ea559 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 18:05:36 -0400 Subject: don't pass nameidata to ->create() boolean "does it have to be exclusive?" flag is passed instead; Local filesystem should just ignore it - the object is guaranteed not to be there yet. Signed-off-by: Al Viro diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 33f2c8f..e0cce2a 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -37,7 +37,7 @@ d_manage: no no yes (ref-walk) maybe --------------------------- inode_operations --------------------------- prototypes: - int (*create) (struct inode *,struct dentry *,umode_t, struct nameidata *); + int (*create) (struct inode *,struct dentry *,umode_t, bool); struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 690f573..2bef2b3 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -436,3 +436,9 @@ d_make_root() drops the reference to inode if dentry allocation fails. [mandatory] The witch is dead! Well, 2/3 of it, anyway. ->d_revalidate() and ->lookup() do *not* take struct nameidata anymore; just the flags. +-- +[mandatory] + ->create() doesn't take struct nameidata *; unlike the previous +two, it gets "is it an O_EXCL or equivalent?" boolean argument. Note that +local filesystems can ignore tha argument - they are guaranteed that the +object doesn't exist. It's remote/distributed ones that might care... diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index ee78635..aa754e0 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -341,7 +341,7 @@ This describes how the VFS can manipulate an inode in your filesystem. As of kernel 2.6.22, the following members are defined: struct inode_operations { - int (*create) (struct inode *,struct dentry *, umode_t, struct nameidata *); + int (*create) (struct inode *,struct dentry *, umode_t, bool); struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index bb0d762..cbf9dbb 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -725,7 +725,7 @@ error: static int v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); u32 perm = unixmode2p9mode(v9ses, mode); diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index b97619f..4089554 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -235,7 +235,7 @@ int v9fs_open_to_dotl_flags(int flags) static int v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, - struct nameidata *nd) + bool excl) { return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0); } diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 49e4e34..6e21641 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -155,7 +155,7 @@ extern void affs_free_bitmap(struct super_block *sb); extern int affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len); extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int); extern int affs_unlink(struct inode *dir, struct dentry *dentry); -extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *); +extern int affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool); extern int affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); extern int affs_rmdir(struct inode *dir, struct dentry *dentry); extern int affs_link(struct dentry *olddentry, struct inode *dir, diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 7f9721b..ff65884 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -255,7 +255,7 @@ affs_unlink(struct inode *dir, struct dentry *dentry) } int -affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) +affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct super_block *sb = dir->i_sb; struct inode *inode; diff --git a/fs/afs/dir.c b/fs/afs/dir.c index ffb33e3..db47790 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -29,7 +29,7 @@ static void afs_d_release(struct dentry *dentry); static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, loff_t fpos, u64 ino, unsigned dtype); static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd); + bool excl); static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); static int afs_rmdir(struct inode *dir, struct dentry *dentry); static int afs_unlink(struct inode *dir, struct dentry *dentry); @@ -949,7 +949,7 @@ error: * create a regular file on an AFS filesystem */ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct afs_file_status status; struct afs_callback cb; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index d27e73c..b1342ff 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -173,7 +173,7 @@ static const struct file_operations bad_file_ops = }; static int bad_inode_create (struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { return -EIO; } diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 3f1cd3b..2785ef9 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -85,7 +85,7 @@ const struct file_operations bfs_dir_operations = { extern void dump_imap(const char *, struct super_block *); static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int err; struct inode *inode; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e5f1f81..fb8d671 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4893,7 +4893,7 @@ out_unlock: } static int btrfs_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 74b2f3c..00894ff 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -730,7 +730,7 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry, } static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return ceph_mknod(dir, dentry, mode, 0); } diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 1abd31f..1c49c5a 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -45,7 +45,7 @@ extern const struct address_space_operations cifs_addr_ops_smallbuf; extern const struct inode_operations cifs_dir_inode_ops; extern struct inode *cifs_root_iget(struct super_block *); extern int cifs_create(struct inode *, struct dentry *, umode_t, - struct nameidata *); + bool excl); extern int cifs_atomic_open(struct inode *, struct dentry *, struct file *, unsigned, umode_t, int *); diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 2d732b9..a180265 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -451,7 +451,7 @@ free_xid: } int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode, - struct nameidata *nd) + bool excl) { int rc; int xid = GetXid(); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index da35e96..49fe52d 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -30,7 +30,7 @@ #include "coda_int.h" /* dir inode-ops */ -static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, struct nameidata *nd); +static int coda_create(struct inode *dir, struct dentry *new, umode_t mode, bool excl); static struct dentry *coda_lookup(struct inode *dir, struct dentry *target, unsigned int flags); static int coda_link(struct dentry *old_dentry, struct inode *dir_inode, struct dentry *entry); @@ -188,7 +188,7 @@ static inline void coda_dir_drop_nlink(struct inode *dir) } /* creation routines: create, mknod, mkdir, link, symlink */ -static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, struct nameidata *nd) +static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool excl) { int error; const char *name=de->d_name.name; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 4ab50c3..f079daf 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -240,7 +240,6 @@ out: * @dir: The inode of the directory in which to create the file. * @dentry: The eCryptfs dentry * @mode: The mode of the new file. - * @nd: nameidata * * Creates a new file. * @@ -248,7 +247,7 @@ out: */ static int ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct inode *ecryptfs_inode; int rc; diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 909ed6e..4731fd9 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -60,7 +60,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry, } static int exofs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode = exofs_new_inode(dir, mode); int err = PTR_ERR(inode); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index b3e6778..9ba7de0 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -94,7 +94,7 @@ struct dentry *ext2_get_parent(struct dentry *child) * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd) +static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) { struct inode *inode; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 86d25f3..85286db 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1690,7 +1690,7 @@ static int ext3_add_nondir(handle_t *handle, * with d_instantiate(). */ static int ext3_create (struct inode * dir, struct dentry * dentry, umode_t mode, - struct nameidata *nd) + bool excl) { handle_t *handle; struct inode * inode; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4fba3cd..eca3e48 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2091,7 +2091,7 @@ static int ext4_add_nondir(handle_t *handle, * with d_instantiate(). */ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { handle_t *handle; struct inode *inode; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 47c608b..70d993a 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -265,7 +265,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, /***** Create a file */ static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct super_block *sb = dir->i_sb; struct inode *inode = NULL; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 4415257..6cc4806 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -772,7 +772,7 @@ error: } static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct super_block *sb = dir->i_sb; struct inode *inode; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 385235a..8964cf3 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -611,7 +611,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode, } static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode, - struct nameidata *nd) + bool excl) { return fuse_mknod(dir, entry, mode, 0); } diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 19e443b..8676747 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -755,11 +755,8 @@ fail: */ static int gfs2_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { - int excl = 0; - if (nd && (nd->flags & LOOKUP_EXCL)) - excl = 1; return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl); } diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index 617b1ed..422dde2 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -187,7 +187,7 @@ static int hfs_dir_release(struct inode *inode, struct file *file) * the directory and the name (and its length) of the new file. */ static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; int res; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 90c2f78..378ea0c 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -465,7 +465,7 @@ out: } static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return hfsplus_mknod(dir, dentry, mode, 0); } diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 0ea0052..1241465 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -553,7 +553,7 @@ static int read_name(struct inode *ino, char *name) } int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; char *name; diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 9083ef8..bc90824 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -115,7 +115,7 @@ bail: return err; } -static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) +static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { const unsigned char *name = dentry->d_name.name; unsigned len = dentry->d_name.len; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index cc9281b..e13e9bd 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -565,7 +565,7 @@ static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mod return retval; } -static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) +static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0); } diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 6a60167..2324519 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -25,7 +25,7 @@ static int jffs2_readdir (struct file *, void *, filldir_t); static int jffs2_create (struct inode *,struct dentry *,umode_t, - struct nameidata *); + bool); static struct dentry *jffs2_lookup (struct inode *,struct dentry *, unsigned int); static int jffs2_link (struct dentry *,struct inode *,struct dentry *); @@ -175,7 +175,7 @@ static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir) static int jffs2_create(struct inode *dir_i, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct jffs2_raw_inode *ri; struct jffs2_inode_info *f, *dir_f; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 34fe855..c426293 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -73,7 +73,7 @@ static inline void free_ea_wmap(struct inode *inode) * */ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int rc = 0; tid_t tid; /* transaction id */ diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 8a3dcc6..26e4a94 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -502,7 +502,7 @@ static int logfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } static int logfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; diff --git a/fs/minix/namei.c b/fs/minix/namei.c index 1f24524..0db73d9 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -55,7 +55,7 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, } static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return minix_mknod(dir, dentry, mode, 0); } diff --git a/fs/namei.c b/fs/namei.c index fc01090..fd71156 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2082,7 +2082,6 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) { int error = may_create(dir, dentry); - if (error) return error; @@ -2093,7 +2092,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(dir, dentry, mode, nd); + error = dir->i_op->create(dir, dentry, mode, !nd || (nd->flags & LOOKUP_EXCL)); if (!error) fsnotify_create(dir, dentry); return error; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index a0cff22..4117e7b 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -30,7 +30,7 @@ static void ncp_do_readdir(struct file *, void *, filldir_t, static int ncp_readdir(struct file *, void *, filldir_t); -static int ncp_create(struct inode *, struct dentry *, umode_t, struct nameidata *); +static int ncp_create(struct inode *, struct dentry *, umode_t, bool); static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int); static int ncp_unlink(struct inode *, struct dentry *); static int ncp_mkdir(struct inode *, struct dentry *, umode_t); @@ -980,7 +980,7 @@ out: } static int ncp_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return ncp_create_new(dir, dentry, mode, 0, 0); } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8f21205..a6b1c7f 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -47,7 +47,7 @@ static int nfs_opendir(struct inode *, struct file *); static int nfs_closedir(struct inode *, struct file *); static int nfs_readdir(struct file *, void *, filldir_t); static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int); -static int nfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); +static int nfs_create(struct inode *, struct dentry *, umode_t, bool); static int nfs_mkdir(struct inode *, struct dentry *, umode_t); static int nfs_rmdir(struct inode *, struct dentry *); static int nfs_unlink(struct inode *, struct dentry *); @@ -1589,11 +1589,11 @@ out_error: * reply path made it appear to have failed. */ static int nfs_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct iattr attr; + int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT; int error; - int open_flags = O_CREAT|O_EXCL; dfprintk(VFS, "NFS: create(%s/%ld), %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1601,9 +1601,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, attr.ia_mode = mode; attr.ia_valid = ATTR_MODE; - if (nd && !(nd->flags & LOOKUP_EXCL)) - open_flags = O_CREAT; - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); if (error != 0) goto out_err; diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 5e5f779..1d0c0b8 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -85,7 +85,7 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) * with d_instantiate(). */ static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; struct nilfs_transaction_info ti; diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index e31d6ae..83b6f98 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -526,7 +526,7 @@ bail: static int dlmfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int status = 0; struct inode *inode; diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index fd71f6e..f1fd074 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -618,7 +618,7 @@ static int ocfs2_mkdir(struct inode *dir, static int ocfs2_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int ret; diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index 3d25487..fb5b3ff 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -285,7 +285,7 @@ static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return omfs_add_node(dir, dentry, mode | S_IFREG); } diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a1fdabe..eab8c09 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -114,7 +114,7 @@ static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) return retval; } -static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, struct nameidata *nd) +static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { return ramfs_mknod(dir, dentry, mode | S_IFREG, 0); } diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 1d9cf24..3916be1 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -573,7 +573,7 @@ static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode) } static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { int retval; struct inode *inode; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index e6ad8d7..d319963 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -62,7 +62,7 @@ static int xattr_create(struct inode *dir, struct dentry *dentry, int mode) { BUG_ON(!mutex_is_locked(&dir->i_mutex)); - return dir->i_op->create(dir, dentry, mode, NULL); + return dir->i_op->create(dir, dentry, mode, true); } #endif diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index a8c4359..1c0d5f2 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -80,7 +80,7 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, return err; } -static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, struct nameidata *nd) +static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, bool excl) { return sysv_mknod(dir, dentry, mode, 0); } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 845b2df..b1cca89 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -246,7 +246,7 @@ out: } static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; struct ubifs_info *c = dir->i_sb->s_fs_info; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index 929cc20..544b279 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -551,7 +551,7 @@ static int udf_delete_entry(struct inode *inode, struct fileIdentDesc *fi, } static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct udf_fileident_bh fibh; struct inode *inode; diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index bc77fa1..90d74b8 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -71,7 +71,7 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi * with d_instantiate(). */ static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode, - struct nameidata *nd) + bool excl) { struct inode *inode; int err; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index b41cfba..9c4340f 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -179,7 +179,7 @@ xfs_vn_create( struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool flags) { return xfs_vn_mknod(dir, dentry, mode, 0); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 7a71709..df869d2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1674,7 +1674,7 @@ struct inode_operations { int (*readlink) (struct dentry *, char __user *,int); void (*put_link) (struct dentry *, struct nameidata *, void *); - int (*create) (struct inode *,struct dentry *,umode_t,struct nameidata *); + int (*create) (struct inode *,struct dentry *, umode_t, bool); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 8ce5769..da2c188 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -413,7 +413,7 @@ static void mqueue_evict_inode(struct inode *inode) } static int mqueue_create(struct inode *dir, struct dentry *dentry, - umode_t mode, struct nameidata *nd) + umode_t mode, bool excl) { struct inode *inode; struct mq_attr *attr = dentry->d_fsdata; diff --git a/mm/shmem.c b/mm/shmem.c index bd10636..c15b998 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1877,7 +1877,7 @@ static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) } static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool excl) { return shmem_mknod(dir, dentry, mode | S_IFREG, 0); } -- cgit v0.10.2 From 312b63fba9e88a0dcf800834b8ede8716bcc1e17 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Jun 2012 18:09:36 -0400 Subject: don't pass nameidata * to vfs_create() all we want is a boolean flag, same as the method gets now Signed-off-by: Al Viro diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 7f0771d..b0b5f7cd 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c @@ -567,7 +567,7 @@ lookup_again: if (ret < 0) goto create_error; start = jiffies; - ret = vfs_create(dir->d_inode, next, S_IFREG, NULL); + ret = vfs_create(dir->d_inode, next, S_IFREG, true); cachefiles_hist(cachefiles_create_histogram, start); if (ret < 0) goto create_error; diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index f079daf..da52cdb 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -173,7 +173,7 @@ ecryptfs_do_create(struct inode *directory_inode, inode = ERR_CAST(lower_dir_dentry); goto out; } - rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, NULL); + rc = vfs_create(lower_dir_dentry->d_inode, lower_dentry, mode, true); if (rc) { printk(KERN_ERR "%s: Failure to create dentry in lower fs; " "rc = [%d]\n", __func__, rc); diff --git a/fs/namei.c b/fs/namei.c index fd71156..ffcd4e1 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2079,7 +2079,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) } int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, - struct nameidata *nd) + bool want_excl) { int error = may_create(dir, dentry); if (error) @@ -2092,7 +2092,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(dir, dentry, mode, !nd || (nd->flags & LOOKUP_EXCL)); + error = dir->i_op->create(dir, dentry, mode, want_excl); if (!error) fsnotify_create(dir, dentry); return error; @@ -2396,7 +2396,8 @@ static int lookup_open(struct nameidata *nd, struct path *path, error = security_path_mknod(&nd->path, dentry, mode, 0); if (error) goto out_dput; - error = vfs_create(dir->d_inode, dentry, mode, nd); + error = vfs_create(dir->d_inode, dentry, mode, + nd->flags & LOOKUP_EXCL); if (error) goto out_dput; } @@ -2883,7 +2884,7 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode, goto out_drop_write; switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(path.dentry->d_inode,dentry,mode,NULL); + error = vfs_create(path.dentry->d_inode,dentry,mode,true); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(path.dentry->d_inode,dentry,mode, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index c8bd9c3..05d9eee 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1329,7 +1329,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = 0; switch (type) { case S_IFREG: - host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); if (!host_err) nfsd_check_ignore_resizing(iap); break; @@ -1492,7 +1492,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + host_err = vfs_create(dirp, dchild, iap->ia_mode, true); if (host_err < 0) { fh_drop_write(fhp); goto out_nfserr; diff --git a/include/linux/fs.h b/include/linux/fs.h index df869d2..2f857e9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1571,7 +1571,7 @@ extern void unlock_super(struct super_block *); /* * VFS helper functions.. */ -extern int vfs_create(struct inode *, struct dentry *, umode_t, struct nameidata *); +extern int vfs_create(struct inode *, struct dentry *, umode_t, bool); extern int vfs_mkdir(struct inode *, struct dentry *, umode_t); extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t); extern int vfs_symlink(struct inode *, struct dentry *, const char *); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index da2c188..2dee38d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -751,7 +751,7 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, ret = mnt_want_write(ipc_ns->mq_mnt); if (ret) goto out; - ret = vfs_create(dir->d_inode, dentry, mode, NULL); + ret = vfs_create(dir->d_inode, dentry, mode, true); dentry->d_fsdata = NULL; if (ret) goto out_drop_write; -- cgit v0.10.2 From 1acf0af9b981027f3e73e93f0d3f85abdc794f71 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 14 Jun 2012 16:13:46 +0100 Subject: VFS: Fix the banner comment on lookup_open() Since commit 197e37d9, the banner comment on lookup_open() no longer matches what the function returns. It used to return a struct file pointer or NULL and now it returns an integer and is passed the struct file pointer it is to use amongst its arguments. Update the comment to reflect this. Also add a banner comment to atomic_open(). Signed-off-by: David Howells Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index ffcd4e1..5abab91 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2192,6 +2192,19 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) return security_inode_create(dir->dentry->d_inode, dentry, mode); } +/* + * Attempt to atomically look up, create and open a file from a negative + * dentry. + * + * Returns 0 if successful. The file will have been created and attached to + * @file by the filesystem calling finish_open(). + * + * Returns 1 if the file was looked up only or didn't need creating. The + * caller will need to perform the open themselves. @path will have been + * updated to point to the new dentry. This may be negative. + * + * Returns an error code otherwise. + */ static int atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct file *file, const struct open_flags *op, @@ -2336,12 +2349,22 @@ looked_up: } /* - * Lookup, maybe create and open the last component + * Look up and maybe create and open the last component. * * Must be called with i_mutex held on parent. * - * Returns open file or NULL on success, error otherwise. NULL means no open - * was performed, only lookup. + * Returns 0 if the file was successfully atomically created (if necessary) and + * opened. In this case the file will be returned attached to @file. + * + * Returns 1 if the file was not completely opened at this time, though lookups + * and creations will have been performed and the dentry returned in @path will + * be positive upon return if O_CREAT was specified. If O_CREAT wasn't + * specified then a negative dentry may be returned. + * + * An error code is returned otherwise. + * + * FILE_CREATE will be set in @*opened if the dentry was created and will be + * cleared otherwise prior to returning. */ static int lookup_open(struct nameidata *nd, struct path *path, struct file *file, -- cgit v0.10.2 From 79714f72d3b964611997de512cb29198c9f2dbbb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Jun 2012 03:01:42 +0400 Subject: get rid of kern_path_parent() all callers want the same thing, actually - a kinda-sorta analog of kern_path_create(). I.e. they want parent vfsmount/dentry (with ->i_mutex held, to make sure the child dentry is still their child) + the child dentry. Signed-off-by Al Viro diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 765c3a2..d91a3a0 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -227,33 +227,24 @@ static int handle_create(const char *nodename, umode_t mode, struct device *dev) static int dev_rmdir(const char *name) { - struct nameidata nd; + struct path parent; struct dentry *dentry; int err; - err = kern_path_parent(name, &nd); - if (err) - return err; - - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); - if (!IS_ERR(dentry)) { - if (dentry->d_inode) { - if (dentry->d_inode->i_private == &thread) - err = vfs_rmdir(nd.path.dentry->d_inode, - dentry); - else - err = -EPERM; - } else { - err = -ENOENT; - } - dput(dentry); + dentry = kern_path_locked(name, &parent); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + if (dentry->d_inode) { + if (dentry->d_inode->i_private == &thread) + err = vfs_rmdir(parent.dentry->d_inode, dentry); + else + err = -EPERM; } else { - err = PTR_ERR(dentry); + err = -ENOENT; } - - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); + dput(dentry); + mutex_unlock(&parent.dentry->d_inode->i_mutex); + path_put(&parent); return err; } @@ -305,50 +296,43 @@ static int dev_mynode(struct device *dev, struct inode *inode, struct kstat *sta static int handle_remove(const char *nodename, struct device *dev) { - struct nameidata nd; + struct path parent; struct dentry *dentry; - struct kstat stat; int deleted = 1; int err; - err = kern_path_parent(nodename, &nd); - if (err) - return err; + dentry = kern_path_locked(nodename, &parent); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); - if (!IS_ERR(dentry)) { - if (dentry->d_inode) { - err = vfs_getattr(nd.path.mnt, dentry, &stat); - if (!err && dev_mynode(dev, dentry->d_inode, &stat)) { - struct iattr newattrs; - /* - * before unlinking this node, reset permissions - * of possible references like hardlinks - */ - newattrs.ia_uid = 0; - newattrs.ia_gid = 0; - newattrs.ia_mode = stat.mode & ~0777; - newattrs.ia_valid = - ATTR_UID|ATTR_GID|ATTR_MODE; - mutex_lock(&dentry->d_inode->i_mutex); - notify_change(dentry, &newattrs); - mutex_unlock(&dentry->d_inode->i_mutex); - err = vfs_unlink(nd.path.dentry->d_inode, - dentry); - if (!err || err == -ENOENT) - deleted = 1; - } - } else { - err = -ENOENT; + if (dentry->d_inode) { + struct kstat stat; + err = vfs_getattr(parent.mnt, dentry, &stat); + if (!err && dev_mynode(dev, dentry->d_inode, &stat)) { + struct iattr newattrs; + /* + * before unlinking this node, reset permissions + * of possible references like hardlinks + */ + newattrs.ia_uid = 0; + newattrs.ia_gid = 0; + newattrs.ia_mode = stat.mode & ~0777; + newattrs.ia_valid = + ATTR_UID|ATTR_GID|ATTR_MODE; + mutex_lock(&dentry->d_inode->i_mutex); + notify_change(dentry, &newattrs); + mutex_unlock(&dentry->d_inode->i_mutex); + err = vfs_unlink(parent.dentry->d_inode, dentry); + if (!err || err == -ENOENT) + deleted = 1; } - dput(dentry); } else { - err = PTR_ERR(dentry); + err = -ENOENT; } - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + dput(dentry); + mutex_unlock(&parent.dentry->d_inode->i_mutex); - path_put(&nd.path); + path_put(&parent); if (deleted && strchr(nodename, '/')) delete_path(nodename); return err; diff --git a/fs/namei.c b/fs/namei.c index 5abab91..6b29a51 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1814,9 +1814,27 @@ static int do_path_lookup(int dfd, const char *name, return retval; } -int kern_path_parent(const char *name, struct nameidata *nd) +/* does lookup, returns the object with parent locked */ +struct dentry *kern_path_locked(const char *name, struct path *path) { - return do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, nd); + struct nameidata nd; + struct dentry *d; + int err = do_path_lookup(AT_FDCWD, name, LOOKUP_PARENT, &nd); + if (err) + return ERR_PTR(err); + if (nd.last_type != LAST_NORM) { + path_put(&nd.path); + return ERR_PTR(-EINVAL); + } + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); + if (IS_ERR(d)) { + mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + path_put(&nd.path); + return d; + } + *path = nd.path; + return d; } int kern_path(const char *name, unsigned int flags, struct path *path) diff --git a/include/linux/namei.h b/include/linux/namei.h index 23d8598..f593148 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -67,7 +67,7 @@ extern int kern_path(const char *, unsigned, struct path *); extern struct dentry *kern_path_create(int, const char *, struct path *, int); extern struct dentry *user_path_create(int, const char __user *, struct path *, int); -extern int kern_path_parent(const char *, struct nameidata *); +extern struct dentry *kern_path_locked(const char *, struct path *); extern int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index e683869..3823281 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -355,34 +355,15 @@ static void audit_remove_parent_watches(struct audit_parent *parent) /* Get path information necessary for adding watches. */ static int audit_get_nd(struct audit_watch *watch, struct path *parent) { - struct nameidata nd; - struct dentry *d; - int err; - - err = kern_path_parent(watch->path, &nd); - if (err) - return err; - - if (nd.last_type != LAST_NORM) { - path_put(&nd.path); - return -EINVAL; - } - - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); - if (IS_ERR(d)) { - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); + struct dentry *d = kern_path_locked(watch->path, parent); + if (IS_ERR(d)) return PTR_ERR(d); - } + mutex_unlock(&parent->dentry->d_inode->i_mutex); if (d->d_inode) { /* update watch filter fields */ watch->dev = d->d_inode->i_sb->s_dev; watch->ino = d->d_inode->i_ino; } - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - - *parent = nd.path; dput(d); return 0; } -- cgit v0.10.2 From 469796d10590341c53cff0a2959254eaf5d465de Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Jun 2012 20:51:39 -0400 Subject: sysfs: switch to ->s_d_op and ->d_release() a) ->d_iput() is wrong here - what we do to inode is completely usual, it's dentry->d_fsdata that we want to drop. Just use ->d_release(). b) switch to ->s_d_op - no need to play with d_set_d_op() Signed-off-by: Al Viro diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index efd373e..77c44ce 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -300,7 +300,7 @@ void release_sysfs_dirent(struct sysfs_dirent * sd) static int sysfs_dentry_delete(const struct dentry *dentry) { struct sysfs_dirent *sd = dentry->d_fsdata; - return !!(sd->s_flags & SYSFS_FLAG_REMOVED); + return !(sd && !(sd->s_flags & SYSFS_FLAG_REMOVED)); } static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags) @@ -355,18 +355,15 @@ out_bad: return 0; } -static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode) +static void sysfs_dentry_release(struct dentry *dentry) { - struct sysfs_dirent * sd = dentry->d_fsdata; - - sysfs_put(sd); - iput(inode); + sysfs_put(dentry->d_fsdata); } -static const struct dentry_operations sysfs_dentry_ops = { +const struct dentry_operations sysfs_dentry_ops = { .d_revalidate = sysfs_dentry_revalidate, .d_delete = sysfs_dentry_delete, - .d_iput = sysfs_dentry_iput, + .d_release = sysfs_dentry_release, }; struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) @@ -786,6 +783,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, ret = ERR_PTR(-ENOENT); goto out_unlock; } + dentry->d_fsdata = sysfs_get(sd); /* attach dentry and inode */ inode = sysfs_get_inode(dir->i_sb, sd); @@ -797,8 +795,6 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, /* instantiate and hash dentry */ ret = d_find_alias(inode); if (!ret) { - d_set_d_op(dentry, &sysfs_dentry_ops); - dentry->d_fsdata = sysfs_get(sd); d_add(dentry, inode); } else { d_move(ret, dentry); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 52c3bdb..c15a7a3 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -68,6 +68,7 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) } root->d_fsdata = &sysfs_root; sb->s_root = root; + sb->s_d_op = &sysfs_dentry_ops; return 0; } diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index 661a963..d73c093 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -157,6 +157,7 @@ extern struct kmem_cache *sysfs_dir_cachep; */ extern struct mutex sysfs_mutex; extern spinlock_t sysfs_assoc_lock; +extern const struct dentry_operations sysfs_dentry_ops; extern const struct file_operations sysfs_dir_operations; extern const struct inode_operations sysfs_dir_inode_operations; -- cgit v0.10.2 From e77fb7cef87856d9d35f2f4d617d0b97148ee7c2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 7 Jun 2012 20:56:54 -0400 Subject: sysfs: just use d_materialise_unique() same as for nfs et.al. Signed-off-by: Al Viro diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 77c44ce..a5cf784 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -793,14 +793,7 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, } /* instantiate and hash dentry */ - ret = d_find_alias(inode); - if (!ret) { - d_add(dentry, inode); - } else { - d_move(ret, dentry); - iput(inode); - } - + ret = d_materialise_unique(dentry, inode); out_unlock: mutex_unlock(&sysfs_mutex); return ret; -- cgit v0.10.2 From ee3efa91e240f513898050ef305a49a653c8ed90 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 8 Jun 2012 15:59:33 -0400 Subject: __d_unalias() should refuse to move mountpoints Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 015586f..8086636 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2387,14 +2387,13 @@ static struct dentry *__d_unalias(struct inode *inode, struct dentry *dentry, struct dentry *alias) { struct mutex *m1 = NULL, *m2 = NULL; - struct dentry *ret; + struct dentry *ret = ERR_PTR(-EBUSY); /* If alias and dentry share a parent, then no extra locks required */ if (alias->d_parent == dentry->d_parent) goto out_unalias; /* See lock_rename() */ - ret = ERR_PTR(-EBUSY); if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; @@ -2402,8 +2401,10 @@ static struct dentry *__d_unalias(struct inode *inode, goto out_err; m2 = &alias->d_parent->d_inode->i_mutex; out_unalias: - __d_move(alias, dentry); - ret = alias; + if (likely(!d_mountpoint(alias))) { + __d_move(alias, dentry); + ret = alias; + } out_err: spin_unlock(&inode->i_lock); if (m2) -- cgit v0.10.2 From c3b1a350846a11dd1054cb7832e098aa37025deb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 20:28:22 -0400 Subject: debugfs: make sure that debugfs_create_file() gets used only for regulars It, debugfs_create_dir() and debugfs_create_link() use the common helper now. Signed-off-by: Al Viro diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b80bc84..d423b96 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -335,6 +335,30 @@ static int debugfs_create_by_name(const char *name, umode_t mode, return error; } +struct dentry *__create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) +{ + struct dentry *dentry = NULL; + int error; + + pr_debug("debugfs: creating file '%s'\n",name); + + error = simple_pin_fs(&debug_fs_type, &debugfs_mount, + &debugfs_mount_count); + if (error) + goto exit; + + error = debugfs_create_by_name(name, mode, parent, &dentry, + data, fops); + if (error) { + dentry = NULL; + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + } +exit: + return dentry; +} + /** * debugfs_create_file - create a file in the debugfs filesystem * @name: a pointer to a string containing the name of the file to create. @@ -365,25 +389,15 @@ struct dentry *debugfs_create_file(const char *name, umode_t mode, struct dentry *parent, void *data, const struct file_operations *fops) { - struct dentry *dentry = NULL; - int error; - - pr_debug("debugfs: creating file '%s'\n",name); - - error = simple_pin_fs(&debug_fs_type, &debugfs_mount, - &debugfs_mount_count); - if (error) - goto exit; - - error = debugfs_create_by_name(name, mode, parent, &dentry, - data, fops); - if (error) { - dentry = NULL; - simple_release_fs(&debugfs_mount, &debugfs_mount_count); - goto exit; + switch (mode & S_IFMT) { + case S_IFREG: + case 0: + break; + default: + BUG(); } -exit: - return dentry; + + return __create_file(name, mode, parent, data, fops); } EXPORT_SYMBOL_GPL(debugfs_create_file); @@ -407,8 +421,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_file); */ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) { - return debugfs_create_file(name, - S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, + return __create_file(name, S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO, parent, NULL, NULL); } EXPORT_SYMBOL_GPL(debugfs_create_dir); @@ -446,8 +459,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, if (!link) return NULL; - result = debugfs_create_file(name, S_IFLNK | S_IRWXUGO, parent, link, - NULL); + result = __create_file(name, S_IFLNK | S_IRWXUGO, parent, link, NULL); if (!result) kfree(link); return result; -- cgit v0.10.2 From cfa57c11b0d5a80f7bffa1ab35bc46892127817f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 20:33:28 -0400 Subject: debugfs: fold debugfs_create_by_name() into the only caller Signed-off-by: Al Viro diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index d423b96..79f53f3 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -293,13 +293,19 @@ static struct file_system_type debug_fs_type = { .kill_sb = kill_litter_super, }; -static int debugfs_create_by_name(const char *name, umode_t mode, - struct dentry *parent, - struct dentry **dentry, - void *data, - const struct file_operations *fops) +struct dentry *__create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) { - int error = 0; + struct dentry *dentry = NULL; + int error; + + pr_debug("debugfs: creating file '%s'\n",name); + + error = simple_pin_fs(&debug_fs_type, &debugfs_mount, + &debugfs_mount_count); + if (error) + goto exit; /* If the parent is not specified, we create it in the root. * We need the root dentry to do this, which is in the super @@ -309,48 +315,29 @@ static int debugfs_create_by_name(const char *name, umode_t mode, if (!parent) parent = debugfs_mount->mnt_root; - *dentry = NULL; + dentry = NULL; mutex_lock(&parent->d_inode->i_mutex); - *dentry = lookup_one_len(name, parent, strlen(name)); - if (!IS_ERR(*dentry)) { + dentry = lookup_one_len(name, parent, strlen(name)); + if (!IS_ERR(dentry)) { switch (mode & S_IFMT) { case S_IFDIR: - error = debugfs_mkdir(parent->d_inode, *dentry, mode, + error = debugfs_mkdir(parent->d_inode, dentry, mode, data, fops); break; case S_IFLNK: - error = debugfs_link(parent->d_inode, *dentry, mode, + error = debugfs_link(parent->d_inode, dentry, mode, data, fops); break; default: - error = debugfs_create(parent->d_inode, *dentry, mode, + error = debugfs_create(parent->d_inode, dentry, mode, data, fops); break; } - dput(*dentry); + dput(dentry); } else - error = PTR_ERR(*dentry); + error = PTR_ERR(dentry); mutex_unlock(&parent->d_inode->i_mutex); - return error; -} - -struct dentry *__create_file(const char *name, umode_t mode, - struct dentry *parent, void *data, - const struct file_operations *fops) -{ - struct dentry *dentry = NULL; - int error; - - pr_debug("debugfs: creating file '%s'\n",name); - - error = simple_pin_fs(&debug_fs_type, &debugfs_mount, - &debugfs_mount_count); - if (error) - goto exit; - - error = debugfs_create_by_name(name, mode, parent, &dentry, - data, fops); if (error) { dentry = NULL; simple_release_fs(&debugfs_mount, &debugfs_mount_count); -- cgit v0.10.2 From ac481d6ca4081bdd348cbd84963d1ece843a3407 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 9 Jun 2012 20:40:20 -0400 Subject: debugfs: get rid of useless arguments to debugfs_{mkdir,symlink} Signed-off-by: Al Viro diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 79f53f3..d17c20f 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -54,13 +54,12 @@ static struct inode *debugfs_get_inode(struct super_block *sb, umode_t mode, dev break; case S_IFLNK: inode->i_op = &debugfs_link_operations; - inode->i_fop = fops; inode->i_private = data; break; case S_IFDIR: inode->i_op = &simple_dir_inode_operations; - inode->i_fop = fops ? fops : &simple_dir_operations; - inode->i_private = data; + inode->i_fop = &simple_dir_operations; + inode->i_private = NULL; /* directory inodes start off with i_nlink == 2 * (for "." entry) */ @@ -91,13 +90,12 @@ static int debugfs_mknod(struct inode *dir, struct dentry *dentry, return error; } -static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, - void *data, const struct file_operations *fops) +static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { int res; mode = (mode & (S_IRWXUGO | S_ISVTX)) | S_IFDIR; - res = debugfs_mknod(dir, dentry, mode, 0, data, fops); + res = debugfs_mknod(dir, dentry, mode, 0, NULL, NULL); if (!res) { inc_nlink(dir); fsnotify_mkdir(dir, dentry); @@ -106,10 +104,10 @@ static int debugfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode, } static int debugfs_link(struct inode *dir, struct dentry *dentry, umode_t mode, - void *data, const struct file_operations *fops) + void *data) { mode = (mode & S_IALLUGO) | S_IFLNK; - return debugfs_mknod(dir, dentry, mode, 0, data, fops); + return debugfs_mknod(dir, dentry, mode, 0, data, NULL); } static int debugfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, @@ -321,12 +319,12 @@ struct dentry *__create_file(const char *name, umode_t mode, if (!IS_ERR(dentry)) { switch (mode & S_IFMT) { case S_IFDIR: - error = debugfs_mkdir(parent->d_inode, dentry, mode, - data, fops); + error = debugfs_mkdir(parent->d_inode, dentry, mode); + break; case S_IFLNK: error = debugfs_link(parent->d_inode, dentry, mode, - data, fops); + data); break; default: error = debugfs_create(parent->d_inode, dentry, mode, -- cgit v0.10.2 From 408ef013cc9e2f94a14f7ccbbe52ddfb18437a99 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Jun 2012 10:47:03 -0400 Subject: fs: move path_put on failure out of ->follow_link Currently the non-nd_set_link based versions of ->follow_link are expected to do a path_put(&nd->path) on failure. This calling convention is unexpected, undocumented and doesn't match what the nd_set_link-based instances do. Move the path_put out of the only non-nd_set_link based ->follow_link instance into the caller. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 6b29a51..a9b94c6 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -624,7 +624,7 @@ follow_link(struct path *link, struct nameidata *nd, void **p) *p = dentry->d_inode->i_op->follow_link(dentry, nd); error = PTR_ERR(*p); if (IS_ERR(*p)) - goto out_put_link; + goto out_put_nd_path; error = 0; s = nd_get_link(nd); @@ -646,7 +646,6 @@ follow_link(struct path *link, struct nameidata *nd, void **p) out_put_nd_path: path_put(&nd->path); -out_put_link: path_put(link); return error; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 8eaa5ea..3bd5ac1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1427,16 +1427,20 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path) static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; + struct path path; int error = -EACCES; - /* We don't need a base pointer in the /proc filesystem */ - path_put(&nd->path); - /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; - error = PROC_I(inode)->op.proc_get_link(dentry, &nd->path); + error = PROC_I(inode)->op.proc_get_link(dentry, &path); + if (error) + goto out; + + path_put(&nd->path); + nd->path = path; + return NULL; out: return ERR_PTR(error); } -- cgit v0.10.2 From b5fb63c18315c5510c1d0636179c057e0c761c77 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 18 Jun 2012 10:47:04 -0400 Subject: fs: add nd_jump_link Add a helper that abstracts out the jump to an already parsed struct path from ->follow_link operation from procfs. Not only does this clean up the code by moving the two sides of this game into a single helper, but it also prepares for making struct nameidata private to namei.c Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index a9b94c6..0e1b9c3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -586,6 +586,21 @@ static inline void path_to_nameidata(const struct path *path, nd->path.dentry = path->dentry; } +/* + * Helper to directly jump to a known parsed path from ->follow_link, + * caller must have taken a reference to path beforehand. + */ +void nd_jump_link(struct nameidata *nd, struct path *path) +{ + path_put(&nd->path); + + nd->path = *path; + nd->inode = nd->path.dentry->d_inode; + nd->flags |= LOOKUP_JUMPED; + + BUG_ON(nd->inode->i_op->follow_link); +} + static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) { struct inode *inode = link->dentry->d_inode; @@ -630,17 +645,9 @@ follow_link(struct path *link, struct nameidata *nd, void **p) s = nd_get_link(nd); if (s) { error = __vfs_follow_link(nd, s); - } else if (nd->last_type == LAST_BIND) { - nd->flags |= LOOKUP_JUMPED; - nd->inode = nd->path.dentry->d_inode; - if (nd->inode->i_op->follow_link) { - /* stepped on a _really_ weird one */ - path_put(&nd->path); - error = -ELOOP; - } + if (unlikely(error)) + put_link(nd, link, *p); } - if (unlikely(error)) - put_link(nd, link, *p); return error; diff --git a/fs/proc/base.c b/fs/proc/base.c index 3bd5ac1..2772208 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1438,8 +1438,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) if (error) goto out; - path_put(&nd->path); - nd->path = path; + nd_jump_link(nd, &path); return NULL; out: return ERR_PTR(error); diff --git a/include/linux/namei.h b/include/linux/namei.h index f593148..d2ef8b3 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -80,6 +80,8 @@ extern int follow_up(struct path *); extern struct dentry *lock_rename(struct dentry *, struct dentry *); extern void unlock_rename(struct dentry *, struct dentry *); +extern void nd_jump_link(struct nameidata *nd, struct path *path); + static inline void nd_set_link(struct nameidata *nd, char *path) { nd->saved_names[nd->depth] = path; -- cgit v0.10.2 From c4107b3097465e25f7d6a9b0ac0518b07b24e774 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 20 Jun 2012 09:55:58 +1000 Subject: notify_change(): check that i_mutex is held Cc: Djalal Harouni Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro diff --git a/fs/attr.c b/fs/attr.c index 0da9095..29e38a1 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -171,6 +171,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr) struct timespec now; unsigned int ia_valid = attr->ia_valid; + WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex)); + if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) { if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) return -EPERM; @@ -250,5 +252,4 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return error; } - EXPORT_SYMBOL(notify_change); -- cgit v0.10.2 From 85d7d618c17a09cfd824c1ad4483c19e6f9637ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jun 2012 22:41:54 +0400 Subject: mark_files_ro(): don't bother with mntget/mntput mnt_drop_write_file() is safe under any lock Signed-off-by: Al Viro diff --git a/fs/file_table.c b/fs/file_table.c index a305d9e..9ace278 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -483,10 +483,8 @@ void mark_files_ro(struct super_block *sb) { struct file *f; -retry: lg_global_lock(&files_lglock); do_file_list_for_each_entry(sb, f) { - struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; if (!file_count(f)) @@ -499,12 +497,7 @@ retry: if (file_check_writeable(f) != 0) continue; file_release_write(f); - mnt = mntget(f->f_path.mnt); - /* This can sleep, so we can't hold the spinlock. */ - lg_global_unlock(&files_lglock); - mnt_drop_write(mnt); - mntput(mnt); - goto retry; + mnt_drop_write_file(f); } while_file_list_for_each_entry; lg_global_unlock(&files_lglock); } -- cgit v0.10.2 From c3c4f69424db0760239762d36d0b1b6ae524008b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Jun 2012 22:49:45 +0400 Subject: do_dentry_open(): close the race with mark_files_ro() in failure exit we want to take it out of mark_files_ro() reach *before* we start checking if we ought to drop write access. Signed-off-by: Al Viro diff --git a/fs/open.c b/fs/open.c index 124ccb1..764cc9c 100644 --- a/fs/open.c +++ b/fs/open.c @@ -727,6 +727,7 @@ static int do_dentry_open(struct file *f, cleanup_all: fops_put(f->f_op); + file_sb_list_del(f); if (f->f_mode & FMODE_WRITE) { put_write_access(inode); if (!special_file(inode->i_mode)) { @@ -740,7 +741,6 @@ cleanup_all: mnt_drop_write(f->f_path.mnt); } } - file_sb_list_del(f); cleanup_file: path_put(&f->f_path); f->f_path.mnt = NULL; -- cgit v0.10.2 From 55e4def0a6e79e7eb53017c4935adfed76510cd7 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:09 +0100 Subject: VFS: Make chown() and lchown() call fchownat() Make the chown() and lchown() syscalls jump to the fchownat() syscall with the appropriate extra arguments. Signed-off-by: David Howells Signed-off-by: Al Viro diff --git a/fs/open.c b/fs/open.c index 764cc9c..75bea86 100644 --- a/fs/open.c +++ b/fs/open.c @@ -537,25 +537,6 @@ static int chown_common(struct path *path, uid_t user, gid_t group) return error; } -SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) -{ - struct path path; - int error; - - error = user_path(filename, &path); - if (error) - goto out; - error = mnt_want_write(path.mnt); - if (error) - goto out_release; - error = chown_common(&path, user, group); - mnt_drop_write(path.mnt); -out_release: - path_put(&path); -out: - return error; -} - SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, gid_t, group, int, flag) { @@ -583,23 +564,15 @@ out: return error; } -SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) +SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group) { - struct path path; - int error; + return sys_fchownat(AT_FDCWD, filename, user, group, 0); +} - error = user_lpath(filename, &path); - if (error) - goto out; - error = mnt_want_write(path.mnt); - if (error) - goto out_release; - error = chown_common(&path, user, group); - mnt_drop_write(path.mnt); -out_release: - path_put(&path); -out: - return error; +SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group) +{ + return sys_fchownat(AT_FDCWD, filename, user, group, + AT_SYMLINK_NOFOLLOW); } SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group) -- cgit v0.10.2 From be34d1a3bc4b6f357a49acb55ae870c81337e4f0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:18 +0100 Subject: VFS: Make clone_mnt()/copy_tree()/collect_mounts() return errors copy_tree() can theoretically fail in a case other than ENOMEM, but always returns NULL which is interpreted by callers as -ENOMEM. Change it to return an explicit error. Also change clone_mnt() for consistency and because union mounts will add new error cases. Thanks to Andreas Gruenbacher for a bug fix. [AV: folded braino fix by Dan Carpenter] Original-author: Valerie Aurora Signed-off-by: David Howells Cc: Valerie Aurora Cc: Andreas Gruenbacher Signed-off-by: Al Viro diff --git a/fs/namespace.c b/fs/namespace.c index 8f412ab..be1b07a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -708,56 +708,60 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, int flag) { struct super_block *sb = old->mnt.mnt_sb; - struct mount *mnt = alloc_vfsmnt(old->mnt_devname); + struct mount *mnt; + int err; - if (mnt) { - if (flag & (CL_SLAVE | CL_PRIVATE)) - mnt->mnt_group_id = 0; /* not a peer of original */ - else - mnt->mnt_group_id = old->mnt_group_id; - - if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { - int err = mnt_alloc_group_id(mnt); - if (err) - goto out_free; - } + mnt = alloc_vfsmnt(old->mnt_devname); + if (!mnt) + return ERR_PTR(-ENOMEM); - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; - atomic_inc(&sb->s_active); - mnt->mnt.mnt_sb = sb; - mnt->mnt.mnt_root = dget(root); - mnt->mnt_mountpoint = mnt->mnt.mnt_root; - mnt->mnt_parent = mnt; - br_write_lock(&vfsmount_lock); - list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(&vfsmount_lock); + if (flag & (CL_SLAVE | CL_PRIVATE)) + mnt->mnt_group_id = 0; /* not a peer of original */ + else + mnt->mnt_group_id = old->mnt_group_id; - if (flag & CL_SLAVE) { - list_add(&mnt->mnt_slave, &old->mnt_slave_list); - mnt->mnt_master = old; - CLEAR_MNT_SHARED(mnt); - } else if (!(flag & CL_PRIVATE)) { - if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old)) - list_add(&mnt->mnt_share, &old->mnt_share); - if (IS_MNT_SLAVE(old)) - list_add(&mnt->mnt_slave, &old->mnt_slave); - mnt->mnt_master = old->mnt_master; - } - if (flag & CL_MAKE_SHARED) - set_mnt_shared(mnt); - - /* stick the duplicate mount on the same expiry list - * as the original if that was on one */ - if (flag & CL_EXPIRE) { - if (!list_empty(&old->mnt_expire)) - list_add(&mnt->mnt_expire, &old->mnt_expire); - } + if ((flag & CL_MAKE_SHARED) && !mnt->mnt_group_id) { + err = mnt_alloc_group_id(mnt); + if (err) + goto out_free; + } + + mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD; + atomic_inc(&sb->s_active); + mnt->mnt.mnt_sb = sb; + mnt->mnt.mnt_root = dget(root); + mnt->mnt_mountpoint = mnt->mnt.mnt_root; + mnt->mnt_parent = mnt; + br_write_lock(&vfsmount_lock); + list_add_tail(&mnt->mnt_instance, &sb->s_mounts); + br_write_unlock(&vfsmount_lock); + + if (flag & CL_SLAVE) { + list_add(&mnt->mnt_slave, &old->mnt_slave_list); + mnt->mnt_master = old; + CLEAR_MNT_SHARED(mnt); + } else if (!(flag & CL_PRIVATE)) { + if ((flag & CL_MAKE_SHARED) || IS_MNT_SHARED(old)) + list_add(&mnt->mnt_share, &old->mnt_share); + if (IS_MNT_SLAVE(old)) + list_add(&mnt->mnt_slave, &old->mnt_slave); + mnt->mnt_master = old->mnt_master; + } + if (flag & CL_MAKE_SHARED) + set_mnt_shared(mnt); + + /* stick the duplicate mount on the same expiry list + * as the original if that was on one */ + if (flag & CL_EXPIRE) { + if (!list_empty(&old->mnt_expire)) + list_add(&mnt->mnt_expire, &old->mnt_expire); } + return mnt; out_free: free_vfsmnt(mnt); - return NULL; + return ERR_PTR(err); } static inline void mntfree(struct mount *mnt) @@ -1242,11 +1246,12 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, struct path path; if (!(flag & CL_COPY_ALL) && IS_MNT_UNBINDABLE(mnt)) - return NULL; + return ERR_PTR(-EINVAL); res = q = clone_mnt(mnt, dentry, flag); - if (!q) - goto Enomem; + if (IS_ERR(q)) + return q; + q->mnt_mountpoint = mnt->mnt_mountpoint; p = mnt; @@ -1268,8 +1273,8 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, path.mnt = &q->mnt; path.dentry = p->mnt_mountpoint; q = clone_mnt(p, p->mnt.mnt_root, flag); - if (!q) - goto Enomem; + if (IS_ERR(q)) + goto out; br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); @@ -1277,7 +1282,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, } } return res; -Enomem: +out: if (res) { LIST_HEAD(umount_list); br_write_lock(&vfsmount_lock); @@ -1285,9 +1290,11 @@ Enomem: br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); } - return NULL; + return q; } +/* Caller should check returned pointer for errors */ + struct vfsmount *collect_mounts(struct path *path) { struct mount *tree; @@ -1295,7 +1302,9 @@ struct vfsmount *collect_mounts(struct path *path) tree = copy_tree(real_mount(path->mnt), path->dentry, CL_COPY_ALL | CL_PRIVATE); up_write(&namespace_sem); - return tree ? &tree->mnt : NULL; + if (IS_ERR(tree)) + return NULL; + return &tree->mnt; } void drop_collected_mounts(struct vfsmount *mnt) @@ -1590,14 +1599,15 @@ static int do_loopback(struct path *path, char *old_name, if (!check_mnt(real_mount(path->mnt)) || !check_mnt(old)) goto out2; - err = -ENOMEM; if (recurse) mnt = copy_tree(old, old_path.dentry, 0); else mnt = clone_mnt(old, old_path.dentry, 0); - if (!mnt) - goto out2; + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); + goto out; + } err = graft_tree(mnt, path); if (err) { @@ -2211,10 +2221,10 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, down_write(&namespace_sem); /* First pass: copy the tree topology */ new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); - if (!new) { + if (IS_ERR(new)) { up_write(&namespace_sem); kfree(new_ns); - return ERR_PTR(-ENOMEM); + return ERR_CAST(new); } new_ns->root = new; br_write_lock(&vfsmount_lock); diff --git a/fs/pnode.c b/fs/pnode.c index bed378d..3e000a5 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -237,8 +237,9 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, source = get_source(m, prev_dest_mnt, prev_src_mnt, &type); - if (!(child = copy_tree(source, source->mnt.mnt_root, type))) { - ret = -ENOMEM; + child = copy_tree(source, source->mnt.mnt_root, type); + if (IS_ERR(child)) { + ret = PTR_ERR(child); list_splice(tree_list, tmp_list.prev); goto out; } diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 5bf0790..3a5ca58 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -595,7 +595,7 @@ void audit_trim_trees(void) root_mnt = collect_mounts(&path); path_put(&path); - if (!root_mnt) + if (IS_ERR(root_mnt)) goto skip_it; spin_lock(&hash_lock); @@ -669,8 +669,8 @@ int audit_add_tree_rule(struct audit_krule *rule) goto Err; mnt = collect_mounts(&path); path_put(&path); - if (!mnt) { - err = -ENOMEM; + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); goto Err; } @@ -719,8 +719,8 @@ int audit_tag_tree(char *old, char *new) return err; tagged = collect_mounts(&path2); path_put(&path2); - if (!tagged) - return -ENOMEM; + if (IS_ERR(tagged)) + return PTR_ERR(tagged); err = kern_path(old, 0, &path1); if (err) { -- cgit v0.10.2 From f015f1267b23d3530d3f874243fb83cb5f443005 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:28 +0100 Subject: VFS: Comment mount following code Add comments describing what the directions "up" and "down" mean and ref count handling to the VFS mount following family of functions. Signed-off-by: Valerie Aurora (Original author) Signed-off-by: David Howells Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 0e1b9c3..c6dcb4c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -672,6 +672,16 @@ static int follow_up_rcu(struct path *path) return 1; } +/* + * follow_up - Find the mountpoint of path's vfsmount + * + * Given a path, find the mountpoint of its source file system. + * Replace @path with the path of the mountpoint in the parent mount. + * Up is towards /. + * + * Return 1 if we went up a level and 0 if we were already at the + * root. + */ int follow_up(struct path *path) { struct mount *mnt = real_mount(path->mnt); diff --git a/fs/namespace.c b/fs/namespace.c index be1b07a..c53d338 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -515,8 +515,20 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, } /* - * lookup_mnt increments the ref count before returning - * the vfsmount struct. + * lookup_mnt - Return the first child mount mounted at path + * + * "First" means first mounted chronologically. If you create the + * following mounts: + * + * mount /dev/sda1 /mnt + * mount /dev/sda2 /mnt + * mount /dev/sda3 /mnt + * + * Then lookup_mnt() on the base /mnt dentry in the root mount will + * return successively the root dentry and vfsmount of /dev/sda1, then + * /dev/sda2, then /dev/sda3, then NULL. + * + * lookup_mnt takes a reference to the found vfsmount. */ struct vfsmount *lookup_mnt(struct path *path) { -- cgit v0.10.2 From 9249e17fe094d853d1ef7475dd559a2cc7e23d42 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:37 +0100 Subject: VFS: Pass mount flags to sget() Pass mount flags to sget() so that it can use them in initialising a new superblock before the set function is called. They could also be passed to the compare function. Signed-off-by: David Howells Signed-off-by: Al Viro diff --git a/drivers/mtd/mtdsuper.c b/drivers/mtd/mtdsuper.c index a90bfe7..334da5f 100644 --- a/drivers/mtd/mtdsuper.c +++ b/drivers/mtd/mtdsuper.c @@ -63,7 +63,7 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, struct super_block *sb; int ret; - sb = sget(fs_type, get_sb_mtd_compare, get_sb_mtd_set, mtd); + sb = sget(fs_type, get_sb_mtd_compare, get_sb_mtd_set, flags, mtd); if (IS_ERR(sb)) goto out_error; @@ -74,8 +74,6 @@ static struct dentry *mount_mtd_aux(struct file_system_type *fs_type, int flags, pr_debug("MTDSB: New superblock for device %d (\"%s\")\n", mtd->index, mtd->name); - sb->s_flags = flags; - ret = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (ret < 0) { deactivate_locked_super(sb); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 8c92a9b..137d503 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -89,7 +89,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, if (v9ses->cache) sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE; - sb->s_flags = flags | MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; + sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME; if (!v9ses->cache) sb->s_flags |= MS_SYNCHRONOUS; @@ -137,7 +137,7 @@ static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags, goto close_session; } - sb = sget(fs_type, NULL, v9fs_set_super, v9ses); + sb = sget(fs_type, NULL, v9fs_set_super, flags, v9ses); if (IS_ERR(sb)) { retval = PTR_ERR(sb); goto clunk_fid; diff --git a/fs/afs/super.c b/fs/afs/super.c index f02b31e..df8c604 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -395,7 +395,7 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, as->volume = vol; /* allocate a deviceless superblock */ - sb = sget(fs_type, afs_test_super, afs_set_super, as); + sb = sget(fs_type, afs_test_super, afs_set_super, flags, as); if (IS_ERR(sb)) { ret = PTR_ERR(sb); afs_put_volume(vol); @@ -406,7 +406,6 @@ static struct dentry *afs_mount(struct file_system_type *fs_type, if (!sb->s_root) { /* initial superblock/root creation */ _debug("create"); - sb->s_flags = flags; ret = afs_fill_super(sb, ¶ms); if (ret < 0) { deactivate_locked_super(sb); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index e239915..b19d755 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1068,7 +1068,8 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } bdev = fs_devices->latest_bdev; - s = sget(fs_type, btrfs_test_super, btrfs_set_super, fs_info); + s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | MS_NOSEC, + fs_info); if (IS_ERR(s)) { error = PTR_ERR(s); goto error_close_devices; @@ -1082,7 +1083,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags | MS_NOSEC; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); btrfs_sb(s)->bdev_holder = fs_type; error = btrfs_fill_super(s, fs_devices, data, diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 1e67dd7..7076109 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -871,7 +871,7 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type, if (ceph_test_opt(fsc->client, NOSHARE)) compare_super = NULL; - sb = sget(fs_type, compare_super, ceph_set_super, fsc); + sb = sget(fs_type, compare_super, ceph_set_super, flags, fsc); if (IS_ERR(sb)) { res = ERR_CAST(sb); goto out; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c0c2751..a7610cf 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -637,7 +637,10 @@ cifs_do_mount(struct file_system_type *fs_type, mnt_data.cifs_sb = cifs_sb; mnt_data.flags = flags; - sb = sget(fs_type, cifs_match_super, cifs_set_super, &mnt_data); + /* BB should we make this contingent on mount parm? */ + flags |= MS_NODIRATIME | MS_NOATIME; + + sb = sget(fs_type, cifs_match_super, cifs_set_super, flags, &mnt_data); if (IS_ERR(sb)) { root = ERR_CAST(sb); cifs_umount(cifs_sb); @@ -648,10 +651,6 @@ cifs_do_mount(struct file_system_type *fs_type, cFYI(1, "Use existing superblock"); cifs_umount(cifs_sb); } else { - sb->s_flags = flags; - /* BB should we make this contingent on mount parm? */ - sb->s_flags |= MS_NODIRATIME | MS_NOATIME; - rc = cifs_read_super(sb); if (rc) { root = ERR_PTR(rc); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 979c1e3..14afbab 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -439,15 +439,15 @@ static struct dentry *devpts_mount(struct file_system_type *fs_type, return ERR_PTR(error); if (opts.newinstance) - s = sget(fs_type, NULL, set_anon_super, NULL); + s = sget(fs_type, NULL, set_anon_super, flags, NULL); else - s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL); + s = sget(fs_type, compare_init_pts_sb, set_anon_super, flags, + NULL); if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { - s->s_flags = flags; error = devpts_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) goto out_undo_sget; diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 6895493..7edeb3d 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -499,13 +499,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags goto out; } - s = sget(fs_type, NULL, set_anon_super, NULL); + s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) { rc = PTR_ERR(s); goto out; } - s->s_flags = flags; rc = bdi_setup_and_register(&sbi->bdi, "ecryptfs", BDI_CAP_MAP_COPY); if (rc) goto out1; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b8c250f..6c90607 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1286,7 +1286,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, error = -EBUSY; goto error_bdev; } - s = sget(fs_type, test_gfs2_super, set_gfs2_super, bdev); + s = sget(fs_type, test_gfs2_super, set_gfs2_super, flags, bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); error = PTR_ERR(s); if (IS_ERR(s)) @@ -1316,7 +1316,6 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags; s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); @@ -1360,7 +1359,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, dev_name, error); return ERR_PTR(error); } - s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, + s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, path.dentry->d_inode->i_sb->s_bdev); path_put(&path); if (IS_ERR(s)) { diff --git a/fs/libfs.c b/fs/libfs.c index ebd03f6..a74cb17 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -222,15 +222,15 @@ struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name, const struct super_operations *ops, const struct dentry_operations *dops, unsigned long magic) { - struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + struct super_block *s; struct dentry *dentry; struct inode *root; struct qstr d_name = QSTR_INIT(name, strlen(name)); + s = sget(fs_type, NULL, set_anon_super, MS_NOUSER, NULL); if (IS_ERR(s)) return ERR_CAST(s); - s->s_flags = MS_NOUSER; s->s_maxbytes = MAX_LFS_FILESIZE; s->s_blocksize = PAGE_SIZE; s->s_blocksize_bits = PAGE_SHIFT; diff --git a/fs/logfs/super.c b/fs/logfs/super.c index 97bca62..345c24b 100644 --- a/fs/logfs/super.c +++ b/fs/logfs/super.c @@ -519,7 +519,7 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super, log_super("LogFS: Start mount %x\n", mount_count++); err = -EINVAL; - sb = sget(type, logfs_sb_test, logfs_sb_set, super); + sb = sget(type, logfs_sb_test, logfs_sb_set, flags | MS_NOATIME, super); if (IS_ERR(sb)) { super->s_devops->put_device(super); kfree(super); @@ -542,7 +542,6 @@ static struct dentry *logfs_get_sb_device(struct logfs_super *super, sb->s_maxbytes = (1ull << 43) - 1; sb->s_max_links = LOGFS_LINK_MAX; sb->s_op = &logfs_super_operations; - sb->s_flags = flags | MS_NOATIME; err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY); if (err) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 0622819..8b2a297 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2419,7 +2419,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type, sb_mntdata.mntflags |= MS_SYNCHRONOUS; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); + s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata); if (IS_ERR(s)) { mntroot = ERR_CAST(s); goto out_err_nosb; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1099a76..d57c42f9 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -1288,7 +1288,8 @@ nilfs_mount(struct file_system_type *fs_type, int flags, err = -EBUSY; goto failed; } - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev); + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags, + sd.bdev); mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) { err = PTR_ERR(s); @@ -1301,7 +1302,6 @@ nilfs_mount(struct file_system_type *fs_type, int flags, s_new = true; /* New superblock instance created */ - s->s_flags = flags; s->s_mode = mode; strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(sd.bdev)); diff --git a/fs/proc/root.c b/fs/proc/root.c index 568b202..9a2d9fd 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -111,7 +111,7 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, options = data; } - sb = sget(fs_type, proc_test_super, proc_set_super, ns); + sb = sget(fs_type, proc_test_super, proc_set_super, flags, ns); if (IS_ERR(sb)) return ERR_CAST(sb); @@ -121,7 +121,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, } if (!sb->s_root) { - sb->s_flags = flags; err = proc_fill_super(sb); if (err) { deactivate_locked_super(sb); diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 2c1ade6..e60e870 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -403,7 +403,7 @@ static void *r_start(struct seq_file *m, loff_t * pos) if (l) return NULL; - if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, s))) + if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, s))) return NULL; up_write(&s->s_umount); diff --git a/fs/super.c b/fs/super.c index cf00177..c743fb3 100644 --- a/fs/super.c +++ b/fs/super.c @@ -105,11 +105,12 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc) /** * alloc_super - create new superblock * @type: filesystem type superblock should belong to + * @flags: the mount flags * * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. */ -static struct super_block *alloc_super(struct file_system_type *type) +static struct super_block *alloc_super(struct file_system_type *type, int flags) { struct super_block *s = kzalloc(sizeof(struct super_block), GFP_USER); static const struct super_operations default_op; @@ -136,6 +137,7 @@ static struct super_block *alloc_super(struct file_system_type *type) #else INIT_LIST_HEAD(&s->s_files); #endif + s->s_flags = flags; s->s_bdi = &default_backing_dev_info; INIT_HLIST_NODE(&s->s_instances); INIT_HLIST_BL_HEAD(&s->s_anon); @@ -415,11 +417,13 @@ EXPORT_SYMBOL(generic_shutdown_super); * @type: filesystem type superblock should belong to * @test: comparison callback * @set: setup callback + * @flags: mount flags * @data: argument to each of them */ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), + int flags, void *data) { struct super_block *s = NULL; @@ -450,7 +454,7 @@ retry: } if (!s) { spin_unlock(&sb_lock); - s = alloc_super(type); + s = alloc_super(type, flags); if (!s) return ERR_PTR(-ENOMEM); goto retry; @@ -925,13 +929,12 @@ struct dentry *mount_ns(struct file_system_type *fs_type, int flags, { struct super_block *sb; - sb = sget(fs_type, ns_test_super, ns_set_super, data); + sb = sget(fs_type, ns_test_super, ns_set_super, flags, data); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { int err; - sb->s_flags = flags; err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) { deactivate_locked_super(sb); @@ -992,7 +995,8 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, error = -EBUSY; goto error_bdev; } - s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); + s = sget(fs_type, test_bdev_super, set_bdev_super, flags | MS_NOSEC, + bdev); mutex_unlock(&bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) goto error_s; @@ -1017,7 +1021,6 @@ struct dentry *mount_bdev(struct file_system_type *fs_type, } else { char b[BDEVNAME_SIZE]; - s->s_flags = flags | MS_NOSEC; s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); @@ -1062,13 +1065,11 @@ struct dentry *mount_nodev(struct file_system_type *fs_type, int (*fill_super)(struct super_block *, void *, int)) { int error; - struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); + struct super_block *s = sget(fs_type, NULL, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); - s->s_flags = flags; - error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); @@ -1091,11 +1092,10 @@ struct dentry *mount_single(struct file_system_type *fs_type, struct super_block *s; int error; - s = sget(fs_type, compare_single, set_anon_super, NULL); + s = sget(fs_type, compare_single, set_anon_super, flags, NULL); if (IS_ERR(s)) return ERR_CAST(s); if (!s->s_root) { - s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(s); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index c15a7a3..71eb7e2 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -118,13 +118,12 @@ static struct dentry *sysfs_mount(struct file_system_type *fs_type, for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) info->ns[type] = kobj_ns_grab_current(type); - sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info); + sb = sget(fs_type, sysfs_test_super, sysfs_set_super, flags, info); if (IS_ERR(sb) || sb->s_fs_info != info) free_sysfs_super_info(info); if (IS_ERR(sb)) return ERR_CAST(sb); if (!sb->s_root) { - sb->s_flags = flags; error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (error) { deactivate_locked_super(sb); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5862dd9..1c766c3 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -2136,7 +2136,7 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id); - sb = sget(fs_type, sb_test, sb_set, c); + sb = sget(fs_type, sb_test, sb_set, flags, c); if (IS_ERR(sb)) { err = PTR_ERR(sb); kfree(c); @@ -2153,7 +2153,6 @@ static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, goto out_deact; } } else { - sb->s_flags = flags; err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); if (err) goto out_deact; diff --git a/include/linux/fs.h b/include/linux/fs.h index 2f857e9..48548bd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1914,7 +1914,7 @@ void free_anon_bdev(dev_t); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), - void *data); + int flags, void *data); extern struct dentry *mount_pseudo(struct file_system_type *, char *, const struct super_operations *ops, const struct dentry_operations *dops, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 0cd1314..af2b564 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1587,7 +1587,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, opts.new_root = new_root; /* Locate an existing or new sb for this hierarchy */ - sb = sget(fs_type, cgroup_test_super, cgroup_set_super, &opts); + sb = sget(fs_type, cgroup_test_super, cgroup_set_super, 0, &opts); if (IS_ERR(sb)) { ret = PTR_ERR(sb); cgroup_drop_root(opts.new_root); -- cgit v0.10.2 From 0bdaea9017b9d2b9996e153a71ee03555969b80e Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 25 Jun 2012 12:55:46 +0100 Subject: VFS: Split inode_permission() Split inode_permission() into inode- and superblock-dependent parts. This is aimed at unionmounts where the superblock from the upper layer has to be checked rather than the superblock from the lower layer as the upper layer may be writable, thus allowing an unwritable file from the lower layer to be copied up and modified. Original-author: Valerie Aurora Signed-off-by: David Howells (Further development) Signed-off-by: Al Viro diff --git a/fs/internal.h b/fs/internal.h index 8a9f5fa..a6fd56c 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -42,6 +42,11 @@ static inline int __sync_blockdev(struct block_device *bdev, int wait) extern void __init chrdev_init(void); /* + * namei.c + */ +extern int __inode_permission(struct inode *, int); + +/* * namespace.c */ extern int copy_mount_options(const void __user *, unsigned long *); diff --git a/fs/namei.c b/fs/namei.c index c6dcb4c..1b64746 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -315,31 +315,22 @@ static inline int do_inode_permission(struct inode *inode, int mask) } /** - * inode_permission - check for access rights to a given inode - * @inode: inode to check permission on - * @mask: right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC, ...) + * __inode_permission - Check for access rights to a given inode + * @inode: Inode to check permission on + * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * - * Used to check for read/write/execute permissions on an inode. - * We use "fsuid" for this, letting us set arbitrary permissions - * for filesystem access without changing the "normal" uids which - * are used for other things. + * Check for read/write/execute permissions on an inode. * * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. + * + * This does not check for a read-only file system. You probably want + * inode_permission(). */ -int inode_permission(struct inode *inode, int mask) +int __inode_permission(struct inode *inode, int mask) { int retval; if (unlikely(mask & MAY_WRITE)) { - umode_t mode = inode->i_mode; - - /* - * Nobody gets write access to a read-only fs. - */ - if (IS_RDONLY(inode) && - (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) - return -EROFS; - /* * Nobody gets write access to an immutable file. */ @@ -359,6 +350,47 @@ int inode_permission(struct inode *inode, int mask) } /** + * sb_permission - Check superblock-level permissions + * @sb: Superblock of inode to check permission on + * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * + * Separate out file-system wide checks from inode-specific permission checks. + */ +static int sb_permission(struct super_block *sb, struct inode *inode, int mask) +{ + if (unlikely(mask & MAY_WRITE)) { + umode_t mode = inode->i_mode; + + /* Nobody gets write access to a read-only fs. */ + if ((sb->s_flags & MS_RDONLY) && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) + return -EROFS; + } + return 0; +} + +/** + * inode_permission - Check for access rights to a given inode + * @inode: Inode to check permission on + * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) + * + * Check for read/write/execute permissions on an inode. We use fs[ug]id for + * this, letting us set arbitrary permissions for filesystem access without + * changing the "normal" UIDs which are used for other things. + * + * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask. + */ +int inode_permission(struct inode *inode, int mask) +{ + int retval; + + retval = sb_permission(inode->i_sb, inode, mask); + if (retval) + return retval; + return __inode_permission(inode, mask); +} + +/** * path_get - get a reference to a path * @path: path to get the reference to * -- cgit v0.10.2 From 1e0ea0014479f066ba26f937e8740b8902229616 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 22 Jul 2012 23:46:21 +0400 Subject: use __lookup_hash() in kern_path_parent() No need to bother with lookup_one_len() here - it's an overkill Signed-off-by Al Viro diff --git a/fs/namei.c b/fs/namei.c index 1b64746..c14dfac 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1875,7 +1875,7 @@ struct dentry *kern_path_locked(const char *name, struct path *path) return ERR_PTR(-EINVAL); } mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - d = lookup_one_len(nd.last.name, nd.path.dentry, nd.last.len); + d = __lookup_hash(&nd.last, nd.path.dentry, 0); if (IS_ERR(d)) { mutex_unlock(&nd.path.dentry->d_inode->i_mutex); path_put(&nd.path); -- cgit v0.10.2 From 7266702805f9d824f92ce5c4069eca65d0f21d28 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Jul 2012 14:10:52 +0400 Subject: signal: make sure we don't get stopped with pending task_work Signed-off-by: Al Viro diff --git a/kernel/signal.c b/kernel/signal.c index 6771027..be4f856 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1971,6 +1971,13 @@ static void ptrace_do_notify(int signr, int exit_code, int why) void ptrace_notify(int exit_code) { BUG_ON((exit_code & (0x7f | ~0xffff)) != SIGTRAP); + if (unlikely(current->task_works)) { + if (test_and_clear_ti_thread_flag(current_thread_info(), + TIF_NOTIFY_RESUME)) { + smp_mb__after_clear_bit(); + task_work_run(); + } + } spin_lock_irq(¤t->sighand->siglock); ptrace_do_notify(SIGTRAP, exit_code, CLD_TRAPPED); @@ -2191,6 +2198,14 @@ int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct signal_struct *signal = current->signal; int signr; + if (unlikely(current->task_works)) { + if (test_and_clear_ti_thread_flag(current_thread_info(), + TIF_NOTIFY_RESUME)) { + smp_mb__after_clear_bit(); + task_work_run(); + } + } + if (unlikely(uprobe_deny_signal())) return 0; -- cgit v0.10.2 From 41f9d29f09ca0b22c3631e8a39676e74cda9bcc0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jun 2012 22:10:04 +0400 Subject: trimming task_work: kill ->data get rid of the only user of ->data; this is _not_ the final variant - in the end we'll have task_work and rcu_head identical and just use cred->rcu, at which point the separate allocation will be gone completely. Signed-off-by: Al Viro diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 294d5d5..627421c 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -10,14 +10,12 @@ typedef void (*task_work_func_t)(struct task_work *); struct task_work { struct hlist_node hlist; task_work_func_t func; - void *data; }; static inline void -init_task_work(struct task_work *twork, task_work_func_t func, void *data) +init_task_work(struct task_work *twork, task_work_func_t func) { twork->func = func; - twork->data = data; } int task_work_add(struct task_struct *task, struct task_work *twork, bool); diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 8c54823..d1dd547 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -830,7 +830,7 @@ static int irq_thread(void *data) sched_setscheduler(current, SCHED_FIFO, ¶m); - init_task_work(&on_exit_work, irq_thread_dtor, NULL); + init_task_work(&on_exit_work, irq_thread_dtor); task_work_add(current, &on_exit_work, false); while (!irq_wait_for_interrupt(action)) { diff --git a/security/keys/internal.h b/security/keys/internal.h index 3dcbf86..b510a31 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -148,6 +148,10 @@ extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, #define KEY_LOOKUP_PARTIAL 0x02 #define KEY_LOOKUP_FOR_UNLINK 0x04 +struct kludge { /* this will die off very soon */ + struct task_work twork; + struct cred *cred; +}; extern long join_session_keyring(const char *name); extern void key_change_session_keyring(struct task_work *twork); diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 0f5b3f0..26723ca 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1456,7 +1456,8 @@ long keyctl_session_to_parent(void) { struct task_struct *me, *parent; const struct cred *mycred, *pcred; - struct task_work *newwork, *oldwork; + struct kludge *newwork; + struct task_work *oldwork; key_ref_t keyring_r; struct cred *cred; int ret; @@ -1466,7 +1467,7 @@ long keyctl_session_to_parent(void) return PTR_ERR(keyring_r); ret = -ENOMEM; - newwork = kmalloc(sizeof(struct task_work), GFP_KERNEL); + newwork = kmalloc(sizeof(struct kludge), GFP_KERNEL); if (!newwork) goto error_keyring; @@ -1478,7 +1479,8 @@ long keyctl_session_to_parent(void) goto error_newwork; cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r); - init_task_work(newwork, key_change_session_keyring, cred); + init_task_work(&newwork->twork, key_change_session_keyring); + newwork->cred = cred; me = current; rcu_read_lock(); @@ -1527,18 +1529,18 @@ long keyctl_session_to_parent(void) /* the replacement session keyring is applied just prior to userspace * restarting */ - ret = task_work_add(parent, newwork, true); + ret = task_work_add(parent, &newwork->twork, true); if (!ret) newwork = NULL; unlock: write_unlock_irq(&tasklist_lock); rcu_read_unlock(); if (oldwork) { - put_cred(oldwork->data); + put_cred(container_of(oldwork, struct kludge, twork)->cred); kfree(oldwork); } if (newwork) { - put_cred(newwork->data); + put_cred(newwork->cred); kfree(newwork); } return ret; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 4ad54ee..c9b07c9 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -837,9 +837,10 @@ error: void key_change_session_keyring(struct task_work *twork) { const struct cred *old = current_cred(); - struct cred *new = twork->data; + struct kludge *p = container_of(twork, struct kludge, twork); + struct cred *new = p->cred; - kfree(twork); + kfree(p); if (unlikely(current->flags & PF_EXITING)) { put_cred(new); return; -- cgit v0.10.2 From 158e1645e07f3e9f7e4962d7a0997f5c3b98311b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jun 2012 09:24:13 +0400 Subject: trim task_work: get rid of hlist layout based on Oleg's suggestion; single-linked list, task->task_works points to the last element, forward pointer from said last element points to head. I'd still prefer much more regular scheme with two pointers in task_work, but... Signed-off-by: Al Viro diff --git a/include/linux/sched.h b/include/linux/sched.h index 4059c0f..b9216eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1405,7 +1405,7 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; - struct hlist_head task_works; + void *task_works; struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 627421c..3b3e2c8 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -8,7 +8,7 @@ struct task_work; typedef void (*task_work_func_t)(struct task_work *); struct task_work { - struct hlist_node hlist; + struct task_work *next; task_work_func_t func; }; @@ -24,7 +24,7 @@ void task_work_run(void); static inline void exit_task_work(struct task_struct *task) { - if (unlikely(!hlist_empty(&task->task_works))) + if (unlikely(task->task_works)) task_work_run(); } diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 6a4d82b..1e98b55 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -192,7 +192,7 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) * hlist_add_head(task->task_works); */ smp_mb__after_clear_bit(); - if (unlikely(!hlist_empty(¤t->task_works))) + if (unlikely(current->task_works)) task_work_run(); } diff --git a/kernel/fork.c b/kernel/fork.c index ab5211b..bebabad 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1415,7 +1415,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, */ p->group_leader = p; INIT_LIST_HEAD(&p->thread_group); - INIT_HLIST_HEAD(&p->task_works); + p->task_works = NULL; /* Now that the task is set up, run cgroup callbacks if * necessary. We need to run them before the task is visible diff --git a/kernel/task_work.c b/kernel/task_work.c index 82d1c79..9b8948d 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -19,7 +19,12 @@ task_work_add(struct task_struct *task, struct task_work *twork, bool notify) */ raw_spin_lock_irqsave(&task->pi_lock, flags); if (likely(!(task->flags & PF_EXITING))) { - hlist_add_head(&twork->hlist, &task->task_works); + struct task_work *last = task->task_works; + struct task_work *first = last ? last->next : twork; + twork->next = first; + if (last) + last->next = twork; + task->task_works = twork; err = 0; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); @@ -34,51 +39,48 @@ struct task_work * task_work_cancel(struct task_struct *task, task_work_func_t func) { unsigned long flags; - struct task_work *twork; - struct hlist_node *pos; + struct task_work *last, *res = NULL; raw_spin_lock_irqsave(&task->pi_lock, flags); - hlist_for_each_entry(twork, pos, &task->task_works, hlist) { - if (twork->func == func) { - hlist_del(&twork->hlist); - goto found; + last = task->task_works; + if (last) { + struct task_work *q = last, *p = q->next; + while (1) { + if (p->func == func) { + q->next = p->next; + if (p == last) + task->task_works = q == p ? NULL : q; + res = p; + break; + } + if (p == last) + break; + q = p; + p = q->next; } } - twork = NULL; - found: raw_spin_unlock_irqrestore(&task->pi_lock, flags); - - return twork; + return res; } void task_work_run(void) { struct task_struct *task = current; - struct hlist_head task_works; - struct hlist_node *pos; + struct task_work *p, *q; raw_spin_lock_irq(&task->pi_lock); - hlist_move_list(&task->task_works, &task_works); + p = task->task_works; + task->task_works = NULL; raw_spin_unlock_irq(&task->pi_lock); - if (unlikely(hlist_empty(&task_works))) + if (unlikely(!p)) return; - /* - * We use hlist to save the space in task_struct, but we want fifo. - * Find the last entry, the list should be short, then process them - * in reverse order. - */ - for (pos = task_works.first; pos->next; pos = pos->next) - ; - - for (;;) { - struct hlist_node **pprev = pos->pprev; - struct task_work *twork = container_of(pos, struct task_work, - hlist); - twork->func(twork); - if (pprev == &task_works.first) - break; - pos = container_of(pprev, struct hlist_node, next); + q = p->next; /* head */ + p->next = NULL; /* cut it */ + while (q) { + p = q->next; + q->func(q); + q = p; } } -- cgit v0.10.2 From 67d1214551e800f9fe7dc7c47a346d2df0fafed5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jun 2012 11:07:19 +0400 Subject: merge task_work and rcu_head, get rid of separate allocation for keyring case task_work and rcu_head are identical now; merge them (calling the result struct callback_head, rcu_head #define'd to it), kill separate allocation in security/keys since we can just use cred->rcu now. Signed-off-by: Al Viro diff --git a/include/linux/sched.h b/include/linux/sched.h index b9216eb..af3555c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1405,7 +1405,7 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; - void *task_works; + struct callback_head *task_works; struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL diff --git a/include/linux/task_work.h b/include/linux/task_work.h index 3b3e2c8..fb46b03 100644 --- a/include/linux/task_work.h +++ b/include/linux/task_work.h @@ -4,22 +4,16 @@ #include #include -struct task_work; -typedef void (*task_work_func_t)(struct task_work *); - -struct task_work { - struct task_work *next; - task_work_func_t func; -}; +typedef void (*task_work_func_t)(struct callback_head *); static inline void -init_task_work(struct task_work *twork, task_work_func_t func) +init_task_work(struct callback_head *twork, task_work_func_t func) { twork->func = func; } -int task_work_add(struct task_struct *task, struct task_work *twork, bool); -struct task_work *task_work_cancel(struct task_struct *, task_work_func_t); +int task_work_add(struct task_struct *task, struct callback_head *twork, bool); +struct callback_head *task_work_cancel(struct task_struct *, task_work_func_t); void task_work_run(void); static inline void exit_task_work(struct task_struct *task) diff --git a/include/linux/types.h b/include/linux/types.h index 9c1bd53..bf0dd75 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -246,14 +246,15 @@ struct ustat { }; /** - * struct rcu_head - callback structure for use with RCU + * struct callback_head - callback structure for use with RCU and task_work * @next: next update requests in a list * @func: actual update function to call after the grace period. */ -struct rcu_head { - struct rcu_head *next; - void (*func)(struct rcu_head *head); +struct callback_head { + struct callback_head *next; + void (*func)(struct callback_head *head); }; +#define rcu_head callback_head #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index d1dd547..814c9ef 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -781,7 +781,7 @@ static void wake_threads_waitq(struct irq_desc *desc) wake_up(&desc->wait_for_threads); } -static void irq_thread_dtor(struct task_work *unused) +static void irq_thread_dtor(struct callback_head *unused) { struct task_struct *tsk = current; struct irq_desc *desc; @@ -813,7 +813,7 @@ static void irq_thread_dtor(struct task_work *unused) */ static int irq_thread(void *data) { - struct task_work on_exit_work; + struct callback_head on_exit_work; static const struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2, }; diff --git a/kernel/task_work.c b/kernel/task_work.c index 9b8948d..76266fb 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -3,7 +3,7 @@ #include int -task_work_add(struct task_struct *task, struct task_work *twork, bool notify) +task_work_add(struct task_struct *task, struct callback_head *twork, bool notify) { unsigned long flags; int err = -ESRCH; @@ -19,8 +19,8 @@ task_work_add(struct task_struct *task, struct task_work *twork, bool notify) */ raw_spin_lock_irqsave(&task->pi_lock, flags); if (likely(!(task->flags & PF_EXITING))) { - struct task_work *last = task->task_works; - struct task_work *first = last ? last->next : twork; + struct callback_head *last = task->task_works; + struct callback_head *first = last ? last->next : twork; twork->next = first; if (last) last->next = twork; @@ -35,16 +35,16 @@ task_work_add(struct task_struct *task, struct task_work *twork, bool notify) return err; } -struct task_work * +struct callback_head * task_work_cancel(struct task_struct *task, task_work_func_t func) { unsigned long flags; - struct task_work *last, *res = NULL; + struct callback_head *last, *res = NULL; raw_spin_lock_irqsave(&task->pi_lock, flags); last = task->task_works; if (last) { - struct task_work *q = last, *p = q->next; + struct callback_head *q = last, *p = q->next; while (1) { if (p->func == func) { q->next = p->next; @@ -66,7 +66,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func) void task_work_run(void) { struct task_struct *task = current; - struct task_work *p, *q; + struct callback_head *p, *q; raw_spin_lock_irq(&task->pi_lock); p = task->task_works; diff --git a/security/keys/internal.h b/security/keys/internal.h index b510a31..c246ba5 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -148,12 +148,8 @@ extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, #define KEY_LOOKUP_PARTIAL 0x02 #define KEY_LOOKUP_FOR_UNLINK 0x04 -struct kludge { /* this will die off very soon */ - struct task_work twork; - struct cred *cred; -}; extern long join_session_keyring(const char *name); -extern void key_change_session_keyring(struct task_work *twork); +extern void key_change_session_keyring(struct callback_head *twork); extern struct work_struct key_gc_work; extern unsigned key_gc_delay; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 26723ca..0291b3f 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1456,8 +1456,7 @@ long keyctl_session_to_parent(void) { struct task_struct *me, *parent; const struct cred *mycred, *pcred; - struct kludge *newwork; - struct task_work *oldwork; + struct callback_head *newwork, *oldwork; key_ref_t keyring_r; struct cred *cred; int ret; @@ -1467,20 +1466,17 @@ long keyctl_session_to_parent(void) return PTR_ERR(keyring_r); ret = -ENOMEM; - newwork = kmalloc(sizeof(struct kludge), GFP_KERNEL); - if (!newwork) - goto error_keyring; /* our parent is going to need a new cred struct, a new tgcred struct * and new security data, so we allocate them here to prevent ENOMEM in * our parent */ cred = cred_alloc_blank(); if (!cred) - goto error_newwork; + goto error_keyring; + newwork = &cred->rcu; cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r); - init_task_work(&newwork->twork, key_change_session_keyring); - newwork->cred = cred; + init_task_work(newwork, key_change_session_keyring); me = current; rcu_read_lock(); @@ -1529,24 +1525,18 @@ long keyctl_session_to_parent(void) /* the replacement session keyring is applied just prior to userspace * restarting */ - ret = task_work_add(parent, &newwork->twork, true); + ret = task_work_add(parent, newwork, true); if (!ret) newwork = NULL; unlock: write_unlock_irq(&tasklist_lock); rcu_read_unlock(); - if (oldwork) { - put_cred(container_of(oldwork, struct kludge, twork)->cred); - kfree(oldwork); - } - if (newwork) { - put_cred(newwork->cred); - kfree(newwork); - } + if (oldwork) + put_cred(container_of(oldwork, struct cred, rcu)); + if (newwork) + put_cred(cred); return ret; -error_newwork: - kfree(newwork); error_keyring: key_ref_put(keyring_r); return ret; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index c9b07c9..54339cf 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -834,13 +834,11 @@ error: * Replace a process's session keyring on behalf of one of its children when * the target process is about to resume userspace execution. */ -void key_change_session_keyring(struct task_work *twork) +void key_change_session_keyring(struct callback_head *twork) { const struct cred *old = current_cred(); - struct kludge *p = container_of(twork, struct kludge, twork); - struct cred *new = p->cred; + struct cred *new = container_of(twork, struct cred, rcu); - kfree(p); if (unlikely(current->flags & PF_EXITING)) { put_cred(new); return; -- cgit v0.10.2 From ed3e694d78cc75fa79bf29698631b146fd27aa35 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jun 2012 11:31:24 +0400 Subject: move exit_task_work() past exit_files() et.al. ... and get rid of PF_EXITING check in task_work_add(). Signed-off-by: Al Viro diff --git a/kernel/exit.c b/kernel/exit.c index 2f59cc3..d17f6c4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -953,14 +953,11 @@ void do_exit(long code) exit_signals(tsk); /* sets PF_EXITING */ /* * tsk->flags are checked in the futex code to protect against - * an exiting task cleaning up the robust pi futexes, and in - * task_work_add() to avoid the race with exit_task_work(). + * an exiting task cleaning up the robust pi futexes. */ smp_mb(); raw_spin_unlock_wait(&tsk->pi_lock); - exit_task_work(tsk); - if (unlikely(in_atomic())) printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n", current->comm, task_pid_nr(current), @@ -995,6 +992,7 @@ void do_exit(long code) exit_shm(tsk); exit_files(tsk); exit_fs(tsk); + exit_task_work(tsk); check_stack_usage(); exit_thread(); diff --git a/kernel/task_work.c b/kernel/task_work.c index 76266fb..fb39608 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -5,34 +5,26 @@ int task_work_add(struct task_struct *task, struct callback_head *twork, bool notify) { + struct callback_head *last, *first; unsigned long flags; - int err = -ESRCH; -#ifndef TIF_NOTIFY_RESUME - if (notify) - return -ENOTSUPP; -#endif /* - * We must not insert the new work if the task has already passed - * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait() - * and check PF_EXITING under pi_lock. + * Not inserting the new work if the task has already passed + * exit_task_work() is the responisbility of callers. */ raw_spin_lock_irqsave(&task->pi_lock, flags); - if (likely(!(task->flags & PF_EXITING))) { - struct callback_head *last = task->task_works; - struct callback_head *first = last ? last->next : twork; - twork->next = first; - if (last) - last->next = twork; - task->task_works = twork; - err = 0; - } + last = task->task_works; + first = last ? last->next : twork; + twork->next = first; + if (last) + last->next = twork; + task->task_works = twork; raw_spin_unlock_irqrestore(&task->pi_lock, flags); /* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */ - if (likely(!err) && notify) + if (notify) set_notify_resume(task); - return err; + return 0; } struct callback_head * -- cgit v0.10.2 From a2d4c71d1559426155e5da8db3265bfa0d8d398d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Jun 2012 11:33:29 +0400 Subject: deal with task_work callbacks adding more work It doesn't matter on normal return to userland path (we'll recheck the NOTIFY_RESUME flag anyway), but in case of exit_task_work() we'll need that as soon as we get callbacks capable of triggering more task_work_add(). Signed-off-by: Al Viro diff --git a/kernel/task_work.c b/kernel/task_work.c index fb39608..91d4e17 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -60,19 +60,21 @@ void task_work_run(void) struct task_struct *task = current; struct callback_head *p, *q; - raw_spin_lock_irq(&task->pi_lock); - p = task->task_works; - task->task_works = NULL; - raw_spin_unlock_irq(&task->pi_lock); + while (1) { + raw_spin_lock_irq(&task->pi_lock); + p = task->task_works; + task->task_works = NULL; + raw_spin_unlock_irq(&task->pi_lock); - if (unlikely(!p)) - return; + if (unlikely(!p)) + return; - q = p->next; /* head */ - p->next = NULL; /* cut it */ - while (q) { - p = q->next; - q->func(q); - q = p; + q = p->next; /* head */ + p->next = NULL; /* cut it */ + while (q) { + p = q->next; + q->func(q); + q = p; + } } } -- cgit v0.10.2 From 4a9d4b024a3102fc083c925c242d98ac27b1c5f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jun 2012 09:56:45 +0400 Subject: switch fput to task_work_add ... and schedule_work() for interrupt/kernel_thread callers (and yes, now it *is* OK to call from interrupt). We are guaranteed that __fput() will be done before we return to userland (or exit). Note that for fput() from a kernel thread we get an async behaviour; it's almost always OK, but sometimes you might need to have __fput() completed before you do anything else. There are two mechanisms for that - a general barrier (flush_delayed_fput()) and explicit __fput_sync(). Both should be used with care (as was the case for fput() from kernel threads all along). See comments in fs/file_table.c for details. Signed-off-by: Al Viro diff --git a/fs/file_table.c b/fs/file_table.c index 9ace278..b3fc4d6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include #include @@ -251,7 +253,6 @@ static void __fput(struct file *file) } fops_put(file->f_op); put_pid(file->f_owner.pid); - file_sb_list_del(file); if ((file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_dec(inode); if (file->f_mode & FMODE_WRITE) @@ -263,10 +264,77 @@ static void __fput(struct file *file) mntput(mnt); } +static DEFINE_SPINLOCK(delayed_fput_lock); +static LIST_HEAD(delayed_fput_list); +static void delayed_fput(struct work_struct *unused) +{ + LIST_HEAD(head); + spin_lock_irq(&delayed_fput_lock); + list_splice_init(&delayed_fput_list, &head); + spin_unlock_irq(&delayed_fput_lock); + while (!list_empty(&head)) { + struct file *f = list_first_entry(&head, struct file, f_u.fu_list); + list_del_init(&f->f_u.fu_list); + __fput(f); + } +} + +static void ____fput(struct callback_head *work) +{ + __fput(container_of(work, struct file, f_u.fu_rcuhead)); +} + +/* + * If kernel thread really needs to have the final fput() it has done + * to complete, call this. The only user right now is the boot - we + * *do* need to make sure our writes to binaries on initramfs has + * not left us with opened struct file waiting for __fput() - execve() + * won't work without that. Please, don't add more callers without + * very good reasons; in particular, never call that with locks + * held and never call that from a thread that might need to do + * some work on any kind of umount. + */ +void flush_delayed_fput(void) +{ + delayed_fput(NULL); +} + +static DECLARE_WORK(delayed_fput_work, delayed_fput); + void fput(struct file *file) { - if (atomic_long_dec_and_test(&file->f_count)) + if (atomic_long_dec_and_test(&file->f_count)) { + struct task_struct *task = current; + file_sb_list_del(file); + if (unlikely(in_interrupt() || task->flags & PF_KTHREAD)) { + unsigned long flags; + spin_lock_irqsave(&delayed_fput_lock, flags); + list_add(&file->f_u.fu_list, &delayed_fput_list); + schedule_work(&delayed_fput_work); + spin_unlock_irqrestore(&delayed_fput_lock, flags); + return; + } + init_task_work(&file->f_u.fu_rcuhead, ____fput); + task_work_add(task, &file->f_u.fu_rcuhead, true); + } +} + +/* + * synchronous analog of fput(); for kernel threads that might be needed + * in some umount() (and thus can't use flush_delayed_fput() without + * risking deadlocks), need to wait for completion of __fput() and know + * for this specific struct file it won't involve anything that would + * need them. Use only if you really need it - at the very least, + * don't blindly convert fput() by kernel thread to that. + */ +void __fput_sync(struct file *file) +{ + if (atomic_long_dec_and_test(&file->f_count)) { + struct task_struct *task = current; + file_sb_list_del(file); + BUG_ON(!(task->flags & PF_KTHREAD)); __fput(file); + } } EXPORT_SYMBOL(fput); diff --git a/include/linux/file.h b/include/linux/file.h index 58bf158..a22408b 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -39,4 +39,7 @@ extern void put_unused_fd(unsigned int fd); extern void fd_install(unsigned int fd, struct file *file); +extern void flush_delayed_fput(void); +extern void __fput_sync(struct file *); + #endif /* __LINUX_FILE_H */ diff --git a/init/main.c b/init/main.c index b5cc0a7..3f151f6 100644 --- a/init/main.c +++ b/init/main.c @@ -68,6 +68,7 @@ #include #include #include +#include #include #include @@ -804,8 +805,8 @@ static noinline int init_post(void) system_state = SYSTEM_RUNNING; numa_default_policy(); - current->signal->flags |= SIGNAL_UNKILLABLE; + flush_delayed_fput(); if (ramdisk_execute_command) { run_init_process(ramdisk_execute_command); -- cgit v0.10.2 From 3ffa3c0e3f6e62f67fc2346ca60161dfb030083d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jun 2012 10:00:10 +0400 Subject: aio: now fput() is OK from interrupt context; get rid of manual delayed __fput() Signed-off-by: Al Viro diff --git a/fs/aio.c b/fs/aio.c index 55c4c76..71f613c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -56,13 +56,6 @@ static struct kmem_cache *kioctx_cachep; static struct workqueue_struct *aio_wq; -/* Used for rare fput completion. */ -static void aio_fput_routine(struct work_struct *); -static DECLARE_WORK(fput_work, aio_fput_routine); - -static DEFINE_SPINLOCK(fput_lock); -static LIST_HEAD(fput_head); - static void aio_kick_handler(struct work_struct *); static void aio_queue_work(struct kioctx *); @@ -479,7 +472,6 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch) { unsigned short allocated, to_alloc; long avail; - bool called_fput = false; struct kiocb *req, *n; struct aio_ring *ring; @@ -495,28 +487,11 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch) if (allocated == 0) goto out; -retry: spin_lock_irq(&ctx->ctx_lock); ring = kmap_atomic(ctx->ring_info.ring_pages[0]); avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active; BUG_ON(avail < 0); - if (avail == 0 && !called_fput) { - /* - * Handle a potential starvation case. It is possible that - * we hold the last reference on a struct file, causing us - * to delay the final fput to non-irq context. In this case, - * ctx->reqs_active is artificially high. Calling the fput - * routine here may free up a slot in the event completion - * ring, allowing this allocation to succeed. - */ - kunmap_atomic(ring); - spin_unlock_irq(&ctx->ctx_lock); - aio_fput_routine(NULL); - called_fput = true; - goto retry; - } - if (avail < allocated) { /* Trim back the number of requests. */ list_for_each_entry_safe(req, n, &batch->head, ki_batch) { @@ -570,36 +545,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req) wake_up_all(&ctx->wait); } -static void aio_fput_routine(struct work_struct *data) -{ - spin_lock_irq(&fput_lock); - while (likely(!list_empty(&fput_head))) { - struct kiocb *req = list_kiocb(fput_head.next); - struct kioctx *ctx = req->ki_ctx; - - list_del(&req->ki_list); - spin_unlock_irq(&fput_lock); - - /* Complete the fput(s) */ - if (req->ki_filp != NULL) - fput(req->ki_filp); - - /* Link the iocb into the context's free list */ - rcu_read_lock(); - spin_lock_irq(&ctx->ctx_lock); - really_put_req(ctx, req); - /* - * at that point ctx might've been killed, but actual - * freeing is RCU'd - */ - spin_unlock_irq(&ctx->ctx_lock); - rcu_read_unlock(); - - spin_lock_irq(&fput_lock); - } - spin_unlock_irq(&fput_lock); -} - /* __aio_put_req * Returns true if this put was the last user of the request. */ @@ -618,21 +563,9 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req) req->ki_cancel = NULL; req->ki_retry = NULL; - /* - * Try to optimize the aio and eventfd file* puts, by avoiding to - * schedule work in case it is not final fput() time. In normal cases, - * we would not be holding the last reference to the file*, so - * this function will be executed w/out any aio kthread wakeup. - */ - if (unlikely(!fput_atomic(req->ki_filp))) { - spin_lock(&fput_lock); - list_add(&req->ki_list, &fput_head); - spin_unlock(&fput_lock); - schedule_work(&fput_work); - } else { - req->ki_filp = NULL; - really_put_req(ctx, req); - } + fput(req->ki_filp); + req->ki_filp = NULL; + really_put_req(ctx, req); return 1; } -- cgit v0.10.2 From 6120d3dbb1220792ebea88cd475e1ec8f8620a93 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Jun 2012 10:03:05 +0400 Subject: get rid of ->scm_work_list recursion in __scm_destroy() will be cut by delaying final fput() Signed-off-by: Al Viro diff --git a/include/linux/sched.h b/include/linux/sched.h index af3555c..598ba2d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1546,7 +1546,6 @@ struct task_struct { unsigned long timer_slack_ns; unsigned long default_timer_slack_ns; - struct list_head *scm_work_list; #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack */ int curr_ret_stack; diff --git a/include/net/scm.h b/include/net/scm.h index d456f4c..079d788 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -13,7 +13,6 @@ #define SCM_MAX_FD 253 struct scm_fp_list { - struct list_head list; short count; short max; struct file *fp[SCM_MAX_FD]; diff --git a/net/core/scm.c b/net/core/scm.c index 611c5ef..8f6ccfd 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -109,25 +109,9 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - if (current->scm_work_list) { - list_add_tail(&fpl->list, current->scm_work_list); - } else { - LIST_HEAD(work_list); - - current->scm_work_list = &work_list; - - list_add(&fpl->list, &work_list); - while (!list_empty(&work_list)) { - fpl = list_first_entry(&work_list, struct scm_fp_list, list); - - list_del(&fpl->list); - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); - } - - current->scm_work_list = NULL; - } + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); } } EXPORT_SYMBOL(__scm_destroy); -- cgit v0.10.2 From d35abdb28824cf74f0a106a0f9c6f3ff700a35bf Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Jun 2012 11:55:24 +0400 Subject: hold task_lock around checks in keyctl Signed-off-by: Al Viro diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 0291b3f..f1b59ae 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -1486,6 +1486,7 @@ long keyctl_session_to_parent(void) oldwork = NULL; parent = me->real_parent; + task_lock(parent); /* the parent mustn't be init and mustn't be a kernel thread */ if (parent->pid <= 1 || !parent->mm) goto unlock; @@ -1529,6 +1530,7 @@ long keyctl_session_to_parent(void) if (!ret) newwork = NULL; unlock: + task_unlock(parent); write_unlock_irq(&tasklist_lock); rcu_read_unlock(); if (oldwork) -- cgit v0.10.2 From 0a81861978deedfe9267d9fe905c756d3af3af38 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:26:28 +0300 Subject: hfsplus: make hfsplus_sync_fs static ... because it is used only in fs/hfsplus/super.c. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 4e75ac6..66a9365 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -428,7 +428,6 @@ int hfsplus_show_options(struct seq_file *, struct dentry *); /* super.c */ struct inode *hfsplus_iget(struct super_block *, unsigned long); -int hfsplus_sync_fs(struct super_block *sb, int wait); /* tables.c */ extern u16 hfsplus_case_fold_table[]; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index a9bca4b..5df771e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -161,7 +161,7 @@ static void hfsplus_evict_inode(struct inode *inode) } } -int hfsplus_sync_fs(struct super_block *sb, int wait) +static int hfsplus_sync_fs(struct super_block *sb, int wait) { struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); struct hfsplus_vh *vhdr = sbi->s_vhdr; -- cgit v0.10.2 From b7a90e8043e7ab1922126e1c1c5c004b470f9e2a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:26:29 +0300 Subject: hfsplus: amend debugging print Print correct function name in the debugging print of the 'hfsplus_sync_fs()' function. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 5df771e..9e9c2788 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -171,7 +171,7 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) if (!wait) return 0; - dprint(DBG_SUPER, "hfsplus_write_super\n"); + dprint(DBG_SUPER, "hfsplus_sync_fs\n"); sb->s_dirt = 0; -- cgit v0.10.2 From 58770d7e83eede5fafbcdf54a604277d70010705 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:26:30 +0300 Subject: hfsplus: remove useless check This check is useless because we always have 'sb->s_fs_info' to be non-NULL. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 9e9c2788..f4f3d54 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -240,9 +240,6 @@ static void hfsplus_put_super(struct super_block *sb) dprint(DBG_SUPER, "hfsplus_put_super\n"); - if (!sb->s_fs_info) - return; - if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { struct hfsplus_vh *vhdr = sbi->s_vhdr; -- cgit v0.10.2 From 9e6c5829b07c9ba6668807631914efc557fab059 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:26:31 +0300 Subject: hfsplus: get rid of write_super This patch makes hfsplus stop using the VFS '->write_super()' method along with the 's_dirt' superblock flag, because they are on their way out. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblocks using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no diry superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Tested using fsstress from the LTP project. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index 1cad80c..4cfbe2e 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c @@ -153,7 +153,7 @@ done: kunmap(page); *max = offset + (curr - pptr) * 32 + i - start; sbi->free_blocks -= *max; - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); out: mutex_unlock(&sbi->alloc_mutex); @@ -228,7 +228,7 @@ out: set_page_dirty(page); kunmap(page); sbi->free_blocks += len; - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); mutex_unlock(&sbi->alloc_mutex); return 0; diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 378ea0c..6b9f921 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -316,7 +316,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); sbi->file_count++; - dst_dir->i_sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(dst_dir->i_sb); out: mutex_unlock(&sbi->vh_mutex); return res; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 66a9365..558dbb4 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -153,8 +153,11 @@ struct hfsplus_sb_info { gid_t gid; int part, session; - unsigned long flags; + + int work_queued; /* non-zero delayed work is queued */ + struct delayed_work sync_work; /* FS sync delayed work */ + spinlock_t work_lock; /* protects sync_work and work_queued */ }; #define HFSPLUS_SB_WRITEBACKUP 0 @@ -428,6 +431,7 @@ int hfsplus_show_options(struct seq_file *, struct dentry *); /* super.c */ struct inode *hfsplus_iget(struct super_block *, unsigned long); +void hfsplus_mark_mdb_dirty(struct super_block *sb); /* tables.c */ extern u16 hfsplus_case_fold_table[]; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 7009265..3d8b4a6 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -431,7 +431,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) sbi->file_count++; insert_inode_hash(inode); mark_inode_dirty(inode); - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); return inode; } @@ -442,7 +442,7 @@ void hfsplus_delete_inode(struct inode *inode) if (S_ISDIR(inode->i_mode)) { HFSPLUS_SB(sb)->folder_count--; - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); return; } HFSPLUS_SB(sb)->file_count--; @@ -455,7 +455,7 @@ void hfsplus_delete_inode(struct inode *inode) inode->i_size = 0; hfsplus_file_truncate(inode); } - sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(sb); } void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index f4f3d54..4733320 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -124,7 +124,7 @@ static int hfsplus_system_write_inode(struct inode *inode) if (fork->total_size != cpu_to_be64(inode->i_size)) { set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags); - inode->i_sb->s_dirt = 1; + hfsplus_mark_mdb_dirty(inode->i_sb); } hfsplus_inode_write_fork(inode, fork); if (tree) @@ -173,8 +173,6 @@ static int hfsplus_sync_fs(struct super_block *sb, int wait) dprint(DBG_SUPER, "hfsplus_sync_fs\n"); - sb->s_dirt = 0; - /* * Explicitly write out the special metadata inodes. * @@ -226,12 +224,34 @@ out: return error; } -static void hfsplus_write_super(struct super_block *sb) +static void delayed_sync_fs(struct work_struct *work) { - if (!(sb->s_flags & MS_RDONLY)) - hfsplus_sync_fs(sb, 1); - else - sb->s_dirt = 0; + struct hfsplus_sb_info *sbi; + + sbi = container_of(work, struct hfsplus_sb_info, sync_work.work); + + spin_lock(&sbi->work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->work_lock); + + hfsplus_sync_fs(sbi->alloc_file->i_sb, 1); +} + +void hfsplus_mark_mdb_dirty(struct super_block *sb) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); + unsigned long delay; + + if (sb->s_flags & MS_RDONLY) + return; + + spin_lock(&sbi->work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->sync_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->work_lock); } static void hfsplus_put_super(struct super_block *sb) @@ -240,6 +260,8 @@ static void hfsplus_put_super(struct super_block *sb) dprint(DBG_SUPER, "hfsplus_put_super\n"); + cancel_delayed_work_sync(&sbi->sync_work); + if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { struct hfsplus_vh *vhdr = sbi->s_vhdr; @@ -325,7 +347,6 @@ static const struct super_operations hfsplus_sops = { .write_inode = hfsplus_write_inode, .evict_inode = hfsplus_evict_inode, .put_super = hfsplus_put_super, - .write_super = hfsplus_write_super, .sync_fs = hfsplus_sync_fs, .statfs = hfsplus_statfs, .remount_fs = hfsplus_remount, @@ -352,6 +373,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) sb->s_fs_info = sbi; mutex_init(&sbi->alloc_mutex); mutex_init(&sbi->vh_mutex); + spin_lock_init(&sbi->work_lock); + INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); hfsplus_fill_defaults(sbi); err = -EINVAL; -- cgit v0.10.2 From 715189d836ab276b3d0fc114681f12b423686ffa Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:44 +0300 Subject: hfs: push lock_super down HFS uses 'lock_super()'/'unlock_super()' around 'hfs_mdb_commit()' in order to serialize MDB (Master Directory Block) changes. Push it down to 'hfs_mdb_commit()' in order to simplify the code a bit. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 451c972..f2deefd 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -645,11 +645,9 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, /* sync the superblock to buffers */ sb = inode->i_sb; if (sb->s_dirt) { - lock_super(sb); sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) hfs_mdb_commit(sb); - unlock_super(sb); } /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 1563d5c..3f558d58 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -260,6 +260,7 @@ void hfs_mdb_commit(struct super_block *sb) { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; + lock_super(sb); if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) { /* These parameters may have been modified, so write them back */ mdb->drLsMod = hfs_mtime(); @@ -317,6 +318,7 @@ void hfs_mdb_commit(struct super_block *sb) size -= len; } } + unlock_super(sb); } void hfs_mdb_close(struct super_block *sb) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 7b4c537..f7c06bb 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -50,21 +50,17 @@ MODULE_LICENSE("GPL"); */ static void hfs_write_super(struct super_block *sb) { - lock_super(sb); sb->s_dirt = 0; /* sync everything to the buffers */ if (!(sb->s_flags & MS_RDONLY)) hfs_mdb_commit(sb); - unlock_super(sb); } static int hfs_sync_fs(struct super_block *sb, int wait) { - lock_super(sb); hfs_mdb_commit(sb); sb->s_dirt = 0; - unlock_super(sb); return 0; } -- cgit v0.10.2 From b59352359d6555aa8537d74ac9b15c1c6bcf3c68 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:45 +0300 Subject: hfs: get rid of lock_super Stop using lock_super for serializing the MDB changes - use the buffer-head own lock instead. Tested with fsstress. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 3f558d58..7a32240 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -260,7 +260,7 @@ void hfs_mdb_commit(struct super_block *sb) { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; - lock_super(sb); + lock_buffer(HFS_SB(sb)->mdb_bh); if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) { /* These parameters may have been modified, so write them back */ mdb->drLsMod = hfs_mtime(); @@ -284,9 +284,13 @@ void hfs_mdb_commit(struct super_block *sb) &mdb->drXTFlSize, NULL); hfs_inode_write_fork(HFS_SB(sb)->cat_tree->inode, mdb->drCTExtRec, &mdb->drCTFlSize, NULL); + + lock_buffer(HFS_SB(sb)->alt_mdb_bh); memcpy(HFS_SB(sb)->alt_mdb, HFS_SB(sb)->mdb, HFS_SECTOR_SIZE); HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); + unlock_buffer(HFS_SB(sb)->alt_mdb_bh); + mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh); sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh); } @@ -309,7 +313,11 @@ void hfs_mdb_commit(struct super_block *sb) break; } len = min((int)sb->s_blocksize - off, size); + + lock_buffer(bh); memcpy(bh->b_data + off, ptr, len); + unlock_buffer(bh); + mark_buffer_dirty(bh); brelse(bh); block++; @@ -318,7 +326,7 @@ void hfs_mdb_commit(struct super_block *sb) size -= len; } } - unlock_super(sb); + unlock_buffer(HFS_SB(sb)->mdb_bh); } void hfs_mdb_close(struct super_block *sb) -- cgit v0.10.2 From a3742d4828caeffab4cb075b562a4ed92d3e74d6 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:46 +0300 Subject: hfs: remove extra mdb write on unmount HFS calls 'hfs_write_super()' from 'hfs_put_super()' in order to write the MDB to the media. However, it is not needed because VFS calls '->sync_fs()' before calling '->put_super()' - so by the time we are in 'hfs_write_super()', the MDB is already synchronized. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfs/super.c b/fs/hfs/super.c index f7c06bb..47e4119 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -74,8 +74,6 @@ static int hfs_sync_fs(struct super_block *sb, int wait) */ static void hfs_put_super(struct super_block *sb) { - if (sb->s_dirt) - hfs_write_super(sb); hfs_mdb_close(sb); /* release the MDB's resources */ hfs_mdb_put(sb); -- cgit v0.10.2 From 4527440d5db8ff27ae8801de3f819843a1e6c6f6 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:47 +0300 Subject: hfs: simplify a bit checking for R/O We have the following pattern in 2 places in HFS if (!RDONLY) hfs_mdb_commit(); This patch pushes the RDONLY check down to 'hfs_mdb_commit()'. This will make the following patches a bit simpler. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index f2deefd..90c1ccb 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -646,8 +646,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, sb = inode->i_sb; if (sb->s_dirt) { sb->s_dirt = 0; - if (!(sb->s_flags & MS_RDONLY)) - hfs_mdb_commit(sb); + hfs_mdb_commit(sb); } /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 7a32240..5fd51a5 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c @@ -260,6 +260,9 @@ void hfs_mdb_commit(struct super_block *sb) { struct hfs_mdb *mdb = HFS_SB(sb)->mdb; + if (sb->s_flags & MS_RDONLY) + return; + lock_buffer(HFS_SB(sb)->mdb_bh); if (test_and_clear_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags)) { /* These parameters may have been modified, so write them back */ diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 47e4119..0730135 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -53,8 +53,7 @@ static void hfs_write_super(struct super_block *sb) sb->s_dirt = 0; /* sync everything to the buffers */ - if (!(sb->s_flags & MS_RDONLY)) - hfs_mdb_commit(sb); + hfs_mdb_commit(sb); } static int hfs_sync_fs(struct super_block *sb, int wait) -- cgit v0.10.2 From b16ca626358cbf056b752eab63ba8f20087afeaf Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:48 +0300 Subject: hfs: introduce VFS superblock object back-reference Add an 'sb' VFS superblock back-reference to the 'struct hfs_sb_info' data structure - we will need to find the VFS superblock from a 'struct hfs_sb_info' object in the next patch, so this change is jut a preparation. Remove few useless newlines while on it. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 1bf967c..3515612 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -137,16 +137,12 @@ struct hfs_sb_info { gid_t s_gid; /* The gid of all files */ int session, part; - struct nls_table *nls_io, *nls_disk; - struct mutex bitmap_lock; - unsigned long flags; - u16 blockoffset; - int fs_div; + struct super_block *sb; }; #define HFS_FLG_BITMAP_DIRTY 0 diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 0730135..99c6239 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -380,6 +380,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) if (!sbi) return -ENOMEM; + sbi->sb = sb; sb->s_fs_info = sbi; res = -EINVAL; -- cgit v0.10.2 From 5687b5780e90278a62d4cd916a3632087066f59d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 17:28:49 +0300 Subject: hfs: get rid of hfs_sync_super This patch makes hfs stop using the VFS '->write_super()' method along with the 's_dirt' superblock flag, because they are on their way out. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblocks using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no diry superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Tested using fsstress from the LTP project. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/hfs/extent.c b/fs/hfs/extent.c index 2c16316..a67955a 100644 --- a/fs/hfs/extent.c +++ b/fs/hfs/extent.c @@ -432,7 +432,7 @@ out: if (inode->i_ino < HFS_FIRSTUSER_CNID) set_bit(HFS_FLG_ALT_MDB_DIRTY, &HFS_SB(sb)->flags); set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); } return res; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 3515612..8275175 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -143,6 +144,9 @@ struct hfs_sb_info { u16 blockoffset; int fs_div; struct super_block *sb; + int work_queued; /* non-zero delayed work is queued */ + struct delayed_work mdb_work; /* MDB flush delayed work */ + spinlock_t work_lock; /* protects mdb_work and work_queued */ }; #define HFS_FLG_BITMAP_DIRTY 0 @@ -222,6 +226,9 @@ extern int hfs_compare_dentry(const struct dentry *parent, extern void hfs_asc2mac(struct super_block *, struct hfs_name *, struct qstr *); extern int hfs_mac2asc(struct super_block *, char *, const struct hfs_name *); +/* super.c */ +extern void hfs_mark_mdb_dirty(struct super_block *sb); + extern struct timezone sys_tz; /* @@ -249,7 +256,7 @@ static inline const char *hfs_mdb_name(struct super_block *sb) static inline void hfs_bitmap_dirty(struct super_block *sb) { set_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); } #define sb_bread512(sb, sec, data) ({ \ diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 90c1ccb..ee1bc55 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -220,7 +220,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, umode_t mode) insert_inode_hash(inode); mark_inode_dirty(inode); set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); return inode; } @@ -235,7 +235,7 @@ void hfs_delete_inode(struct inode *inode) if (HFS_I(inode)->cat_key.ParID == cpu_to_be32(HFS_ROOT_CNID)) HFS_SB(sb)->root_dirs--; set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); return; } HFS_SB(sb)->file_count--; @@ -248,7 +248,7 @@ void hfs_delete_inode(struct inode *inode) } } set_bit(HFS_FLG_MDB_DIRTY, &HFS_SB(sb)->flags); - sb->s_dirt = 1; + hfs_mark_mdb_dirty(sb); } void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext, @@ -644,10 +644,7 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, /* sync the superblock to buffers */ sb = inode->i_sb; - if (sb->s_dirt) { - sb->s_dirt = 0; - hfs_mdb_commit(sb); - } + flush_delayed_work_sync(&HFS_SB(sb)->mdb_work); /* .. finally sync the buffers to disk */ err = sync_blockdev(sb->s_bdev); if (!ret) diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 99c6239..4eb873e 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -29,38 +29,9 @@ static struct kmem_cache *hfs_inode_cachep; MODULE_LICENSE("GPL"); -/* - * hfs_write_super() - * - * Description: - * This function is called by the VFS only. When the filesystem - * is mounted r/w it updates the MDB on disk. - * Input Variable(s): - * struct super_block *sb: Pointer to the hfs superblock - * Output Variable(s): - * NONE - * Returns: - * void - * Preconditions: - * 'sb' points to a "valid" (struct super_block). - * Postconditions: - * The MDB is marked 'unsuccessfully unmounted' by clearing bit 8 of drAtrb - * (hfs_put_super() must set this flag!). Some MDB fields are updated - * and the MDB buffer is written to disk by calling hfs_mdb_commit(). - */ -static void hfs_write_super(struct super_block *sb) -{ - sb->s_dirt = 0; - - /* sync everything to the buffers */ - hfs_mdb_commit(sb); -} - static int hfs_sync_fs(struct super_block *sb, int wait) { hfs_mdb_commit(sb); - sb->s_dirt = 0; - return 0; } @@ -73,11 +44,44 @@ static int hfs_sync_fs(struct super_block *sb, int wait) */ static void hfs_put_super(struct super_block *sb) { + cancel_delayed_work_sync(&HFS_SB(sb)->mdb_work); hfs_mdb_close(sb); /* release the MDB's resources */ hfs_mdb_put(sb); } +static void flush_mdb(struct work_struct *work) +{ + struct hfs_sb_info *sbi; + struct super_block *sb; + + sbi = container_of(work, struct hfs_sb_info, mdb_work.work); + sb = sbi->sb; + + spin_lock(&sbi->work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->work_lock); + + hfs_mdb_commit(sb); +} + +void hfs_mark_mdb_dirty(struct super_block *sb) +{ + struct hfs_sb_info *sbi = HFS_SB(sb); + unsigned long delay; + + if (sb->s_flags & MS_RDONLY) + return; + + spin_lock(&sbi->work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->mdb_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->work_lock); +} + /* * hfs_statfs() * @@ -177,7 +181,6 @@ static const struct super_operations hfs_super_operations = { .write_inode = hfs_write_inode, .evict_inode = hfs_evict_inode, .put_super = hfs_put_super, - .write_super = hfs_write_super, .sync_fs = hfs_sync_fs, .statfs = hfs_statfs, .remount_fs = hfs_remount, @@ -382,6 +385,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) sbi->sb = sb; sb->s_fs_info = sbi; + spin_lock_init(&sbi->work_lock); + INIT_DELAYED_WORK(&sbi->mdb_work, flush_mdb); res = -EINVAL; if (!parse_options((char *)data, sbi)) { -- cgit v0.10.2 From a4d05d315a4fdf5ccb0dbf0ce38bac12d522d33e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 3 Jul 2012 16:43:26 +0300 Subject: fs/sysv: remove useless write_super call We do not need to call 'sysv_write_super()' from 'sysv_put_super()', because VFS has called 'sysv_sync_fs()' before calling '->put_super()'. So remove it. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 08d0b25..af13d13 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -81,9 +81,6 @@ static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); - if (sb->s_dirt) - sysv_write_super(sb); - if (!(sb->s_flags & MS_RDONLY)) { /* XXX ext2 also updates the state here */ mark_buffer_dirty(sbi->s_bh1); -- cgit v0.10.2 From eee458936b52bd3a9ff0ff577313b637905fff08 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 3 Jul 2012 16:43:27 +0300 Subject: fs/sysv: remove another useless write_super call We do not need to call 'sysv_write_super()' from 'sysv_remount()', because VFS has called 'sysv_sync_fs()' before calling '->remount()'. So remove it. Remove also '(un)lock_super()' which obvioulsy is becoming useless in this function. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index af13d13..f20ffe3 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -68,12 +68,9 @@ static void sysv_write_super(struct super_block *sb) static int sysv_remount(struct super_block *sb, int *flags, char *data) { struct sysv_sb_info *sbi = SYSV_SB(sb); - lock_super(sb); + if (sbi->s_forced_ro) *flags |= MS_RDONLY; - if (*flags & MS_RDONLY) - sysv_write_super(sb); - unlock_super(sb); return 0; } -- cgit v0.10.2 From 9d46be294d12871adf4206f89168b14d27adb8b5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 3 Jul 2012 16:43:28 +0300 Subject: fs/sysv: stop using write_super and s_dirt It does not look like sysv FS needs 'write_super()' at all, because all it does is a timestamp update. I cannot test this patch, because this file-system is so old and probably has not been used by anyone for years, so there are no tools to create it in Linux. But from the code I see that marking the superblock as dirty is basically marking the superblock buffers as drity and then setting the s_dirt flag. And when 'write_super()' is executed to handle the s_dirt flag, we just update the timestamp and again mark the superblock buffer as dirty. Seems pointless. It looks like we can update the timestamp more opprtunistically - on unmount or remount of sync, and nothing should change. Thus, this patch removes 'sysv_write_super()' and 's_dirt'. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index f20ffe3..80e1e2b 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -43,7 +43,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) * then attach current time stamp. * But if the filesystem was marked clean, keep it clean. */ - sb->s_dirt = 0; old_time = fs32_to_cpu(sbi, *sbi->s_sb_time); if (sbi->s_type == FSTYPE_SYSV4) { if (*sbi->s_sb_state == cpu_to_fs32(sbi, 0x7c269d38 - old_time)) @@ -57,14 +56,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) return 0; } -static void sysv_write_super(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - sysv_sync_fs(sb, 1); - else - sb->s_dirt = 0; -} - static int sysv_remount(struct super_block *sb, int *flags, char *data) { struct sysv_sb_info *sbi = SYSV_SB(sb); @@ -351,7 +342,6 @@ const struct super_operations sysv_sops = { .write_inode = sysv_write_inode, .evict_inode = sysv_evict_inode, .put_super = sysv_put_super, - .write_super = sysv_write_super, .sync_fs = sysv_sync_fs, .remount_fs = sysv_remount, .statfs = sysv_statfs, diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 11b0767..0bc35fd 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -117,7 +117,6 @@ static inline void dirty_sb(struct super_block *sb) mark_buffer_dirty(sbi->s_bh1); if (sbi->s_bh1 != sbi->s_bh2) mark_buffer_dirty(sbi->s_bh2); - sb->s_dirt = 1; } -- cgit v0.10.2 From 65e5e83f7d01a3790deb1ba2e0d887e715c43307 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 16:28:06 +0300 Subject: fs/ufs: remove extra superblock write on unmount UFS calls 'ufs_write_super()' from 'ufs_put_super()' in order to write the superblocks to the media. However, it is not needed because VFS calls '->sync_fs()' before calling '->put_super()' - so by the time we are in 'ufs_write_super()', the superblocks are already synchronized. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 302f340..ae91e0a 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1238,9 +1238,6 @@ static void ufs_put_super(struct super_block *sb) UFSD("ENTER\n"); - if (sb->s_dirt) - ufs_write_super(sb); - if (!(sb->s_flags & MS_RDONLY)) ufs_put_super_internal(sb); -- cgit v0.10.2 From 7bd54ef722e9938768f524677be0ac4985d8473a Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 16:28:07 +0300 Subject: fs/ufs: re-arrange the code a bit This patch does not do any functional changes. It only moves 3 functions in fs/ufs/super.c a little bit up in order to prepare for further changes where I'll need this new arrangement to avoid forward declarations. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/ufs/super.c b/fs/ufs/super.c index ae91e0a..ad56c6d 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -691,6 +691,64 @@ static void ufs_put_super_internal(struct super_block *sb) UFSD("EXIT\n"); } +static int ufs_sync_fs(struct super_block *sb, int wait) +{ + struct ufs_sb_private_info * uspi; + struct ufs_super_block_first * usb1; + struct ufs_super_block_third * usb3; + unsigned flags; + + lock_ufs(sb); + lock_super(sb); + + UFSD("ENTER\n"); + + flags = UFS_SB(sb)->s_flags; + uspi = UFS_SB(sb)->s_uspi; + usb1 = ubh_get_usb_first(uspi); + usb3 = ubh_get_usb_third(uspi); + + usb1->fs_time = cpu_to_fs32(sb, get_seconds()); + if ((flags & UFS_ST_MASK) == UFS_ST_SUN || + (flags & UFS_ST_MASK) == UFS_ST_SUNOS || + (flags & UFS_ST_MASK) == UFS_ST_SUNx86) + ufs_set_fs_state(sb, usb1, usb3, + UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); + ufs_put_cstotal(sb); + sb->s_dirt = 0; + + UFSD("EXIT\n"); + unlock_super(sb); + unlock_ufs(sb); + + return 0; +} + +static void ufs_write_super(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) + ufs_sync_fs(sb, 1); + else + sb->s_dirt = 0; +} + +static void ufs_put_super(struct super_block *sb) +{ + struct ufs_sb_info * sbi = UFS_SB(sb); + + UFSD("ENTER\n"); + + if (!(sb->s_flags & MS_RDONLY)) + ufs_put_super_internal(sb); + + ubh_brelse_uspi (sbi->s_uspi); + kfree (sbi->s_uspi); + kfree (sbi); + sb->s_fs_info = NULL; + UFSD("EXIT\n"); + return; +} + static int ufs_fill_super(struct super_block *sb, void *data, int silent) { struct ufs_sb_info * sbi; @@ -1191,65 +1249,6 @@ failed_nomem: return -ENOMEM; } -static int ufs_sync_fs(struct super_block *sb, int wait) -{ - struct ufs_sb_private_info * uspi; - struct ufs_super_block_first * usb1; - struct ufs_super_block_third * usb3; - unsigned flags; - - lock_ufs(sb); - lock_super(sb); - - UFSD("ENTER\n"); - - flags = UFS_SB(sb)->s_flags; - uspi = UFS_SB(sb)->s_uspi; - usb1 = ubh_get_usb_first(uspi); - usb3 = ubh_get_usb_third(uspi); - - usb1->fs_time = cpu_to_fs32(sb, get_seconds()); - if ((flags & UFS_ST_MASK) == UFS_ST_SUN || - (flags & UFS_ST_MASK) == UFS_ST_SUNOS || - (flags & UFS_ST_MASK) == UFS_ST_SUNx86) - ufs_set_fs_state(sb, usb1, usb3, - UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); - ufs_put_cstotal(sb); - sb->s_dirt = 0; - - UFSD("EXIT\n"); - unlock_super(sb); - unlock_ufs(sb); - - return 0; -} - -static void ufs_write_super(struct super_block *sb) -{ - if (!(sb->s_flags & MS_RDONLY)) - ufs_sync_fs(sb, 1); - else - sb->s_dirt = 0; -} - -static void ufs_put_super(struct super_block *sb) -{ - struct ufs_sb_info * sbi = UFS_SB(sb); - - UFSD("ENTER\n"); - - if (!(sb->s_flags & MS_RDONLY)) - ufs_put_super_internal(sb); - - ubh_brelse_uspi (sbi->s_uspi); - kfree (sbi->s_uspi); - kfree (sbi); - sb->s_fs_info = NULL; - UFSD("EXIT\n"); - return; -} - - static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) { struct ufs_sb_private_info * uspi; -- cgit v0.10.2 From 9e9ad5f408889db6038a59b38ede29ff1ba9ef2f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 12 Jul 2012 16:28:08 +0300 Subject: fs/ufs: get rid of write_super This patch makes UFS stop using the VFS '->write_super()' method along with the 's_dirt' superblock flag, because they are on their way out. The way we implement this is that we schedule a delay job instead relying on 's_dirt' and '->write_super()'. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblocks using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no diry superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Tested using fsstress from the LTP project. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 42694e1..1b3e410 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -116,7 +116,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); unlock_super (sb); UFSD("EXIT\n"); @@ -214,7 +214,7 @@ do_more: goto do_more; } - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); unlock_super (sb); UFSD("EXIT\n"); return; @@ -557,7 +557,7 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -677,7 +677,7 @@ succed: ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); result += cgno * uspi->s_fpg; UFSD("EXIT3, result %llu\n", (unsigned long long)result); diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 4ec5c10..e84cbe2 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -116,7 +116,7 @@ void ufs_free_inode (struct inode * inode) if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); unlock_super (sb); UFSD("EXIT\n"); } @@ -288,7 +288,7 @@ cg_found: ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); if (sb->s_flags & MS_SYNCHRONOUS) ubh_sync_block(UCPI_UBH(ucpi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); inode->i_ino = cg * uspi->s_ipg + bit; inode_init_owner(inode, dir, mode); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index ad56c6d..444927e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -302,7 +302,7 @@ void ufs_error (struct super_block * sb, const char * function, if (!(sb->s_flags & MS_RDONLY)) { usb1->fs_clean = UFS_FSBAD; ubh_mark_buffer_dirty(USPI_UBH(uspi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); sb->s_flags |= MS_RDONLY; } va_start (args, fmt); @@ -334,7 +334,7 @@ void ufs_panic (struct super_block * sb, const char * function, if (!(sb->s_flags & MS_RDONLY)) { usb1->fs_clean = UFS_FSBAD; ubh_mark_buffer_dirty(USPI_UBH(uspi)); - sb->s_dirt = 1; + ufs_mark_sb_dirty(sb); } va_start (args, fmt); vsnprintf (error_buf, sizeof(error_buf), fmt, args); @@ -715,7 +715,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait) ufs_set_fs_state(sb, usb1, usb3, UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); ufs_put_cstotal(sb); - sb->s_dirt = 0; UFSD("EXIT\n"); unlock_super(sb); @@ -724,12 +723,31 @@ static int ufs_sync_fs(struct super_block *sb, int wait) return 0; } -static void ufs_write_super(struct super_block *sb) +static void delayed_sync_fs(struct work_struct *work) { - if (!(sb->s_flags & MS_RDONLY)) - ufs_sync_fs(sb, 1); - else - sb->s_dirt = 0; + struct ufs_sb_info *sbi; + + sbi = container_of(work, struct ufs_sb_info, sync_work.work); + + spin_lock(&sbi->work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->work_lock); + + ufs_sync_fs(sbi->sb, 1); +} + +void ufs_mark_sb_dirty(struct super_block *sb) +{ + struct ufs_sb_info *sbi = UFS_SB(sb); + unsigned long delay; + + spin_lock(&sbi->work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->sync_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->work_lock); } static void ufs_put_super(struct super_block *sb) @@ -740,6 +758,7 @@ static void ufs_put_super(struct super_block *sb) if (!(sb->s_flags & MS_RDONLY)) ufs_put_super_internal(sb); + cancel_delayed_work_sync(&sbi->sync_work); ubh_brelse_uspi (sbi->s_uspi); kfree (sbi->s_uspi); @@ -774,6 +793,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) if (!sbi) goto failed_nomem; sb->s_fs_info = sbi; + sbi->sb = sb; UFSD("flag %u\n", (int)(sb->s_flags & MS_RDONLY)); @@ -785,6 +805,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) } #endif mutex_init(&sbi->mutex); + spin_lock_init(&sbi->work_lock); + INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs); /* * Set default mount options * Parse mount options @@ -1304,7 +1326,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufs_set_fs_state(sb, usb1, usb3, UFS_FSOK - fs32_to_cpu(sb, usb1->fs_time)); ubh_mark_buffer_dirty (USPI_UBH(uspi)); - sb->s_dirt = 0; sb->s_flags |= MS_RDONLY; } else { /* @@ -1454,7 +1475,6 @@ static const struct super_operations ufs_super_ops = { .write_inode = ufs_write_inode, .evict_inode = ufs_evict_inode, .put_super = ufs_put_super, - .write_super = ufs_write_super, .sync_fs = ufs_sync_fs, .statfs = ufs_statfs, .remount_fs = ufs_remount, diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index 528750b..343e6fc 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -20,6 +20,10 @@ struct ufs_sb_info { unsigned s_mount_opt; struct mutex mutex; struct task_struct *mutex_owner; + struct super_block *sb; + int work_queued; /* non-zero if the delayed work is queued */ + struct delayed_work sync_work; /* FS sync delayed work */ + spinlock_t work_lock; /* protects sync_work and work_queued */ }; struct ufs_inode_info { @@ -123,6 +127,7 @@ extern __printf(3, 4) void ufs_error(struct super_block *, const char *, const char *, ...); extern __printf(3, 4) void ufs_panic(struct super_block *, const char *, const char *, ...); +void ufs_mark_sb_dirty(struct super_block *sb); /* symlink.c */ extern const struct inode_operations ufs_fast_symlink_inode_operations; diff --git a/fs/ufs/ufs_fs.h b/fs/ufs/ufs_fs.h index 8aba544..0cbd5d3 100644 --- a/fs/ufs/ufs_fs.h +++ b/fs/ufs/ufs_fs.h @@ -34,6 +34,7 @@ #include #include #include +#include #include typedef __u64 __bitwise __fs64; -- cgit v0.10.2 From 6eedc70150d55b5885800eb6664ea226dc2cb66f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:27 +0200 Subject: vfs: Move noop_backing_dev_info check from sync into writeback In principle, a filesystem may want to have ->sync_fs() called during sync(1) although it does not have a bdi (i.e. s_bdi is set to noop_backing_dev_info). Only writeback code really needs bdi set to something reasonable. So move the checks where they are more logical. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 41a3ccf..8f660dd 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1315,6 +1315,8 @@ void writeback_inodes_sb_nr(struct super_block *sb, .reason = reason, }; + if (sb->s_bdi == &noop_backing_dev_info) + return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); bdi_queue_work(sb->s_bdi, &work); wait_for_completion(&done); @@ -1398,6 +1400,9 @@ void sync_inodes_sb(struct super_block *sb) .reason = WB_REASON_SYNC, }; + /* Nothing to do? */ + if (sb->s_bdi == &noop_backing_dev_info) + return; WARN_ON(!rwsem_is_locked(&sb->s_umount)); bdi_queue_work(sb->s_bdi, &work); diff --git a/fs/sync.c b/fs/sync.c index 11e3d1c..b3d2a00 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -29,13 +29,6 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { - /* - * This should be safe, as we require bdi backing to actually - * write out data in the first place - */ - if (sb->s_bdi == &noop_backing_dev_info) - return 0; - if (sb->s_qcop && sb->s_qcop->quota_sync) sb->s_qcop->quota_sync(sb, -1, wait); -- cgit v0.10.2 From ceed17236a7491b44ee2be21f56a41ab997cbe7d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:28 +0200 Subject: quota: Split dquot_quota_sync() to writeback and cache flushing part Split off part of dquot_quota_sync() which writes dquots into a quota file to a separate function. In the next patch we will use the function from filesystems and we do not want to abuse ->quota_sync quotactl callback more than necessary. Acked-by: Steven Whitehouse Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index b97178e..27b5cc7 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1108,7 +1108,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, } } -int gfs2_quota_sync(struct super_block *sb, int type, int wait) +int gfs2_quota_sync(struct super_block *sb, int type) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_quota_data **qda; @@ -1154,7 +1154,7 @@ int gfs2_quota_sync(struct super_block *sb, int type, int wait) static int gfs2_quota_sync_timeo(struct super_block *sb, int type) { - return gfs2_quota_sync(sb, type, 0); + return gfs2_quota_sync(sb, type); } int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id) diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 90bf1c3..f25d98b 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -26,7 +26,7 @@ extern int gfs2_quota_check(struct gfs2_inode *ip, u32 uid, u32 gid); extern void gfs2_quota_change(struct gfs2_inode *ip, s64 change, u32 uid, u32 gid); -extern int gfs2_quota_sync(struct super_block *sb, int type, int wait); +extern int gfs2_quota_sync(struct super_block *sb, int type); extern int gfs2_quota_refresh(struct gfs2_sbd *sdp, int user, u32 id); extern int gfs2_quota_init(struct gfs2_sbd *sdp); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 713e621..313c329 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -838,7 +838,7 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) int error; flush_workqueue(gfs2_delete_workqueue); - gfs2_quota_sync(sdp->sd_vfs, 0, 1); + gfs2_quota_sync(sdp->sd_vfs, 0); gfs2_statfs_sync(sdp->sd_vfs, 0); error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 9c2592b..73ecc34 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -168,7 +168,7 @@ static ssize_t quota_sync_store(struct gfs2_sbd *sdp, const char *buf, if (simple_strtol(buf, NULL, 0) != 1) return -EINVAL; - gfs2_quota_sync(sdp->sd_vfs, 0, 1); + gfs2_quota_sync(sdp->sd_vfs, 0); return len; } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 10cbe84..d679fc4 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -595,12 +595,14 @@ out: } EXPORT_SYMBOL(dquot_scan_active); -int dquot_quota_sync(struct super_block *sb, int type, int wait) +/* Write all dquot structures to quota files */ +int dquot_writeback_dquots(struct super_block *sb, int type) { struct list_head *dirty; struct dquot *dquot; struct quota_info *dqopt = sb_dqopt(sb); int cnt; + int err, ret = 0; mutex_lock(&dqopt->dqonoff_mutex); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -624,7 +626,9 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait) atomic_inc(&dquot->dq_count); spin_unlock(&dq_list_lock); dqstats_inc(DQST_LOOKUPS); - sb->dq_op->write_dquot(dquot); + err = sb->dq_op->write_dquot(dquot); + if (!ret && err) + err = ret; dqput(dquot); spin_lock(&dq_list_lock); } @@ -638,7 +642,21 @@ int dquot_quota_sync(struct super_block *sb, int type, int wait) dqstats_inc(DQST_SYNCS); mutex_unlock(&dqopt->dqonoff_mutex); - if (!wait || (dqopt->flags & DQUOT_QUOTA_SYS_FILE)) + return ret; +} +EXPORT_SYMBOL(dquot_writeback_dquots); + +/* Write all dquot structures to disk and make them visible from userspace */ +int dquot_quota_sync(struct super_block *sb, int type) +{ + struct quota_info *dqopt = sb_dqopt(sb); + int cnt; + int ret; + + ret = dquot_writeback_dquots(sb, type); + if (ret) + return ret; + if (dqopt->flags & DQUOT_QUOTA_SYS_FILE) return 0; /* This is not very clever (and fast) but currently I don't know about diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 9a39120..c659f92 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -47,7 +47,7 @@ static int check_quotactl_permission(struct super_block *sb, int type, int cmd, static void quota_sync_one(struct super_block *sb, void *arg) { if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, *(int *)arg, 1); + sb->s_qcop->quota_sync(sb, *(int *)arg); } static int quota_sync_all(int type) @@ -270,7 +270,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id, case Q_SYNC: if (!sb->s_qcop->quota_sync) return -ENOSYS; - return sb->s_qcop->quota_sync(sb, type, 1); + return sb->s_qcop->quota_sync(sb, type); case Q_XQUOTAON: case Q_XQUOTAOFF: case Q_XQUOTARM: diff --git a/fs/sync.c b/fs/sync.c index b3d2a00..cae145d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -30,7 +30,7 @@ static int __sync_filesystem(struct super_block *sb, int wait) { if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, -1, wait); + sb->s_qcop->quota_sync(sb, -1); if (wait) sync_inodes_sb(sb); diff --git a/include/linux/quota.h b/include/linux/quota.h index c09fa04..524ede8 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -333,7 +333,7 @@ struct quotactl_ops { int (*quota_on)(struct super_block *, int, int, struct path *); int (*quota_on_meta)(struct super_block *, int, int); int (*quota_off)(struct super_block *, int); - int (*quota_sync)(struct super_block *, int, int); + int (*quota_sync)(struct super_block *, int); int (*get_info)(struct super_block *, int, struct if_dqinfo *); int (*set_info)(struct super_block *, int, struct if_dqinfo *); int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *); diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 17b9773..ec6b65f 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -83,7 +83,8 @@ int dquot_quota_on(struct super_block *sb, int type, int format_id, int dquot_quota_on_mount(struct super_block *sb, char *qf_name, int format_id, int type); int dquot_quota_off(struct super_block *sb, int type); -int dquot_quota_sync(struct super_block *sb, int type, int wait); +int dquot_writeback_dquots(struct super_block *sb, int type); +int dquot_quota_sync(struct super_block *sb, int type); int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii); int dquot_get_dqblk(struct super_block *sb, int type, qid_t id, @@ -255,6 +256,11 @@ static inline int dquot_resume(struct super_block *sb, int type) #define dquot_file_open generic_file_open +static inline int dquot_writeback_dquots(struct super_block *sb, int type) +{ + return 0; +} + #endif /* CONFIG_QUOTA */ static inline int dquot_alloc_space_nodirty(struct inode *inode, qsize_t nr) -- cgit v0.10.2 From a1177825719ccef3f76ef39bbfd5ebb6087d53c7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:29 +0200 Subject: quota: Move quota syncing to ->sync_fs method Since the moment writes to quota files are using block device page cache and space for quota structures is reserved at the moment they are first accessed we have no reason to sync quota before inode writeback. In fact this order is now only harmful since quota information can easily change during inode writeback (either because conversion of delayed-allocated extents or simply because of allocation of new blocks for simple filesystems not using page_mkwrite). So move syncing of quota information after writeback of inodes into ->sync_fs method. This way we do not have to use ->quota_sync callback which is primarily intended for use by quotactl syscall anyway and we get rid of calling ->sync_fs() twice unnecessarily. We skip quota syncing for OCFS2 since it does proper quota journalling in all cases (unlike ext3, ext4, and reiserfs which also support legacy non-journalled quotas) and thus there are no dirty quota structures. CC: "Theodore Ts'o" CC: Joel Becker CC: reiserfs-devel@vger.kernel.org Acked-by: Steven Whitehouse Acked-by: Dave Kleikamp Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b3621cb..5df3d2d 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1184,6 +1184,12 @@ static int ext2_sync_fs(struct super_block *sb, int wait) struct ext2_sb_info *sbi = EXT2_SB(sb); struct ext2_super_block *es = EXT2_SB(sb)->s_es; + /* + * Write quota structures to quota file, sync_blockdev() will write + * them to disk later + */ + dquot_writeback_dquots(sb, -1); + spin_lock(&sbi->s_lock); if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) { ext2_debug("setting valid to 0\n"); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 8c3a44b..4ac304c 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2526,6 +2526,11 @@ static int ext3_sync_fs(struct super_block *sb, int wait) tid_t target; trace_ext3_sync_fs(sb, wait); + /* + * Writeback quota in non-journalled quota case - journalled quota has + * no dirty dquots + */ + dquot_writeback_dquots(sb, -1); if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { if (wait) log_wait_commit(EXT3_SB(sb)->s_journal, target); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index eb7aa3e..d875940 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4325,6 +4325,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait) trace_ext4_sync_fs(sb, wait); flush_workqueue(sbi->dio_unwritten_wq); + /* + * Writeback quota in non-journalled quota case - journalled quota has + * no dirty dquots + */ + dquot_writeback_dquots(sb, -1); if (jbd2_journal_start_commit(sbi->s_journal, &target)) { if (wait) jbd2_log_wait_commit(sbi->s_journal, target); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 313c329..f3d6bbf 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -952,6 +952,8 @@ restart: static int gfs2_sync_fs(struct super_block *sb, int wait) { struct gfs2_sbd *sdp = sb->s_fs_info; + + gfs2_quota_sync(sb, -1); if (wait && sdp) gfs2_log_flush(sdp, NULL); return 0; diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 4a82950..c55c745 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -601,6 +601,11 @@ static int jfs_sync_fs(struct super_block *sb, int wait) /* log == NULL indicates read-only mount */ if (log) { + /* + * Write quota structures to quota file, sync_blockdev() will + * write them to disk later + */ + dquot_writeback_dquots(sb, -1); jfs_flush_journal(log, wait); jfs_syncpt(log, 0); } diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 651ce76..7a37dab 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -68,6 +68,11 @@ static int reiserfs_sync_fs(struct super_block *s, int wait) { struct reiserfs_transaction_handle th; + /* + * Writeback quota in non-journalled quota case - journalled quota has + * no dirty dquots + */ + dquot_writeback_dquots(s, -1); reiserfs_write_lock(s); if (!journal_begin(&th, s, 1)) if (!journal_end_sync(&th, s, 1)) diff --git a/fs/sync.c b/fs/sync.c index cae145d..66acd2b 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -29,9 +29,6 @@ */ static int __sync_filesystem(struct super_block *sb, int wait) { - if (sb->s_qcop && sb->s_qcop->quota_sync) - sb->s_qcop->quota_sync(sb, -1); - if (wait) sync_inodes_sb(sb); else -- cgit v0.10.2 From b3de653105180b57af90ef2f5b8441f085f4ff56 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:30 +0200 Subject: vfs: Reorder operations during sys_sync Change the order of operations during sync from for_each_sb { writeback_inodes_sb(); sync_fs(nowait); __sync_blockdev(nowait); } for_each_sb { sync_inodes_sb(); sync_fs(wait); __sync_blockdev(wait); } to for_each_sb writeback_inodes_sb(); for_each_sb sync_fs(nowait); for_each_sb __sync_blockdev(nowait); for_each_sb sync_inodes_sb(); for_each_sb sync_fs(wait); for_each_sb __sync_blockdev(wait); This is a preparation for the following patches in this series. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/sync.c b/fs/sync.c index 66acd2b..490e902 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -67,18 +67,28 @@ int sync_filesystem(struct super_block *sb) } EXPORT_SYMBOL_GPL(sync_filesystem); -static void sync_one_sb(struct super_block *sb, void *arg) +static void sync_inodes_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY)) - __sync_filesystem(sb, *(int *)arg); + sync_inodes_sb(sb); } -/* - * Sync all the data for all the filesystems (called by sys_sync() and - * emergency sync) - */ -static void sync_filesystems(int wait) + +static void writeback_inodes_one_sb(struct super_block *sb, void *arg) { - iterate_supers(sync_one_sb, &wait); + if (!(sb->s_flags & MS_RDONLY)) + writeback_inodes_sb(sb, WB_REASON_SYNC); +} + +static void sync_fs_one_sb(struct super_block *sb, void *arg) +{ + if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs) + sb->s_op->sync_fs(sb, *(int *)arg); +} + +static void sync_blkdev_one_sb(struct super_block *sb, void *arg) +{ + if (!(sb->s_flags & MS_RDONLY)) + __sync_blockdev(sb->s_bdev, *(int *)arg); } /* @@ -87,9 +97,15 @@ static void sync_filesystems(int wait) */ SYSCALL_DEFINE0(sync) { + int nowait = 0, wait = 1; + wakeup_flusher_threads(0, WB_REASON_SYNC); - sync_filesystems(0); - sync_filesystems(1); + iterate_supers(writeback_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_supers(sync_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &wait); + iterate_supers(sync_blkdev_one_sb, &wait); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; @@ -97,12 +113,18 @@ SYSCALL_DEFINE0(sync) static void do_sync_work(struct work_struct *work) { + int nowait = 0; + /* * Sync twice to reduce the possibility we skipped some inodes / pages * because they were temporarily locked */ - sync_filesystems(0); - sync_filesystems(0); + iterate_supers(sync_inodes_one_sb, &nowait); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_supers(sync_inodes_one_sb, &nowait); + iterate_supers(sync_fs_one_sb, &nowait); + iterate_supers(sync_blkdev_one_sb, &nowait); printk("Emergency Sync complete\n"); kfree(work); } -- cgit v0.10.2 From 5c0d6b60a0ba46d45020547eacf7199171920935 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:31 +0200 Subject: vfs: Create function for iterating over block devices Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/block_dev.c b/fs/block_dev.c index c2bbe1f..1e51919 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1710,3 +1710,39 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty) return res; } EXPORT_SYMBOL(__invalidate_device); + +void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg) +{ + struct inode *inode, *old_inode = NULL; + + spin_lock(&inode_sb_list_lock); + list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) { + struct address_space *mapping = inode->i_mapping; + + spin_lock(&inode->i_lock); + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) || + mapping->nrpages == 0) { + spin_unlock(&inode->i_lock); + continue; + } + __iget(inode); + spin_unlock(&inode->i_lock); + spin_unlock(&inode_sb_list_lock); + /* + * We hold a reference to 'inode' so it couldn't have been + * removed from s_inodes list while we dropped the + * inode_sb_list_lock. We cannot iput the inode now as we can + * be holding the last reference and we cannot iput it under + * inode_sb_list_lock. So we keep the reference and iput it + * later. + */ + iput(old_inode); + old_inode = inode; + + func(I_BDEV(inode), arg); + + spin_lock(&inode_sb_list_lock); + } + spin_unlock(&inode_sb_list_lock); + iput(old_inode); +} diff --git a/include/linux/fs.h b/include/linux/fs.h index 48548bd..6a6ca85 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2102,6 +2102,7 @@ extern sector_t blkdev_max_block(struct block_device *bdev); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); extern void invalidate_bdev(struct block_device *); +extern void iterate_bdevs(void (*)(struct block_device *, void *), void *); extern int sync_blockdev(struct block_device *bdev); extern void kill_bdev(struct block_device *); extern struct super_block *freeze_bdev(struct block_device *); @@ -2123,6 +2124,10 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) { return 0; } + +static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg) +{ +} #endif extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; -- cgit v0.10.2 From a8c7176b6ded413d5044a00f1d05477b95a6d7ad Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:32 +0200 Subject: vfs: Make sys_sync writeout also block device inodes In case block device does not have filesystem mounted on it, sys_sync will just ignore it and doesn't writeout its dirty pages. This is because writeback code avoids writing inodes from superblock without backing device and blockdev_superblock is such a superblock. Since it's unexpected that sync doesn't writeout dirty data for block devices be nice to users and change the behavior to do so. So now we iterate over all block devices on blockdev_super instead of iterating over all superblocks when syncing block devices. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/sync.c b/fs/sync.c index 490e902..0b166f2 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -85,10 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg) sb->s_op->sync_fs(sb, *(int *)arg); } -static void sync_blkdev_one_sb(struct super_block *sb, void *arg) +static void flush_one_bdev(struct block_device *bdev, void *arg) { - if (!(sb->s_flags & MS_RDONLY)) - __sync_blockdev(sb->s_bdev, *(int *)arg); + __sync_blockdev(bdev, 0); +} + +static void sync_one_bdev(struct block_device *bdev, void *arg) +{ + sync_blockdev(bdev); } /* @@ -102,10 +106,10 @@ SYSCALL_DEFINE0(sync) wakeup_flusher_threads(0, WB_REASON_SYNC); iterate_supers(writeback_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); - iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_bdevs(flush_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &wait); - iterate_supers(sync_blkdev_one_sb, &wait); + iterate_bdevs(sync_one_bdev, NULL); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; @@ -121,10 +125,10 @@ static void do_sync_work(struct work_struct *work) */ iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_bdevs(flush_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_supers(sync_blkdev_one_sb, &nowait); + iterate_bdevs(flush_one_bdev, NULL); printk("Emergency Sync complete\n"); kfree(work); } -- cgit v0.10.2 From d0e91b13eb34d449922124c34f8a05e498daa089 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:33 +0200 Subject: vfs: Remove unnecessary flushing of block devices It is not necessary to write block devices twice. The reason why we first did flush and then proper sync is that for_each_bdev() { write_bdev() wait_for_completion() } is much slower than for_each_bdev() write_bdev() for_each_bdev() wait_for_completion() when there is bigger amount of data. But as is seen in the above, there's no real need to scan pages and submit them twice. We just need to separate the submission and waiting part. This patch does that. Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/sync.c b/fs/sync.c index 0b166f2..131ddae 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -85,14 +85,14 @@ static void sync_fs_one_sb(struct super_block *sb, void *arg) sb->s_op->sync_fs(sb, *(int *)arg); } -static void flush_one_bdev(struct block_device *bdev, void *arg) +static void fdatawrite_one_bdev(struct block_device *bdev, void *arg) { - __sync_blockdev(bdev, 0); + filemap_fdatawrite(bdev->bd_inode->i_mapping); } -static void sync_one_bdev(struct block_device *bdev, void *arg) +static void fdatawait_one_bdev(struct block_device *bdev, void *arg) { - sync_blockdev(bdev); + filemap_fdatawait(bdev->bd_inode->i_mapping); } /* @@ -106,10 +106,10 @@ SYSCALL_DEFINE0(sync) wakeup_flusher_threads(0, WB_REASON_SYNC); iterate_supers(writeback_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &nowait); - iterate_bdevs(flush_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, NULL); iterate_supers(sync_fs_one_sb, &wait); - iterate_bdevs(sync_one_bdev, NULL); + iterate_bdevs(fdatawrite_one_bdev, NULL); + iterate_bdevs(fdatawait_one_bdev, NULL); if (unlikely(laptop_mode)) laptop_sync_completion(); return 0; @@ -125,10 +125,10 @@ static void do_sync_work(struct work_struct *work) */ iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_bdevs(flush_one_bdev, NULL); + iterate_bdevs(fdatawrite_one_bdev, NULL); iterate_supers(sync_inodes_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &nowait); - iterate_bdevs(flush_one_bdev, NULL); + iterate_bdevs(fdatawrite_one_bdev, NULL); printk("Emergency Sync complete\n"); kfree(work); } -- cgit v0.10.2 From 4ea425b63a3dfeb7707fc7cc7161c11a51e871ed Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 3 Jul 2012 16:45:34 +0200 Subject: vfs: Avoid unnecessary WB_SYNC_NONE writeback during sys_sync and reorder sync passes wakeup_flusher_threads(0) will queue work doing complete writeback for each flusher thread. Thus there is not much point in submitting another work doing full inode WB_SYNC_NONE writeback by writeback_inodes_sb(). After this change it does not make sense to call nonblocking ->sync_fs and block device flush before calling sync_inodes_sb() because wakeup_flusher_threads() is completely asynchronous and thus these functions would be called in parallel with inode writeback running which will effectively void any work they do. So we move sync_inodes_sb() call before these two functions. Signed-off-by: Jan Kara Signed-off-by: Al Viro diff --git a/fs/sync.c b/fs/sync.c index 131ddae..eb8722d 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -73,12 +73,6 @@ static void sync_inodes_one_sb(struct super_block *sb, void *arg) sync_inodes_sb(sb); } -static void writeback_inodes_one_sb(struct super_block *sb, void *arg) -{ - if (!(sb->s_flags & MS_RDONLY)) - writeback_inodes_sb(sb, WB_REASON_SYNC); -} - static void sync_fs_one_sb(struct super_block *sb, void *arg) { if (!(sb->s_flags & MS_RDONLY) && sb->s_op->sync_fs) @@ -96,17 +90,22 @@ static void fdatawait_one_bdev(struct block_device *bdev, void *arg) } /* - * sync everything. Start out by waking pdflush, because that writes back - * all queues in parallel. + * Sync everything. We start by waking flusher threads so that most of + * writeback runs on all devices in parallel. Then we sync all inodes reliably + * which effectively also waits for all flusher threads to finish doing + * writeback. At this point all data is on disk so metadata should be stable + * and we tell filesystems to sync their metadata via ->sync_fs() calls. + * Finally, we writeout all block devices because some filesystems (e.g. ext2) + * just write metadata (such as inodes or bitmaps) to block device page cache + * and do not sync it on their own in ->sync_fs(). */ SYSCALL_DEFINE0(sync) { int nowait = 0, wait = 1; wakeup_flusher_threads(0, WB_REASON_SYNC); - iterate_supers(writeback_inodes_one_sb, NULL); - iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_inodes_one_sb, NULL); + iterate_supers(sync_fs_one_sb, &nowait); iterate_supers(sync_fs_one_sb, &wait); iterate_bdevs(fdatawrite_one_bdev, NULL); iterate_bdevs(fdatawait_one_bdev, NULL); -- cgit v0.10.2 From e8b96eb5034a0ccebf36760f88e31ea3e3cdf1e4 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 30 Apr 2012 13:11:29 -0500 Subject: vfs: allow custom EOF in generic_file_llseek code For ext3/4 htree directories, using the vfs llseek function with SEEK_END goes to i_size like for any other file, but in reality we want the maximum possible hash value. Recent changes in ext4 have cut & pasted generic_file_llseek() back into fs/ext4/dir.c, but replicating this core code seems like a bad idea, especially since the copy has already diverged from the vfs. This patch updates generic_file_llseek_size to accept both a custom maximum offset, and a custom EOF position. With this in place, ext4_dir_llseek can pass in the appropriate maximum hash position for both maxsize and eof, and get what it wants. As far as I know, this does not fix any bugs - nfs in the kernel doesn't use SEEK_END, and I don't know of any user who does. But some ext4 folks seem keen on doing the right thing here, and I can't really argue. (Patch also fixes up some comments slightly) Signed-off-by: Eric Sandeen Signed-off-by: Al Viro diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 92490e9..901f67e 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -303,7 +303,8 @@ loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) if (likely(dx_dir)) return generic_file_llseek_size(file, offset, origin, - ext3_get_htree_eof(file)); + ext3_get_htree_eof(file), + i_size_read(inode)); else return generic_file_llseek(file, offset, origin); } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 8c7642a..f3dadd0 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -225,7 +225,8 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int origin) else maxbytes = inode->i_sb->s_maxbytes; - return generic_file_llseek_size(file, offset, origin, maxbytes); + return generic_file_llseek_size(file, offset, origin, + maxbytes, i_size_read(inode)); } const struct file_operations ext4_file_operations = { diff --git a/fs/read_write.c b/fs/read_write.c index c20614f..1adfb69 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -55,10 +55,11 @@ static loff_t lseek_execute(struct file *file, struct inode *inode, * @file: file structure to seek on * @offset: file offset to seek to * @origin: type of seek - * @size: max size of file system + * @size: max size of this file in file system + * @eof: offset used for SEEK_END position * * This is a variant of generic_file_llseek that allows passing in a custom - * file size. + * maximum file size and a custom EOF position, for e.g. hashed directories * * Synchronization: * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) @@ -67,13 +68,13 @@ static loff_t lseek_execute(struct file *file, struct inode *inode, */ loff_t generic_file_llseek_size(struct file *file, loff_t offset, int origin, - loff_t maxsize) + loff_t maxsize, loff_t eof) { struct inode *inode = file->f_mapping->host; switch (origin) { case SEEK_END: - offset += i_size_read(inode); + offset += eof; break; case SEEK_CUR: /* @@ -99,7 +100,7 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin, * In the generic case the entire file is data, so as long as * offset isn't at the end of the file then the offset is data. */ - if (offset >= i_size_read(inode)) + if (offset >= eof) return -ENXIO; break; case SEEK_HOLE: @@ -107,9 +108,9 @@ generic_file_llseek_size(struct file *file, loff_t offset, int origin, * There is a virtual hole at the end of the file, so as long as * offset isn't i_size or larger, return i_size. */ - if (offset >= i_size_read(inode)) + if (offset >= eof) return -ENXIO; - offset = i_size_read(inode); + offset = eof; break; } @@ -132,7 +133,8 @@ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) struct inode *inode = file->f_mapping->host; return generic_file_llseek_size(file, offset, origin, - inode->i_sb->s_maxbytes); + inode->i_sb->s_maxbytes, + i_size_read(inode)); } EXPORT_SYMBOL(generic_file_llseek); diff --git a/include/linux/fs.h b/include/linux/fs.h index 6a6ca85..34acf51 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2454,7 +2454,7 @@ extern loff_t noop_llseek(struct file *file, loff_t offset, int origin); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek_size(struct file *file, loff_t offset, - int origin, loff_t maxsize); + int origin, loff_t maxsize, loff_t eof); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); -- cgit v0.10.2 From ec7268ce21b379a248705548573393e4f346b20b Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 30 Apr 2012 13:14:03 -0500 Subject: ext4: use core vfs llseek code for dir seeks Use the new functionality in generic_file_llseek_size() to accept a custom EOF position, and un-cut-and-paste all the vfs llseek code from ext4. Also fix up comments on ext4_llseek() to reflect reality. Signed-off-by: Eric Sandeen Signed-off-by: Al Viro diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index aa39e60..8e07d2a 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -324,74 +324,27 @@ static inline loff_t ext4_get_htree_eof(struct file *filp) /* - * ext4_dir_llseek() based on generic_file_llseek() to handle both - * non-htree and htree directories, where the "offset" is in terms - * of the filename hash value instead of the byte offset. + * ext4_dir_llseek() calls generic_file_llseek_size to handle htree + * directories, where the "offset" is in terms of the filename hash + * value instead of the byte offset. * - * NOTE: offsets obtained *before* ext4_set_inode_flag(dir, EXT4_INODE_INDEX) - * will be invalid once the directory was converted into a dx directory + * Because we may return a 64-bit hash that is well beyond offset limits, + * we need to pass the max hash as the maximum allowable offset in + * the htree directory case. + * + * For non-htree, ext4_llseek already chooses the proper max offset. */ loff_t ext4_dir_llseek(struct file *file, loff_t offset, int origin) { struct inode *inode = file->f_mapping->host; - loff_t ret = -EINVAL; int dx_dir = is_dx_dir(inode); + loff_t htree_max = ext4_get_htree_eof(file); - mutex_lock(&inode->i_mutex); - - /* NOTE: relative offsets with dx directories might not work - * as expected, as it is difficult to figure out the - * correct offset between dx hashes */ - - switch (origin) { - case SEEK_END: - if (unlikely(offset > 0)) - goto out_err; /* not supported for directories */ - - /* so only negative offsets are left, does that have a - * meaning for directories at all? */ - if (dx_dir) - offset += ext4_get_htree_eof(file); - else - offset += inode->i_size; - break; - case SEEK_CUR: - /* - * Here we special-case the lseek(fd, 0, SEEK_CUR) - * position-querying operation. Avoid rewriting the "same" - * f_pos value back to the file because a concurrent read(), - * write() or lseek() might have altered it - */ - if (offset == 0) { - offset = file->f_pos; - goto out_ok; - } - - offset += file->f_pos; - break; - } - - if (unlikely(offset < 0)) - goto out_err; - - if (!dx_dir) { - if (offset > inode->i_sb->s_maxbytes) - goto out_err; - } else if (offset > ext4_get_htree_eof(file)) - goto out_err; - - /* Special lock needed here? */ - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } - -out_ok: - ret = offset; -out_err: - mutex_unlock(&inode->i_mutex); - - return ret; + if (likely(dx_dir)) + return generic_file_llseek_size(file, offset, origin, + htree_max, htree_max); + else + return ext4_llseek(file, offset, origin); } /* diff --git a/fs/ext4/file.c b/fs/ext4/file.c index f3dadd0..782eecb 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -211,9 +211,9 @@ static int ext4_file_open(struct inode * inode, struct file * filp) } /* - * ext4_llseek() copied from generic_file_llseek() to handle both - * block-mapped and extent-mapped maxbytes values. This should - * otherwise be identical with generic_file_llseek(). + * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values + * by calling generic_file_llseek_size() with the appropriate maxbytes + * value for each. */ loff_t ext4_llseek(struct file *file, loff_t offset, int origin) { -- cgit v0.10.2 From de9b9422026357c878a9fa3714936be0e1388e9c Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Mon, 30 Apr 2012 13:16:04 -0500 Subject: ext3: pass custom EOF to generic_file_llseek_size() Use the new custom EOF argument to generic_file_llseek_size so that SEEK_END will go to the max hash value for htree dirs in ext3 rather than to i_size_read() Signed-off-by: Eric Sandeen Signed-off-by: Al Viro diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 901f67e..c8fff93 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -300,11 +300,11 @@ loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin) { struct inode *inode = file->f_mapping->host; int dx_dir = is_dx_dir(inode); + loff_t htree_max = ext3_get_htree_eof(file); if (likely(dx_dir)) return generic_file_llseek_size(file, offset, origin, - ext3_get_htree_eof(file), - i_size_read(inode)); + htree_max, htree_max); else return generic_file_llseek(file, offset, origin); } -- cgit v0.10.2 From 3c0a6163688b8ca3f44029c7bdb3d91a865c878a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jul 2012 17:32:50 +0400 Subject: unobfuscate follow_up() a bit really convoluted test in there has grown up during struct mount introduction; what it checks is that we'd reached the root of mount tree. diff --git a/fs/namei.c b/fs/namei.c index c14dfac..d4d15bb 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -722,7 +722,7 @@ int follow_up(struct path *path) br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; - if (&parent->mnt == path->mnt) { + if (parent == mnt) { br_read_unlock(&vfsmount_lock); return 0; } -- cgit v0.10.2 From 32a7991b6a9c758e4e2b8166c5e1cc7563c3dcde Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Jul 2012 20:43:19 +0400 Subject: tidy up namei.c a bit locking/unlocking for rcu walk taken to a couple of inline helpers Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index d4d15bb..2ccc35c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -427,6 +427,18 @@ EXPORT_SYMBOL(path_put); * to restart the path walk from the beginning in ref-walk mode. */ +static inline void lock_rcu_walk(void) +{ + br_read_lock(&vfsmount_lock); + rcu_read_lock(); +} + +static inline void unlock_rcu_walk(void) +{ + rcu_read_unlock(); + br_read_unlock(&vfsmount_lock); +} + /** * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data @@ -480,8 +492,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) } mntget(nd->path.mnt); - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); nd->flags &= ~LOOKUP_RCU; return 0; @@ -522,15 +533,13 @@ static int complete_walk(struct nameidata *nd) spin_lock(&dentry->d_lock); if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { spin_unlock(&dentry->d_lock); - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -985,8 +994,7 @@ failed: nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); return -ECHILD; } @@ -1323,8 +1331,7 @@ static void terminate_walk(struct nameidata *nd) nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - rcu_read_unlock(); - br_read_unlock(&vfsmount_lock); + unlock_rcu_walk(); } } @@ -1691,8 +1698,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { path_get(&nd->path); @@ -1704,8 +1710,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); set_root_rcu(nd); } else { set_root(nd); @@ -1717,8 +1722,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); do { seq = read_seqcount_begin(&fs->seq); @@ -1753,8 +1757,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(&vfsmount_lock); - rcu_read_lock(); + lock_rcu_walk(); } else { path_get(&file->f_path); fput_light(file, fput_needed); -- cgit v0.10.2 From 8fc37ec54cd8e37193b0d42809b785ff19661c34 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Jul 2012 09:18:15 +0400 Subject: don't expose I_NEW inodes via dentry->d_inode d_instantiate(dentry, inode); unlock_new_inode(inode); is a bad idea; do it the other way round... Signed-off-by: Al Viro diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index da52cdb..ffa2be5 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -269,8 +269,8 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry, iput(ecryptfs_inode); goto out; } - d_instantiate(ecryptfs_dentry, ecryptfs_inode); unlock_new_inode(ecryptfs_inode); + d_instantiate(ecryptfs_dentry, ecryptfs_inode); out: return rc; } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 9ba7de0..73b0d95 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -41,8 +41,8 @@ static inline int ext2_add_nondir(struct dentry *dentry, struct inode *inode) { int err = ext2_add_link(dentry, inode); if (!err) { - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } inode_dec_link_count(inode); @@ -242,8 +242,8 @@ static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode) if (err) goto out_fail; - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); out: return err; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 85286db..8f4fdda 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -1671,8 +1671,8 @@ static int ext3_add_nondir(handle_t *handle, int err = ext3_add_entry(handle, dentry, inode); if (!err) { ext3_mark_inode_dirty(handle, inode); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } drop_nlink(inode); @@ -1836,8 +1836,8 @@ out_clear_inode: if (err) goto out_clear_inode; - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); out_stop: brelse(dir_block); ext3_journal_stop(handle); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index eca3e48..d0d3f0e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2072,8 +2072,8 @@ static int ext4_add_nondir(handle_t *handle, int err = ext4_add_entry(handle, dentry, inode); if (!err) { ext4_mark_inode_dirty(handle, inode); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; } drop_nlink(inode); @@ -2249,8 +2249,8 @@ out_clear_inode: err = ext4_mark_inode_dirty(handle, dir); if (err) goto out_clear_inode; - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); out_stop: brelse(dir_block); ext4_journal_stop(handle); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 2324519..ad7774d 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -226,8 +226,8 @@ static int jffs2_create(struct inode *dir_i, struct dentry *dentry, __func__, inode->i_ino, inode->i_mode, inode->i_nlink, f->inocache->pino_nlink, inode->i_mapping->nrpages); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: @@ -446,8 +446,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: @@ -591,8 +591,8 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: @@ -766,8 +766,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode mutex_unlock(&dir_f->sem); jffs2_complete_reservation(c); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); return 0; fail: diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index c426293..3b91a7a 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -176,8 +176,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode, unlock_new_inode(ip); iput(ip); } else { - d_instantiate(dentry, ip); unlock_new_inode(ip); + d_instantiate(dentry, ip); } out2: @@ -309,8 +309,8 @@ static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode) unlock_new_inode(ip); iput(ip); } else { - d_instantiate(dentry, ip); unlock_new_inode(ip); + d_instantiate(dentry, ip); } out2: @@ -1043,8 +1043,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - d_instantiate(dentry, ip); unlock_new_inode(ip); + d_instantiate(dentry, ip); } out2: @@ -1424,8 +1424,8 @@ static int jfs_mknod(struct inode *dir, struct dentry *dentry, unlock_new_inode(ip); iput(ip); } else { - d_instantiate(dentry, ip); unlock_new_inode(ip); + d_instantiate(dentry, ip); } out1: diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 3916be1..8567fb8 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -634,8 +634,8 @@ static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mod reiserfs_update_inode_transaction(inode); reiserfs_update_inode_transaction(dir); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -712,8 +712,8 @@ static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode goto out_failed; } - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: @@ -800,8 +800,8 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode // the above add_entry did not update dir's stat data reiserfs_update_sd(&th, dir); - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: reiserfs_write_unlock_once(dir->i_sb, lock_depth); @@ -1096,8 +1096,8 @@ static int reiserfs_symlink(struct inode *parent_dir, goto out_failed; } - d_instantiate(dentry, inode); unlock_new_inode(inode); + d_instantiate(dentry, inode); retval = journal_end(&th, parent_dir->i_sb, jbegin_count); out_failed: reiserfs_write_unlock(parent_dir->i_sb); -- cgit v0.10.2 From 3b8b487114c95ef6db5fef708ef69bfb5209014e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Jun 2012 11:38:56 +0400 Subject: ecryptfs: don't reinvent the wheels, please - use struct completion ... and keep the sodding requests on stack - they are small enough. Signed-off-by: Al Viro diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 867b64c..989e034 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -550,20 +550,6 @@ extern struct kmem_cache *ecryptfs_key_record_cache; extern struct kmem_cache *ecryptfs_key_sig_cache; extern struct kmem_cache *ecryptfs_global_auth_tok_cache; extern struct kmem_cache *ecryptfs_key_tfm_cache; -extern struct kmem_cache *ecryptfs_open_req_cache; - -struct ecryptfs_open_req { -#define ECRYPTFS_REQ_PROCESSED 0x00000001 -#define ECRYPTFS_REQ_DROPPED 0x00000002 -#define ECRYPTFS_REQ_ZOMBIE 0x00000004 - u32 flags; - struct file **lower_file; - struct dentry *lower_dentry; - struct vfsmount *lower_mnt; - wait_queue_head_t wait; - struct mutex mux; - struct list_head kthread_ctl_list; -}; struct inode *ecryptfs_get_inode(struct inode *lower_inode, struct super_block *sb); diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 0dbe58a..c7d199d 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -27,7 +27,13 @@ #include #include "ecryptfs_kernel.h" -struct kmem_cache *ecryptfs_open_req_cache; +struct ecryptfs_open_req { + struct file **lower_file; + struct dentry *lower_dentry; + struct vfsmount *lower_mnt; + struct completion done; + struct list_head kthread_ctl_list; +}; static struct ecryptfs_kthread_ctl { #define ECRYPTFS_KTHREAD_ZOMBIE 0x00000001 @@ -67,18 +73,13 @@ static int ecryptfs_threadfn(void *ignored) req = list_first_entry(&ecryptfs_kthread_ctl.req_list, struct ecryptfs_open_req, kthread_ctl_list); - mutex_lock(&req->mux); list_del(&req->kthread_ctl_list); - if (!(req->flags & ECRYPTFS_REQ_ZOMBIE)) { - dget(req->lower_dentry); - mntget(req->lower_mnt); - (*req->lower_file) = dentry_open( - req->lower_dentry, req->lower_mnt, - (O_RDWR | O_LARGEFILE), current_cred()); - req->flags |= ECRYPTFS_REQ_PROCESSED; - } - wake_up(&req->wait); - mutex_unlock(&req->mux); + dget(req->lower_dentry); + mntget(req->lower_mnt); + (*req->lower_file) = dentry_open( + req->lower_dentry, req->lower_mnt, + (O_RDWR | O_LARGEFILE), current_cred()); + complete(&req->done); } mutex_unlock(&ecryptfs_kthread_ctl.mux); } @@ -111,10 +112,9 @@ void ecryptfs_destroy_kthread(void) ecryptfs_kthread_ctl.flags |= ECRYPTFS_KTHREAD_ZOMBIE; list_for_each_entry(req, &ecryptfs_kthread_ctl.req_list, kthread_ctl_list) { - mutex_lock(&req->mux); - req->flags |= ECRYPTFS_REQ_ZOMBIE; - wake_up(&req->wait); - mutex_unlock(&req->mux); + list_del(&req->kthread_ctl_list); + *req->lower_file = ERR_PTR(-EIO); + complete(&req->done); } mutex_unlock(&ecryptfs_kthread_ctl.mux); kthread_stop(ecryptfs_kthread); @@ -136,7 +136,7 @@ int ecryptfs_privileged_open(struct file **lower_file, struct vfsmount *lower_mnt, const struct cred *cred) { - struct ecryptfs_open_req *req; + struct ecryptfs_open_req req; int flags = O_LARGEFILE; int rc = 0; @@ -153,17 +153,10 @@ int ecryptfs_privileged_open(struct file **lower_file, rc = PTR_ERR((*lower_file)); goto out; } - req = kmem_cache_alloc(ecryptfs_open_req_cache, GFP_KERNEL); - if (!req) { - rc = -ENOMEM; - goto out; - } - mutex_init(&req->mux); - req->lower_file = lower_file; - req->lower_dentry = lower_dentry; - req->lower_mnt = lower_mnt; - init_waitqueue_head(&req->wait); - req->flags = 0; + init_completion(&req.done); + req.lower_file = lower_file; + req.lower_dentry = lower_dentry; + req.lower_mnt = lower_mnt; mutex_lock(&ecryptfs_kthread_ctl.mux); if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) { rc = -EIO; @@ -171,27 +164,14 @@ int ecryptfs_privileged_open(struct file **lower_file, printk(KERN_ERR "%s: We are in the middle of shutting down; " "aborting privileged request to open lower file\n", __func__); - goto out_free; + goto out; } - list_add_tail(&req->kthread_ctl_list, &ecryptfs_kthread_ctl.req_list); + list_add_tail(&req.kthread_ctl_list, &ecryptfs_kthread_ctl.req_list); mutex_unlock(&ecryptfs_kthread_ctl.mux); wake_up(&ecryptfs_kthread_ctl.wait); - wait_event(req->wait, (req->flags != 0)); - mutex_lock(&req->mux); - BUG_ON(req->flags == 0); - if (req->flags & ECRYPTFS_REQ_DROPPED - || req->flags & ECRYPTFS_REQ_ZOMBIE) { - rc = -EIO; - printk(KERN_WARNING "%s: Privileged open request dropped\n", - __func__); - goto out_unlock; - } - if (IS_ERR(*req->lower_file)) - rc = PTR_ERR(*req->lower_file); -out_unlock: - mutex_unlock(&req->mux); -out_free: - kmem_cache_free(ecryptfs_open_req_cache, req); + wait_for_completion(&req.done); + if (IS_ERR(*lower_file)) + rc = PTR_ERR(*lower_file); out: return rc; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 7edeb3d..1c0b3b6 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -681,11 +681,6 @@ static struct ecryptfs_cache_info { .name = "ecryptfs_key_tfm_cache", .size = sizeof(struct ecryptfs_key_tfm), }, - { - .cache = &ecryptfs_open_req_cache, - .name = "ecryptfs_open_req_cache", - .size = sizeof(struct ecryptfs_open_req), - }, }; static void ecryptfs_free_kmem_caches(void) -- cgit v0.10.2 From 3b6456d2c3fc9d0c4ec1db5d6594f40098ff0a6f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 Jul 2012 13:49:40 +0400 Subject: zoran: don't bother with struct file * in zoran_map all we need it for is file->private_data, which is assign-once, already assigned by that point and, incidentally, its value is already in use by zoran ->mmap() anyway. So just store that pointer instead... Signed-off-by: Al Viro diff --git a/drivers/media/video/zoran/zoran.h b/drivers/media/video/zoran/zoran.h index d7166af..ca2754a 100644 --- a/drivers/media/video/zoran/zoran.h +++ b/drivers/media/video/zoran/zoran.h @@ -172,8 +172,10 @@ struct zoran_jpg_settings { struct v4l2_jpegcompression jpg_comp; /* JPEG-specific capture settings */ }; +struct zoran_fh; + struct zoran_mapping { - struct file *file; + struct zoran_fh *fh; int count; }; diff --git a/drivers/media/video/zoran/zoran_driver.c b/drivers/media/video/zoran/zoran_driver.c index c573109..c6ccdeb 100644 --- a/drivers/media/video/zoran/zoran_driver.c +++ b/drivers/media/video/zoran/zoran_driver.c @@ -2811,7 +2811,7 @@ static void zoran_vm_close (struct vm_area_struct *vma) { struct zoran_mapping *map = vma->vm_private_data; - struct zoran_fh *fh = map->file->private_data; + struct zoran_fh *fh = map->fh; struct zoran *zr = fh->zr; int i; @@ -2938,7 +2938,7 @@ zoran_mmap (struct file *file, res = -ENOMEM; goto mmap_unlock_and_return; } - map->file = file; + map->fh = fh; map->count = 1; vma->vm_ops = &zoran_vm_ops; -- cgit v0.10.2 From bf349a447059656ebe63fb4fd1ccb27ac1da22ad Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Jun 2012 11:46:13 +0400 Subject: spufs: shift dget/mntget towards dentry_open() Signed-off-by: Al Viro diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 66519d2..1c9cac0 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -323,22 +323,21 @@ static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt) struct file *filp; ret = get_unused_fd(); - if (ret < 0) { - dput(dentry); - mntput(mnt); - goto out; - } + if (ret < 0) + return ret; - filp = dentry_open(dentry, mnt, O_RDONLY, current_cred()); + /* + * get references for dget and mntget, will be released + * in error path of *_open(). + */ + filp = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, current_cred()); if (IS_ERR(filp)) { put_unused_fd(ret); - ret = PTR_ERR(filp); - goto out; + return PTR_ERR(filp); } filp->f_op = &spufs_context_fops; fd_install(ret, filp); -out: return ret; } @@ -495,11 +494,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, put_spu_context(neighbor); } - /* - * get references for dget and mntget, will be released - * in error path of *_open(). - */ - ret = spufs_context_open(dget(dentry), mntget(mnt)); + ret = spufs_context_open(dentry, mnt); if (ret < 0) { WARN_ON(spufs_rmdir(inode, dentry)); if (affinity) @@ -562,22 +557,21 @@ static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt) struct file *filp; ret = get_unused_fd(); - if (ret < 0) { - dput(dentry); - mntput(mnt); - goto out; - } + if (ret < 0) + return ret; - filp = dentry_open(dentry, mnt, O_RDONLY, current_cred()); + /* + * get references for dget and mntget, will be released + * in error path of *_open(). + */ + filp = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, current_cred()); if (IS_ERR(filp)) { put_unused_fd(ret); - ret = PTR_ERR(filp); - goto out; + return PTR_ERR(filp); } filp->f_op = &simple_dir_operations; fd_install(ret, filp); -out: return ret; } @@ -591,11 +585,7 @@ static int spufs_create_gang(struct inode *inode, if (ret) goto out; - /* - * get references for dget and mntget, will be released - * in error path of *_open(). - */ - ret = spufs_gang_open(dget(dentry), mntget(mnt)); + ret = spufs_gang_open(dentry, mnt); if (ret < 0) { int err = simple_rmdir(inode, dentry); WARN_ON(err); -- cgit v0.10.2 From 765927b2d508712d320c8934db963bbe14c3fcec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Jun 2012 21:58:53 +0400 Subject: switch dentry_open() to struct path, make it grab references itself Signed-off-by: Al Viro diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1c9cac0..d544d78 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -317,7 +317,7 @@ out: return ret; } -static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt) +static int spufs_context_open(struct path *path) { int ret; struct file *filp; @@ -326,11 +326,7 @@ static int spufs_context_open(struct dentry *dentry, struct vfsmount *mnt) if (ret < 0) return ret; - /* - * get references for dget and mntget, will be released - * in error path of *_open(). - */ - filp = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, current_cred()); + filp = dentry_open(path, O_RDONLY, current_cred()); if (IS_ERR(filp)) { put_unused_fd(ret); return PTR_ERR(filp); @@ -452,6 +448,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, int affinity; struct spu_gang *gang; struct spu_context *neighbor; + struct path path = {.mnt = mnt, .dentry = dentry}; ret = -EPERM; if ((flags & SPU_CREATE_NOSCHED) && @@ -494,7 +491,7 @@ spufs_create_context(struct inode *inode, struct dentry *dentry, put_spu_context(neighbor); } - ret = spufs_context_open(dentry, mnt); + ret = spufs_context_open(&path); if (ret < 0) { WARN_ON(spufs_rmdir(inode, dentry)); if (affinity) @@ -551,7 +548,7 @@ out: return ret; } -static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt) +static int spufs_gang_open(struct path *path) { int ret; struct file *filp; @@ -564,7 +561,7 @@ static int spufs_gang_open(struct dentry *dentry, struct vfsmount *mnt) * get references for dget and mntget, will be released * in error path of *_open(). */ - filp = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, current_cred()); + filp = dentry_open(path, O_RDONLY, current_cred()); if (IS_ERR(filp)) { put_unused_fd(ret); return PTR_ERR(filp); @@ -579,13 +576,14 @@ static int spufs_create_gang(struct inode *inode, struct dentry *dentry, struct vfsmount *mnt, umode_t mode) { + struct path path = {.mnt = mnt, .dentry = dentry}; int ret; ret = spufs_mkgang(inode, dentry, mode & S_IRWXUGO); if (ret) goto out; - ret = spufs_gang_open(dentry, mnt); + ret = spufs_gang_open(&path); if (ret < 0) { int err = simple_rmdir(inode, dentry); WARN_ON(err); diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index aa9103f..abf645c 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -257,8 +257,8 @@ static int autofs_dev_ioctl_open_mountpoint(const char *name, dev_t devid) * corresponding to the autofs fs we want to open. */ - filp = dentry_open(path.dentry, path.mnt, O_RDONLY, - current_cred()); + filp = dentry_open(&path, O_RDONLY, current_cred()); + path_put(&path); if (IS_ERR(filp)) { err = PTR_ERR(filp); goto out; diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 0e3c092..c0353df 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -891,6 +891,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) struct cachefiles_cache *cache; mm_segment_t old_fs; struct file *file; + struct path path; loff_t pos, eof; size_t len; void *data; @@ -916,10 +917,9 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) /* write the page to the backing filesystem and let it store it in its * own time */ - dget(object->backer); - mntget(cache->mnt); - file = dentry_open(object->backer, cache->mnt, O_RDWR, - cache->cache_cred); + path.mnt = cache->mnt; + path.dentry = object->backer; + file = dentry_open(&path, O_RDWR, cache->cache_cred); if (IS_ERR(file)) { ret = PTR_ERR(file); } else { diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index c7d199d..809e67d 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -29,8 +29,7 @@ struct ecryptfs_open_req { struct file **lower_file; - struct dentry *lower_dentry; - struct vfsmount *lower_mnt; + struct path path; struct completion done; struct list_head kthread_ctl_list; }; @@ -74,10 +73,7 @@ static int ecryptfs_threadfn(void *ignored) struct ecryptfs_open_req, kthread_ctl_list); list_del(&req->kthread_ctl_list); - dget(req->lower_dentry); - mntget(req->lower_mnt); - (*req->lower_file) = dentry_open( - req->lower_dentry, req->lower_mnt, + *req->lower_file = dentry_open(&req->path, (O_RDWR | O_LARGEFILE), current_cred()); complete(&req->done); } @@ -140,23 +136,22 @@ int ecryptfs_privileged_open(struct file **lower_file, int flags = O_LARGEFILE; int rc = 0; + init_completion(&req.done); + req.lower_file = lower_file; + req.path.dentry = lower_dentry; + req.path.mnt = lower_mnt; + /* Corresponding dput() and mntput() are done when the * lower file is fput() when all eCryptfs files for the inode are * released. */ - dget(lower_dentry); - mntget(lower_mnt); flags |= IS_RDONLY(lower_dentry->d_inode) ? O_RDONLY : O_RDWR; - (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); + (*lower_file) = dentry_open(&req.path, flags, cred); if (!IS_ERR(*lower_file)) goto out; if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; } - init_completion(&req.done); - req.lower_file = lower_file; - req.lower_dentry = lower_dentry; - req.lower_mnt = lower_mnt; mutex_lock(&ecryptfs_kthread_ctl.mux); if (ecryptfs_kthread_ctl.flags & ECRYPTFS_KTHREAD_ZOMBIE) { rc = -EIO; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b42063c..29ab099 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -19,19 +19,19 @@ #define dprintk(fmt, args...) do{}while(0) -static int get_name(struct vfsmount *mnt, struct dentry *dentry, char *name, - struct dentry *child); +static int get_name(const struct path *path, char *name, struct dentry *child); static int exportfs_get_name(struct vfsmount *mnt, struct dentry *dir, char *name, struct dentry *child) { const struct export_operations *nop = dir->d_sb->s_export_op; + struct path path = {.mnt = mnt, .dentry = dir}; if (nop->get_name) return nop->get_name(dir, name, child); else - return get_name(mnt, dir, name, child); + return get_name(&path, name, child); } /* @@ -249,11 +249,10 @@ static int filldir_one(void * __buf, const char * name, int len, * calls readdir on the parent until it finds an entry with * the same inode number as the child, and returns that. */ -static int get_name(struct vfsmount *mnt, struct dentry *dentry, - char *name, struct dentry *child) +static int get_name(const struct path *path, char *name, struct dentry *child) { const struct cred *cred = current_cred(); - struct inode *dir = dentry->d_inode; + struct inode *dir = path->dentry->d_inode; int error; struct file *file; struct getdents_callback buffer; @@ -267,7 +266,7 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry, /* * Open the directory ... */ - file = dentry_open(dget(dentry), mntget(mnt), O_RDONLY, cred); + file = dentry_open(path, O_RDONLY, cred); error = PTR_ERR(file); if (IS_ERR(file)) goto out; diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index e5c0653..c1dffe4 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -420,8 +420,7 @@ static int hppfs_open(struct inode *inode, struct file *file) { const struct cred *cred = file->f_cred; struct hppfs_private *data; - struct vfsmount *proc_mnt; - struct dentry *proc_dentry; + struct path path; char *host_file; int err, fd, type, filter; @@ -434,12 +433,11 @@ static int hppfs_open(struct inode *inode, struct file *file) if (host_file == NULL) goto out_free2; - proc_dentry = HPPFS_I(inode)->proc_dentry; - proc_mnt = inode->i_sb->s_fs_info; + path.mnt = inode->i_sb->s_fs_info; + path.dentry = HPPFS_I(inode)->proc_dentry; /* XXX This isn't closed anywhere */ - data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode), cred); + data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); err = PTR_ERR(data->proc_file); if (IS_ERR(data->proc_file)) goto out_free1; @@ -484,8 +482,7 @@ static int hppfs_dir_open(struct inode *inode, struct file *file) { const struct cred *cred = file->f_cred; struct hppfs_private *data; - struct vfsmount *proc_mnt; - struct dentry *proc_dentry; + struct path path; int err; err = -ENOMEM; @@ -493,10 +490,9 @@ static int hppfs_dir_open(struct inode *inode, struct file *file) if (data == NULL) goto out; - proc_dentry = HPPFS_I(inode)->proc_dentry; - proc_mnt = inode->i_sb->s_fs_info; - data->proc_file = dentry_open(dget(proc_dentry), mntget(proc_mnt), - file_mode(file->f_mode), cred); + path.mnt = inode->i_sb->s_fs_info; + path.dentry = HPPFS_I(inode)->proc_dentry; + data->proc_file = dentry_open(&path, file_mode(file->f_mode), cred); err = PTR_ERR(data->proc_file); if (IS_ERR(data->proc_file)) goto out_free; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 05d9eee..4700a0a 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -745,7 +745,7 @@ __be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int may_flags, struct file **filp) { - struct dentry *dentry; + struct path path; struct inode *inode; int flags = O_RDONLY|O_LARGEFILE; __be32 err; @@ -762,8 +762,9 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, if (err) goto out; - dentry = fhp->fh_dentry; - inode = dentry->d_inode; + path.mnt = fhp->fh_export->ex_path.mnt; + path.dentry = fhp->fh_dentry; + inode = path.dentry->d_inode; /* Disallow write access to files with the append-only bit set * or any access when mandatory locking enabled @@ -792,8 +793,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, else flags = O_WRONLY|O_LARGEFILE; } - *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt), - flags, current_cred()); + *filp = dentry_open(&path, flags, current_cred()); if (IS_ERR(*filp)) host_err = PTR_ERR(*filp); else { diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 3568c8a..d438036 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -61,8 +61,6 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) { int client_fd; - struct dentry *dentry; - struct vfsmount *mnt; struct file *new_file; pr_debug("%s: group=%p event=%p\n", __func__, group, event); @@ -81,12 +79,10 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event) * we need a new file handle for the userspace program so it can read even if it was * originally opened O_WRONLY. */ - dentry = dget(event->path.dentry); - mnt = mntget(event->path.mnt); /* it's possible this event was an overflow event. in that case dentry and mnt * are NULL; That's fine, just don't call dentry open */ - if (dentry && mnt) - new_file = dentry_open(dentry, mnt, + if (event->path.dentry && event->path.mnt) + new_file = dentry_open(&event->path, group->fanotify_data.f_flags | FMODE_NONOTIFY, current_cred()); else diff --git a/fs/open.c b/fs/open.c index 75bea86..1e914b3 100644 --- a/fs/open.c +++ b/fs/open.c @@ -766,11 +766,7 @@ int finish_no_open(struct file *file, struct dentry *dentry) } EXPORT_SYMBOL(finish_no_open); -/* - * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an - * error. - */ -struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, +struct file *dentry_open(const struct path *path, int flags, const struct cred *cred) { int error; @@ -779,19 +775,16 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags, validate_creds(cred); /* We must always pass in a valid mount pointer. */ - BUG_ON(!mnt); + BUG_ON(!path->mnt); error = -ENFILE; f = get_empty_filp(); - if (f == NULL) { - dput(dentry); - mntput(mnt); + if (f == NULL) return ERR_PTR(error); - } f->f_flags = flags; - f->f_path.mnt = mnt; - f->f_path.dentry = dentry; + f->f_path = *path; + path_get(&f->f_path); error = do_dentry_open(f, NULL, cred); if (!error) { error = open_check_o_direct(f); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 3a05a41..1f1535d 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -208,6 +208,7 @@ xfs_open_by_handle( struct inode *inode; struct dentry *dentry; fmode_t fmode; + struct path path; if (!capable(CAP_SYS_ADMIN)) return -XFS_ERROR(EPERM); @@ -252,8 +253,10 @@ xfs_open_by_handle( goto out_dput; } - filp = dentry_open(dentry, mntget(parfilp->f_path.mnt), - hreq->oflags, cred); + path.mnt = parfilp->f_path.mnt; + path.dentry = dentry; + filp = dentry_open(&path, hreq->oflags, cred); + dput(dentry); if (IS_ERR(filp)) { put_unused_fd(fd); return PTR_ERR(filp); diff --git a/include/linux/fs.h b/include/linux/fs.h index 34acf51..8fabb03 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2060,8 +2060,7 @@ extern long do_sys_open(int dfd, const char __user *filename, int flags, extern struct file *filp_open(const char *, int, umode_t); extern struct file *file_open_root(struct dentry *, struct vfsmount *, const char *, int); -extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, - const struct cred *); +extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); enum { diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 2dee38d..f8e54f5 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -721,8 +721,8 @@ static int mq_attr_ok(struct ipc_namespace *ipc_ns, struct mq_attr *attr) /* * Invoked when creating a new queue via sys_mq_open */ -static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, - struct dentry *dentry, int oflag, umode_t mode, +static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, + struct path *path, int oflag, umode_t mode, struct mq_attr *attr) { const struct cred *cred = current_cred(); @@ -732,9 +732,9 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, if (attr) { ret = mq_attr_ok(ipc_ns, attr); if (ret) - goto out; + return ERR_PTR(ret); /* store for use during create */ - dentry->d_fsdata = attr; + path->dentry->d_fsdata = attr; } else { struct mq_attr def_attr; @@ -744,71 +744,51 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct dentry *dir, ipc_ns->mq_msgsize_default); ret = mq_attr_ok(ipc_ns, &def_attr); if (ret) - goto out; + return ERR_PTR(ret); } mode &= ~current_umask(); - ret = mnt_want_write(ipc_ns->mq_mnt); + ret = mnt_want_write(path->mnt); if (ret) - goto out; - ret = vfs_create(dir->d_inode, dentry, mode, true); - dentry->d_fsdata = NULL; - if (ret) - goto out_drop_write; - - result = dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred); + return ERR_PTR(ret); + ret = vfs_create(dir, path->dentry, mode, true); + path->dentry->d_fsdata = NULL; + if (!ret) + result = dentry_open(path, oflag, cred); + else + result = ERR_PTR(ret); /* * dentry_open() took a persistent mnt_want_write(), * so we can now drop this one. */ - mnt_drop_write(ipc_ns->mq_mnt); + mnt_drop_write(path->mnt); return result; - -out_drop_write: - mnt_drop_write(ipc_ns->mq_mnt); -out: - dput(dentry); - mntput(ipc_ns->mq_mnt); - return ERR_PTR(ret); } /* Opens existing queue */ -static struct file *do_open(struct ipc_namespace *ipc_ns, - struct dentry *dentry, int oflag) +static struct file *do_open(struct path *path, int oflag) { - int ret; - const struct cred *cred = current_cred(); - static const int oflag2acc[O_ACCMODE] = { MAY_READ, MAY_WRITE, MAY_READ | MAY_WRITE }; - - if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) { - ret = -EINVAL; - goto err; - } - - if (inode_permission(dentry->d_inode, oflag2acc[oflag & O_ACCMODE])) { - ret = -EACCES; - goto err; - } - - return dentry_open(dentry, ipc_ns->mq_mnt, oflag, cred); - -err: - dput(dentry); - mntput(ipc_ns->mq_mnt); - return ERR_PTR(ret); + int acc; + if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY)) + return ERR_PTR(-EINVAL); + acc = oflag2acc[oflag & O_ACCMODE]; + if (inode_permission(path->dentry->d_inode, acc)) + return ERR_PTR(-EACCES); + return dentry_open(path, oflag, current_cred()); } SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, struct mq_attr __user *, u_attr) { - struct dentry *dentry; + struct path path; struct file *filp; char *name; struct mq_attr attr; int fd, error; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; + struct dentry *root = ipc_ns->mq_mnt->mnt_root; if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr))) return -EFAULT; @@ -822,52 +802,49 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, if (fd < 0) goto out_putname; - mutex_lock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); - dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name)); - if (IS_ERR(dentry)) { - error = PTR_ERR(dentry); + error = 0; + mutex_lock(&root->d_inode->i_mutex); + path.dentry = lookup_one_len(name, root, strlen(name)); + if (IS_ERR(path.dentry)) { + error = PTR_ERR(path.dentry); goto out_putfd; } - mntget(ipc_ns->mq_mnt); + path.mnt = mntget(ipc_ns->mq_mnt); if (oflag & O_CREAT) { - if (dentry->d_inode) { /* entry already exists */ - audit_inode(name, dentry); + if (path.dentry->d_inode) { /* entry already exists */ + audit_inode(name, path.dentry); if (oflag & O_EXCL) { error = -EEXIST; goto out; } - filp = do_open(ipc_ns, dentry, oflag); + filp = do_open(&path, oflag); } else { - filp = do_create(ipc_ns, ipc_ns->mq_mnt->mnt_root, - dentry, oflag, mode, + filp = do_create(ipc_ns, root->d_inode, + &path, oflag, mode, u_attr ? &attr : NULL); } } else { - if (!dentry->d_inode) { + if (!path.dentry->d_inode) { error = -ENOENT; goto out; } - audit_inode(name, dentry); - filp = do_open(ipc_ns, dentry, oflag); + audit_inode(name, path.dentry); + filp = do_open(&path, oflag); } - if (IS_ERR(filp)) { + if (!IS_ERR(filp)) + fd_install(fd, filp); + else error = PTR_ERR(filp); - goto out_putfd; - } - - fd_install(fd, filp); - goto out_upsem; - out: - dput(dentry); - mntput(ipc_ns->mq_mnt); + path_put(&path); out_putfd: - put_unused_fd(fd); - fd = error; -out_upsem: - mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); + if (error) { + put_unused_fd(fd); + fd = error; + } + mutex_unlock(&root->d_inode->i_mutex); out_putname: putname(name); return fd; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 372ec65..e423f5f 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2157,8 +2157,7 @@ static inline void flush_unauthorized_files(const struct cred *cred, get_file(devnull); } else { devnull = dentry_open( - dget(selinux_null), - mntget(selinuxfs_mount), + &selinux_null, O_RDWR, cred); if (IS_ERR(devnull)) { devnull = NULL; diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index dde2005..6d38851 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -221,7 +221,7 @@ extern void selinux_status_update_policyload(int seqno); extern void selinux_complete_init(void); extern int selinux_disable(void); extern void exit_sel_fs(void); -extern struct dentry *selinux_null; +extern struct path selinux_null; extern struct vfsmount *selinuxfs_mount; extern void selnl_notify_setenforce(int val); extern void selnl_notify_policyload(u32 seqno); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 3ad2902..298e695 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1297,7 +1297,7 @@ out: #define NULL_FILE_NAME "null" -struct dentry *selinux_null; +struct path selinux_null; static ssize_t sel_read_avc_cache_threshold(struct file *filp, char __user *buf, size_t count, loff_t *ppos) @@ -1838,7 +1838,7 @@ static int sel_fill_super(struct super_block *sb, void *data, int silent) init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3)); d_add(dentry, inode); - selinux_null = dentry; + selinux_null.dentry = dentry; dentry = sel_make_dir(sb->s_root, "avc", &sel_last_ino); if (IS_ERR(dentry)) { @@ -1912,7 +1912,7 @@ static int __init init_sel_fs(void) return err; } - selinuxfs_mount = kern_mount(&sel_fs_type); + selinux_null.mnt = selinuxfs_mount = kern_mount(&sel_fs_type); if (IS_ERR(selinuxfs_mount)) { printk(KERN_ERR "selinuxfs: could not mount!\n"); err = PTR_ERR(selinuxfs_mount); -- cgit v0.10.2 From 11e62a8fabd003352e852e74e1b64a437fd908c6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Jul 2012 11:17:49 +0400 Subject: btrfs: switch btrfs_ioctl_balance() to mnt_want_write_file() Signed-off-by: Al Viro diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0e92e57..1e9f6c0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3268,7 +3268,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) if (fs_info->sb->s_flags & MS_RDONLY) return -EROFS; - ret = mnt_want_write(file->f_path.mnt); + ret = mnt_want_write_file(file); if (ret) return ret; @@ -3338,7 +3338,7 @@ out_bargs: out: mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); - mnt_drop_write(file->f_path.mnt); + mnt_drop_write_file(file); return ret; } -- cgit v0.10.2 From 8cae6f7158ec1fa44c8a04a43db7d8020ec60437 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Jul 2012 11:19:07 +0400 Subject: ext4: switch EXT4_IOC_RESIZE_FS to mnt_want_write_file() Signed-off-by: Al Viro diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e34deac..23788b3 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -390,7 +390,7 @@ group_add_out: if (err) return err; - err = mnt_want_write(filp->f_path.mnt); + err = mnt_want_write_file(filp); if (err) goto resizefs_out; @@ -402,7 +402,7 @@ group_add_out: } if (err == 0) err = err2; - mnt_drop_write(filp->f_path.mnt); + mnt_drop_write_file(filp); resizefs_out: ext4_resize_end(sb); return err; -- cgit v0.10.2