From 7dffc87f2eea9f80889d2eb87bfbe63e51b9709c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 18 Mar 2016 13:25:43 -0400 Subject: reiserfs_cache_default_acl(): use get_acl() Signed-off-by: Al Viro diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 558a16b..ec74bbe 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -370,7 +370,7 @@ int reiserfs_cache_default_acl(struct inode *inode) if (IS_PRIVATE(inode)) return 0; - acl = reiserfs_get_acl(inode, ACL_TYPE_DEFAULT); + acl = get_acl(inode, ACL_TYPE_DEFAULT); if (acl && !IS_ERR(acl)) { int size = reiserfs_acl_size(acl->a_count); -- cgit v0.10.2 From 8861964f4c7caecbacc89bcb6b513b40cf097a02 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2016 14:38:36 +0100 Subject: jfs: Remove unnecessary code in jfs_get_acl The get_acl inode operation is called only when no ACL is cached. It makes no sense to check for a cached ACL as the first thing inside such inode operations. Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 4945685..ab48828 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -34,10 +34,6 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type) int size; char *value = NULL; - acl = get_cached_acl(inode, type); - if (acl != ACL_NOT_CACHED) - return acl; - switch(type) { case ACL_TYPE_ACCESS: ea_name = XATTR_NAME_POSIX_ACL_ACCESS; -- cgit v0.10.2 From b8a7a3a6674725d7ca0ff6e322f6c1cab6e6a11d Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2016 14:38:37 +0100 Subject: posix_acl: Inode acl caching fixes When get_acl() is called for an inode whose ACL is not cached yet, the get_acl inode operation is called to fetch the ACL from the filesystem. The inode operation is responsible for updating the cached acl with set_cached_acl(). This is done without locking at the VFS level, so another task can call set_cached_acl() or forget_cached_acl() before the get_acl inode operation gets to calling set_cached_acl(), and then get_acl's call to set_cached_acl() results in caching an outdate ACL. Prevent this from happening by setting the cached ACL pointer to a task-specific sentinel value before calling the get_acl inode operation. Move the responsibility for updating the cached ACL from the get_acl inode operations to get_acl(). There, only set the cached ACL if the sentinel value hasn't changed. The sentinel values are chosen to have odd values. Likewise, the value of ACL_NOT_CACHED is odd. In contrast, ACL object pointers always have an even value (ACLs are aligned in memory). This allows to distinguish uncached ACLs values from ACL objects. In addition, switch from guarding inode->i_acl and inode->i_default_acl upates by the inode->i_lock spinlock to using xchg() and cmpxchg(). Filesystems that do not want ACLs returned from their get_acl inode operations to be cached must call forget_cached_acl() to prevent the VFS from doing so. (Patch written by Al Viro and Andreas Gruenbacher.) Signed-off-by: Andreas Gruenbacher Signed-off-by: Al Viro diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 9da967f..2d94e94 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -93,7 +93,7 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) * instantiating the inode (v9fs_inode_from_fid) */ acl = get_cached_acl(inode, type); - BUG_ON(acl == ACL_NOT_CACHED); + BUG_ON(is_uncached_acl(acl)); return acl; } diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6d263bb..67a6077 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -63,9 +63,6 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) } kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index f197084..5457f216 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -37,6 +37,8 @@ static inline void ceph_set_cached_acl(struct inode *inode, spin_lock(&ci->i_ceph_lock); if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0)) set_cached_acl(inode, type, acl); + else + forget_cached_acl(inode, type); spin_unlock(&ci->i_ceph_lock); } diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 27695e6..42f1d18 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -172,9 +172,6 @@ ext2_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 69b1e73..c6601a4 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -172,9 +172,6 @@ ext4_get_acl(struct inode *inode, int type) acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index c8f25f7..6f1fdda 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -190,9 +190,6 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, acl = ERR_PTR(retval); kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c index afb33ed..ab7ea25 100644 --- a/fs/hfsplus/posix_acl.c +++ b/fs/hfsplus/posix_acl.c @@ -48,9 +48,6 @@ struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type) hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); - return acl; } diff --git a/fs/inode.c b/fs/inode.c index 69b8b52..4202aac 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -238,9 +238,9 @@ void __destroy_inode(struct inode *inode) } #ifdef CONFIG_FS_POSIX_ACL - if (inode->i_acl && inode->i_acl != ACL_NOT_CACHED) + if (inode->i_acl && !is_uncached_acl(inode->i_acl)) posix_acl_release(inode->i_acl); - if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) + if (inode->i_default_acl && !is_uncached_acl(inode->i_default_acl)) posix_acl_release(inode->i_default_acl); #endif this_cpu_dec(nr_inodes); diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 2f7a3c0..bc2693d 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -203,8 +203,6 @@ struct posix_acl *jffs2_get_acl(struct inode *inode, int type) acl = ERR_PTR(rc); } kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); return acl; } diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index ab48828..21fa92b 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -63,8 +63,6 @@ struct posix_acl *jfs_get_acl(struct inode *inode, int type) acl = posix_acl_from_xattr(&init_user_ns, value, size); } kfree(value); - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); return acl; } diff --git a/fs/namei.c b/fs/namei.c index 794f81d..3498d53 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -265,7 +265,7 @@ static int check_acl(struct inode *inode, int mask) if (!acl) return -EAGAIN; /* no ->get_acl() calls in RCU mode... */ - if (acl == ACL_NOT_CACHED) + if (is_uncached_acl(acl)) return -ECHILD; return posix_acl_permission(inode, acl, mask & ~MAY_NOT_BLOCK); } diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 17c0fa1..720d92f5 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -11,6 +11,38 @@ #define NFSDBG_FACILITY NFSDBG_PROC +/* + * nfs3_prepare_get_acl, nfs3_complete_get_acl, nfs3_abort_get_acl: Helpers for + * caching get_acl results in a race-free way. See fs/posix_acl.c:get_acl() + * for explanations. + */ +static void nfs3_prepare_get_acl(struct posix_acl **p) +{ + struct posix_acl *sentinel = uncached_acl_sentinel(current); + + if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) { + /* Not the first reader or sentinel already in place. */ + } +} + +static void nfs3_complete_get_acl(struct posix_acl **p, struct posix_acl *acl) +{ + struct posix_acl *sentinel = uncached_acl_sentinel(current); + + /* Only cache the ACL if our sentinel is still in place. */ + posix_acl_dup(acl); + if (cmpxchg(p, sentinel, acl) != sentinel) + posix_acl_release(acl); +} + +static void nfs3_abort_get_acl(struct posix_acl **p) +{ + struct posix_acl *sentinel = uncached_acl_sentinel(current); + + /* Remove our sentinel upon failure. */ + cmpxchg(p, sentinel, ACL_NOT_CACHED); +} + struct posix_acl *nfs3_get_acl(struct inode *inode, int type) { struct nfs_server *server = NFS_SERVER(inode); @@ -55,6 +87,11 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) if (res.fattr == NULL) return ERR_PTR(-ENOMEM); + if (args.mask & NFS_ACL) + nfs3_prepare_get_acl(&inode->i_acl); + if (args.mask & NFS_DFACL) + nfs3_prepare_get_acl(&inode->i_default_acl); + status = rpc_call_sync(server->client_acl, &msg, 0); dprintk("NFS reply getacl: %d\n", status); @@ -89,12 +126,12 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) } if (res.mask & NFS_ACL) - set_cached_acl(inode, ACL_TYPE_ACCESS, res.acl_access); + nfs3_complete_get_acl(&inode->i_acl, res.acl_access); else forget_cached_acl(inode, ACL_TYPE_ACCESS); if (res.mask & NFS_DFACL) - set_cached_acl(inode, ACL_TYPE_DEFAULT, res.acl_default); + nfs3_complete_get_acl(&inode->i_default_acl, res.acl_default); else forget_cached_acl(inode, ACL_TYPE_DEFAULT); @@ -108,6 +145,8 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type) } getout: + nfs3_abort_get_acl(&inode->i_acl); + nfs3_abort_get_acl(&inode->i_default_acl); posix_acl_release(res.acl_access); posix_acl_release(res.acl_default); nfs_free_fattr(res.fattr); diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 474e57f..1eaa910 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -54,6 +54,7 @@ #include "uptodate.h" #include "quota.h" #include "refcounttree.h" +#include "acl.h" #include "buffer_head_io.h" @@ -3623,6 +3624,8 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, filemap_fdatawait(mapping); } + forget_all_cached_acls(inode); + out: return UNBLOCK_CONTINUE; } diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 711dd51..bc6736d 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -37,14 +37,18 @@ EXPORT_SYMBOL(acl_by_type); struct posix_acl *get_cached_acl(struct inode *inode, int type) { struct posix_acl **p = acl_by_type(inode, type); - struct posix_acl *acl = ACCESS_ONCE(*p); - if (acl) { - spin_lock(&inode->i_lock); - acl = *p; - if (acl != ACL_NOT_CACHED) - acl = posix_acl_dup(acl); - spin_unlock(&inode->i_lock); + struct posix_acl *acl; + + for (;;) { + rcu_read_lock(); + acl = rcu_dereference(*p); + if (!acl || is_uncached_acl(acl) || + atomic_inc_not_zero(&acl->a_refcount)) + break; + rcu_read_unlock(); + cpu_relax(); } + rcu_read_unlock(); return acl; } EXPORT_SYMBOL(get_cached_acl); @@ -59,58 +63,72 @@ void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl) { struct posix_acl **p = acl_by_type(inode, type); struct posix_acl *old; - spin_lock(&inode->i_lock); - old = *p; - rcu_assign_pointer(*p, posix_acl_dup(acl)); - spin_unlock(&inode->i_lock); - if (old != ACL_NOT_CACHED) + + old = xchg(p, posix_acl_dup(acl)); + if (!is_uncached_acl(old)) posix_acl_release(old); } EXPORT_SYMBOL(set_cached_acl); -void forget_cached_acl(struct inode *inode, int type) +static void __forget_cached_acl(struct posix_acl **p) { - struct posix_acl **p = acl_by_type(inode, type); struct posix_acl *old; - spin_lock(&inode->i_lock); - old = *p; - *p = ACL_NOT_CACHED; - spin_unlock(&inode->i_lock); - if (old != ACL_NOT_CACHED) + + old = xchg(p, ACL_NOT_CACHED); + if (!is_uncached_acl(old)) posix_acl_release(old); } + +void forget_cached_acl(struct inode *inode, int type) +{ + __forget_cached_acl(acl_by_type(inode, type)); +} EXPORT_SYMBOL(forget_cached_acl); void forget_all_cached_acls(struct inode *inode) { - struct posix_acl *old_access, *old_default; - spin_lock(&inode->i_lock); - old_access = inode->i_acl; - old_default = inode->i_default_acl; - inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED; - spin_unlock(&inode->i_lock); - if (old_access != ACL_NOT_CACHED) - posix_acl_release(old_access); - if (old_default != ACL_NOT_CACHED) - posix_acl_release(old_default); + __forget_cached_acl(&inode->i_acl); + __forget_cached_acl(&inode->i_default_acl); } EXPORT_SYMBOL(forget_all_cached_acls); struct posix_acl *get_acl(struct inode *inode, int type) { + void *sentinel; + struct posix_acl **p; struct posix_acl *acl; + /* + * The sentinel is used to detect when another operation like + * set_cached_acl() or forget_cached_acl() races with get_acl(). + * It is guaranteed that is_uncached_acl(sentinel) is true. + */ + acl = get_cached_acl(inode, type); - if (acl != ACL_NOT_CACHED) + if (!is_uncached_acl(acl)) return acl; if (!IS_POSIXACL(inode)) return NULL; + sentinel = uncached_acl_sentinel(current); + p = acl_by_type(inode, type); + /* - * A filesystem can force a ACL callback by just never filling the - * ACL cache. But normally you'd fill the cache either at inode - * instantiation time, or on the first ->get_acl call. + * If the ACL isn't being read yet, set our sentinel. Otherwise, the + * current value of the ACL will not be ACL_NOT_CACHED and so our own + * sentinel will not be set; another task will update the cache. We + * could wait for that other task to complete its job, but it's easier + * to just call ->get_acl to fetch the ACL ourself. (This is going to + * be an unlikely race.) + */ + if (cmpxchg(p, ACL_NOT_CACHED, sentinel) != ACL_NOT_CACHED) + /* fall through */ ; + + /* + * Normally, the ACL returned by ->get_acl will be cached. + * A filesystem can prevent that by calling + * forget_cached_acl(inode, type) in ->get_acl. * * If the filesystem doesn't have a get_acl() function at all, we'll * just create the negative cache entry. @@ -119,7 +137,24 @@ struct posix_acl *get_acl(struct inode *inode, int type) set_cached_acl(inode, type, NULL); return NULL; } - return inode->i_op->get_acl(inode, type); + acl = inode->i_op->get_acl(inode, type); + + if (IS_ERR(acl)) { + /* + * Remove our sentinel so that we don't block future attempts + * to cache the ACL. + */ + cmpxchg(p, sentinel, ACL_NOT_CACHED); + return acl; + } + + /* + * Cache the result, but only if our sentinel is still in place. + */ + posix_acl_dup(acl); + if (unlikely(cmpxchg(p, sentinel, acl) != sentinel)) + posix_acl_release(acl); + return acl; } EXPORT_SYMBOL(get_acl); diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index ec74bbe..dbed42f 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -197,10 +197,8 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) size = reiserfs_xattr_get(inode, name, NULL, 0); if (size < 0) { - if (size == -ENODATA || size == -ENOSYS) { - set_cached_acl(inode, type, NULL); + if (size == -ENODATA || size == -ENOSYS) return NULL; - } return ERR_PTR(size); } @@ -220,8 +218,6 @@ struct posix_acl *reiserfs_get_acl(struct inode *inode, int type) } else { acl = reiserfs_posix_acl_from_disk(value, retval); } - if (!IS_ERR(acl)) - set_cached_acl(inode, type, acl); kfree(value); return acl; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 2d5df1f..b6e527b 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -158,22 +158,14 @@ xfs_get_acl(struct inode *inode, int type) if (error) { /* * If the attribute doesn't exist make sure we have a negative - * cache entry, for any other error assume it is transient and - * leave the cache entry as ACL_NOT_CACHED. + * cache entry, for any other error assume it is transient. */ - if (error == -ENOATTR) - goto out_update_cache; - acl = ERR_PTR(error); - goto out; + if (error != -ENOATTR) + acl = ERR_PTR(error); + } else { + acl = xfs_acl_from_disk(xfs_acl, len, + XFS_ACL_MAX_ENTRIES(ip->i_mount)); } - - acl = xfs_acl_from_disk(xfs_acl, len, XFS_ACL_MAX_ENTRIES(ip->i_mount)); - if (IS_ERR(acl)) - goto out; - -out_update_cache: - set_cached_acl(inode, type, acl); -out: kmem_free(xfs_acl); return acl; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a9719..329ed37 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -577,6 +577,18 @@ static inline void mapping_allow_writable(struct address_space *mapping) struct posix_acl; #define ACL_NOT_CACHED ((void *)(-1)) +static inline struct posix_acl * +uncached_acl_sentinel(struct task_struct *task) +{ + return (void *)task + 1; +} + +static inline bool +is_uncached_acl(struct posix_acl *acl) +{ + return (long)acl & 1; +} + #define IOP_FASTPERM 0x0001 #define IOP_LOOKUP 0x0002 #define IOP_NOFOLLOW 0x0004 -- cgit v0.10.2 From 04c57f4501909b60353031cfe5b991751d745658 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Thu, 24 Mar 2016 14:38:38 +0100 Subject: posix_acl: Unexport acl_by_type and make it static acl_by_type(inode, type) returns a pointer to either inode->i_acl or inode->i_default_acl depending on type. This is useful in fs/posix_acl.c, but should never have been visible outside that file. Signed-off-by: Andreas Gruenbacher Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro diff --git a/fs/posix_acl.c b/fs/posix_acl.c index bc6736d..db1fb0f 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -21,7 +21,7 @@ #include #include -struct posix_acl **acl_by_type(struct inode *inode, int type) +static struct posix_acl **acl_by_type(struct inode *inode, int type) { switch (type) { case ACL_TYPE_ACCESS: @@ -32,7 +32,6 @@ struct posix_acl **acl_by_type(struct inode *inode, int type) BUG(); } } -EXPORT_SYMBOL(acl_by_type); struct posix_acl *get_cached_acl(struct inode *inode, int type) { diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 3e96a6a..5b5a80c 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -99,7 +99,6 @@ extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **, extern int simple_set_acl(struct inode *, struct posix_acl *, int); extern int simple_acl_create(struct inode *, struct inode *); -struct posix_acl **acl_by_type(struct inode *inode, int type); struct posix_acl *get_cached_acl(struct inode *inode, int type); struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type); void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl); -- cgit v0.10.2 From fc64005c93090c052637f63578d810b037abb1a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Apr 2016 01:33:30 -0400 Subject: don't bother with ->d_inode->i_sb - it's always equal to ->d_sb ... and neither can ever be NULL Signed-off-by: Al Viro diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 3a08b3e..f4645c5 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1071,7 +1071,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, if (IS_ERR(st)) return PTR_ERR(st); - v9fs_stat2inode(st, d_inode(dentry), d_inode(dentry)->i_sb); + v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb); generic_fillattr(d_inode(dentry), stat); p9stat_free(st); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 24d03c7..a82e20e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4851,7 +4851,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, goto out; if (!S_ISDIR(inode->i_mode)) { - if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb) + if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) goto out; inode = d_inode(parent); } @@ -4872,7 +4872,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, break; } - if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb) + if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) break; if (IS_ROOT(parent)) @@ -5285,7 +5285,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, } while (1) { - if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb) + if (!parent || d_really_is_negative(parent) || sb != parent->d_sb) break; inode = d_inode(parent); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index e956cba..94f2c8a 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -302,7 +302,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) if (full_path == NULL) goto cdda_exit; - cifs_sb = CIFS_SB(d_inode(mntpt)->i_sb); + cifs_sb = CIFS_SB(mntpt->d_sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { mnt = ERR_CAST(tlink); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index aeb26db..4cd4705 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2418,8 +2418,7 @@ cifs_setattr_exit: int cifs_setattr(struct dentry *direntry, struct iattr *attrs) { - struct inode *inode = d_inode(direntry); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb); if (pTcon->unix_ext) diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b30a4a6..4cfb4d9f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -78,7 +78,7 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, { struct dentry *dentry, *alias; struct inode *inode; - struct super_block *sb = d_inode(parent)->i_sb; + struct super_block *sb = parent->d_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index f5dc2f0..612de93 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -52,9 +52,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) return -EIO; if (d_really_is_negative(direntry)) return -EIO; - sb = d_inode(direntry)->i_sb; - if (sb == NULL) - return -EIO; + sb = direntry->d_sb; cifs_sb = CIFS_SB(sb); tlink = cifs_sb_tlink(cifs_sb); @@ -113,9 +111,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, return -EIO; if (d_really_is_negative(direntry)) return -EIO; - sb = d_inode(direntry)->i_sb; - if (sb == NULL) - return -EIO; + sb = direntry->d_sb; cifs_sb = CIFS_SB(sb); tlink = cifs_sb_tlink(cifs_sb); @@ -248,9 +244,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, return -EIO; if (d_really_is_negative(direntry)) return -EIO; - sb = d_inode(direntry)->i_sb; - if (sb == NULL) - return -EIO; + sb = direntry->d_sb; cifs_sb = CIFS_SB(sb); tlink = cifs_sb_tlink(cifs_sb); @@ -384,9 +378,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) return -EIO; if (d_really_is_negative(direntry)) return -EIO; - sb = d_inode(direntry)->i_sb; - if (sb == NULL) - return -EIO; + sb = direntry->d_sb; cifs_sb = CIFS_SB(sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 40ba9cc..d34a40e 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -113,7 +113,7 @@ struct dentry *efs_get_parent(struct dentry *child) ino = efs_find_entry(d_inode(child), "..", 2); if (ino) - parent = d_obtain_alias(efs_iget(d_inode(child)->i_sb, ino)); + parent = d_obtain_alias(efs_iget(child->d_sb, ino)); return parent; } diff --git a/fs/exofs/super.c b/fs/exofs/super.c index 6658a50..1923736 100644 --- a/fs/exofs/super.c +++ b/fs/exofs/super.c @@ -958,7 +958,7 @@ static struct dentry *exofs_get_parent(struct dentry *child) if (!ino) return ERR_PTR(-ESTALE); - return d_obtain_alias(exofs_iget(d_inode(child)->i_sb, ino)); + return d_obtain_alias(exofs_iget(child->d_sb, ino)); } static struct inode *exofs_nfs_get_inode(struct super_block *sb, diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 7a2be8f..1a7eb49 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -82,7 +82,7 @@ struct dentry *ext2_get_parent(struct dentry *child) unsigned long ino = ext2_inode_by_name(d_inode(child), &dotdot); if (!ino) return ERR_PTR(-ENOENT); - return d_obtain_alias(ext2_iget(d_inode(child)->i_sb, ino)); + return d_obtain_alias(ext2_iget(child->d_sb, ino)); } /* diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 48e4b89..5611ec9 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1638,13 +1638,13 @@ struct dentry *ext4_get_parent(struct dentry *child) ino = le32_to_cpu(de->inode); brelse(bh); - if (!ext4_valid_inum(d_inode(child)->i_sb, ino)) { + if (!ext4_valid_inum(child->d_sb, ino)) { EXT4_ERROR_INODE(d_inode(child), "bad parent inode number: %u", ino); return ERR_PTR(-EFSCORRUPTED); } - return d_obtain_alias(ext4_iget_normal(d_inode(child)->i_sb, ino)); + return d_obtain_alias(ext4_iget_normal(child->d_sb, ino)); } /* diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7876f10..db874ad 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -202,7 +202,7 @@ struct dentry *f2fs_get_parent(struct dentry *child) unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot); if (!ino) return ERR_PTR(-ENOENT); - return d_obtain_alias(f2fs_iget(d_inode(child)->i_sb, ino)); + return d_obtain_alias(f2fs_iget(child->d_sb, ino)); } static int __recover_dot_dentries(struct inode *dir, nid_t pino) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 49b0bff..c09c63d 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1360,7 +1360,7 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, return ERR_PTR(error); } s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, - d_inode(path.dentry)->i_sb->s_bdev); + path.dentry->d_sb->s_bdev); path_put(&path); if (IS_ERR(s)) { pr_warn("gfs2 mount does not exist\n"); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index f8a0cd8..9b2ff353 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1176,7 +1176,7 @@ static int gfs2_statfs_i(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *s static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf) { - struct super_block *sb = d_inode(dentry)->i_sb; + struct super_block *sb = dentry->d_sb; struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_statfs_change_host sc; int error; diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 30c4c9e..7a9579e 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -241,7 +241,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry) static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct dentry *dentry) { - struct jffs2_sb_info *c = JFFS2_SB_INFO(d_inode(old_dentry)->i_sb); + struct jffs2_sb_info *c = JFFS2_SB_INFO(old_dentry->d_sb); struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(old_dentry)); struct jffs2_inode_info *dir_f = JFFS2_INODE_INFO(dir_i); int ret; diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 0a9a114..5ef21f4 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -147,7 +147,7 @@ static struct dentry *jffs2_get_parent(struct dentry *child) JFFS2_DEBUG("Parent of directory ino #%u is #%u\n", f->inocache->ino, pino); - return d_obtain_alias(jffs2_iget(d_inode(child)->i_sb, pino)); + return d_obtain_alias(jffs2_iget(child->d_sb, pino)); } static const struct export_operations jffs2_export_ops = { diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 701f893..8a40941 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1524,7 +1524,7 @@ struct dentry *jfs_get_parent(struct dentry *dentry) parent_ino = le32_to_cpu(JFS_IP(d_inode(dentry))->i_dtroot.header.idotdot); - return d_obtain_alias(jfs_iget(d_inode(dentry)->i_sb, parent_ino)); + return d_obtain_alias(jfs_iget(dentry->d_sb, parent_ino)); } const struct inode_operations jfs_dir_inode_operations = { diff --git a/fs/namei.c b/fs/namei.c index 3498d53..c0d551f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2653,7 +2653,7 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) return NULL; } - mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); + mutex_lock(&p1->d_sb->s_vfs_rename_mutex); p = d_ancestor(p2, p1); if (p) { @@ -2680,7 +2680,7 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) inode_unlock(p1->d_inode); if (p1 != p2) { inode_unlock(p2->d_inode); - mutex_unlock(&p1->d_inode->i_sb->s_vfs_rename_mutex); + mutex_unlock(&p1->d_sb->s_vfs_rename_mutex); } } EXPORT_SYMBOL(unlock_rename); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 7a0cfd3..e5daa93 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -396,7 +396,7 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write) static void nfs_direct_readpage_release(struct nfs_page *req) { dprintk("NFS: direct read done (%s/%llu %d@%lld)\n", - d_inode(req->wb_context->dentry)->i_sb->s_id, + req->wb_context->dentry->d_sb->s_id, (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)), req->wb_bytes, (long long)req_offset(req)); diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 51c3b06..d818e4f 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -552,7 +552,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, * different read/write sizes for file systems known to have * problems with large blocks */ if (nfserr == 0) { - struct super_block *sb = d_inode(argp->fh.fh_dentry)->i_sb; + struct super_block *sb = argp->fh.fh_dentry->d_sb; /* Note that we don't care for remote fs's here */ if (sb->s_magic == MSDOS_SUPER_MAGIC) { @@ -588,7 +588,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); if (nfserr == 0) { - struct super_block *sb = d_inode(argp->fh.fh_dentry)->i_sb; + struct super_block *sb = argp->fh.fh_dentry->d_sb; /* Note that we don't care for remote fs's here */ switch (sb->s_magic) { diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 2246454..93d5853 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -146,7 +146,7 @@ static __be32 *encode_fsid(__be32 *p, struct svc_fh *fhp) default: case FSIDSOURCE_DEV: p = xdr_encode_hyper(p, (u64)huge_encode_dev - (d_inode(fhp->fh_dentry)->i_sb->s_dev)); + (fhp->fh_dentry->d_sb->s_dev)); break; case FSIDSOURCE_FSID: p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index c1681ce..a891944 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -426,7 +426,7 @@ static bool is_root_export(struct svc_export *exp) static struct super_block *exp_sb(struct svc_export *exp) { - return d_inode(exp->ex_path.dentry)->i_sb; + return exp->ex_path.dentry->d_sb; } static bool fsid_type_ok_for_exp(u8 fsid_type, struct svc_export *exp) diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 7ccdb96..38d67f3 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -457,7 +457,7 @@ static struct dentry *nilfs_get_parent(struct dentry *child) root = NILFS_I(d_inode(child))->i_root; - inode = nilfs_iget(d_inode(child)->i_sb, root, ino); + inode = nilfs_iget(child->d_sb, root, ino); if (IS_ERR(inode)) return ERR_CAST(inode); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index c18ab45..c6fdcbd 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1290,7 +1290,7 @@ int ocfs2_getattr(struct vfsmount *mnt, struct kstat *stat) { struct inode *inode = d_inode(dentry); - struct super_block *sb = d_inode(dentry)->i_sb; + struct super_block *sb = dentry->d_sb; struct ocfs2_super *osb = sb->s_fs_info; int err; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index a2ba11e..c3e5c96 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1250,7 +1250,7 @@ static struct dentry *udf_get_parent(struct dentry *child) brelse(fibh.sbh); tloc = lelb_to_cpu(cfi.icb.extLocation); - inode = udf_iget(d_inode(child)->i_sb, &tloc); + inode = udf_iget(child->d_sb, &tloc); if (IS_ERR(inode)) return ERR_CAST(inode); diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 442fd52..f04ab23 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -132,7 +132,7 @@ static struct dentry *ufs_get_parent(struct dentry *child) ino = ufs_inode_by_name(d_inode(child), &dot_dot); if (!ino) return ERR_PTR(-ENOENT); - return d_obtain_alias(ufs_iget(d_inode(child)->i_sb, ino)); + return d_obtain_alias(ufs_iget(child->d_sb, ino)); } static const struct export_operations ufs_export_ops = { diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 4e4b2fa..09c71e9 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -872,7 +872,7 @@ TRACE_EVENT(ext4_sync_file_enter, TP_fast_assign( struct dentry *dentry = file->f_path.dentry; - __entry->dev = d_inode(dentry)->i_sb->s_dev; + __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->datasync = datasync; __entry->parent = d_inode(dentry->d_parent)->i_ino; @@ -1451,7 +1451,7 @@ TRACE_EVENT(ext4_unlink_enter, ), TP_fast_assign( - __entry->dev = d_inode(dentry)->i_sb->s_dev; + __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->parent = parent->i_ino; __entry->size = d_inode(dentry)->i_size; @@ -1475,7 +1475,7 @@ TRACE_EVENT(ext4_unlink_exit, ), TP_fast_assign( - __entry->dev = d_inode(dentry)->i_sb->s_dev; + __entry->dev = dentry->d_sb->s_dev; __entry->ino = d_inode(dentry)->i_ino; __entry->ret = ret; ), diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 3cf1c59..d6709eb 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -367,7 +367,7 @@ static int audit_get_nd(struct audit_watch *watch, struct path *parent) inode_unlock(d_backing_inode(parent->dentry)); if (d_is_positive(d)) { /* update watch filter fields */ - watch->dev = d_backing_inode(d)->i_sb->s_dev; + watch->dev = d->d_sb->s_dev; watch->ino = d_backing_inode(d)->i_ino; } dput(d); diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index e6ea9d4..84c6d11 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -299,8 +299,8 @@ static int evm_protect_xattr(struct dentry *dentry, const char *xattr_name, return 0; /* exception for pseudo filesystems */ - if (dentry->d_inode->i_sb->s_magic == TMPFS_MAGIC - || dentry->d_inode->i_sb->s_magic == SYSFS_MAGIC) + if (dentry->d_sb->s_magic == TMPFS_MAGIC + || dentry->d_sb->s_magic == SYSFS_MAGIC) return 0; integrity_audit_msg(AUDIT_INTEGRITY_METADATA, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 912deee..889cd59 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1316,7 +1316,7 @@ static int selinux_genfs_get_sid(struct dentry *dentry, u32 *sid) { int rc; - struct super_block *sb = dentry->d_inode->i_sb; + struct super_block *sb = dentry->d_sb; char *buffer, *path; buffer = (char *)__get_free_page(GFP_KERNEL); diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 11f7901..50bcca2 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1444,7 +1444,7 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name) * XATTR_NAME_SMACKIPOUT */ if (strcmp(name, XATTR_NAME_SMACK) == 0) { - struct super_block *sbp = d_backing_inode(dentry)->i_sb; + struct super_block *sbp = dentry->d_sb; struct superblock_smack *sbsp = sbp->s_security; isp->smk_inode = sbsp->smk_default; -- cgit v0.10.2 From 5fdccfef483d088fcc533b282bf6953579d6d6f4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Apr 2016 17:07:33 -0400 Subject: cifs: kill more bogus checks in ->...xattr() methods none of that stuff can ever be called for NULL or negative dentry. Signed-off-by: Al Viro diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 612de93..159547c 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -42,19 +42,11 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) int rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR unsigned int xid; - struct cifs_sb_info *cifs_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); struct tcon_link *tlink; struct cifs_tcon *pTcon; - struct super_block *sb; char *full_path = NULL; - if (direntry == NULL) - return -EIO; - if (d_really_is_negative(direntry)) - return -EIO; - sb = direntry->d_sb; - - cifs_sb = CIFS_SB(sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -101,19 +93,12 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, int rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR unsigned int xid; - struct cifs_sb_info *cifs_sb; + struct super_block *sb = direntry->d_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct tcon_link *tlink; struct cifs_tcon *pTcon; - struct super_block *sb; char *full_path; - if (direntry == NULL) - return -EIO; - if (d_really_is_negative(direntry)) - return -EIO; - sb = direntry->d_sb; - - cifs_sb = CIFS_SB(sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -234,19 +219,12 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, ssize_t rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR unsigned int xid; - struct cifs_sb_info *cifs_sb; + struct super_block *sb = direntry->d_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); struct tcon_link *tlink; struct cifs_tcon *pTcon; - struct super_block *sb; char *full_path; - if (direntry == NULL) - return -EIO; - if (d_really_is_negative(direntry)) - return -EIO; - sb = direntry->d_sb; - - cifs_sb = CIFS_SB(sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); @@ -368,19 +346,11 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) ssize_t rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR unsigned int xid; - struct cifs_sb_info *cifs_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); struct tcon_link *tlink; struct cifs_tcon *pTcon; - struct super_block *sb; char *full_path; - if (direntry == NULL) - return -EIO; - if (d_really_is_negative(direntry)) - return -EIO; - sb = direntry->d_sb; - - cifs_sb = CIFS_SB(sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) return -EOPNOTSUPP; -- cgit v0.10.2 From 79a628d14ec7ee9adfdc3ce04343d5ff7ec20c18 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Apr 2016 18:50:48 -0400 Subject: reiserfs: switch to generic_{get,set,remove}xattr() reiserfs_xattr_[sg]et() will fail with -EOPNOTSUPP for V1 inodes anyway, and all reiserfs instances of ->[sg]et() call it and so does ->set_acl(). Checks for name length in the instances had been bogus; they should've been "bugger off if it's _exactly_ the prefix" (as generic would do on its own) and not "bugger off if it's shorter than the prefix" - that can't happen. xattr_full_name() is needed to adjust for the fact that generic instances will skip the prefix in the name passed to ->[gs]et(); reiserfs homegrown analogues didn't. Signed-off-by: Al Viro diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 9424a4b..9ffaf71 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -260,10 +260,10 @@ const struct file_operations reiserfs_file_operations = { const struct inode_operations reiserfs_file_inode_operations = { .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, + .removexattr = generic_removexattr, .permission = reiserfs_permission, .get_acl = reiserfs_get_acl, .set_acl = reiserfs_set_acl, diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 2a12d46..8a36696 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -1650,10 +1650,10 @@ const struct inode_operations reiserfs_dir_inode_operations = { .mknod = reiserfs_mknod, .rename = reiserfs_rename, .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, + .removexattr = generic_removexattr, .permission = reiserfs_permission, .get_acl = reiserfs_get_acl, .set_acl = reiserfs_set_acl, @@ -1667,10 +1667,10 @@ const struct inode_operations reiserfs_symlink_inode_operations = { .readlink = generic_readlink, .get_link = page_get_link, .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, + .removexattr = generic_removexattr, .permission = reiserfs_permission, }; @@ -1679,10 +1679,10 @@ const struct inode_operations reiserfs_symlink_inode_operations = { */ const struct inode_operations reiserfs_special_inode_operations = { .setattr = reiserfs_setattr, - .setxattr = reiserfs_setxattr, - .getxattr = reiserfs_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = reiserfs_listxattr, - .removexattr = reiserfs_removexattr, + .removexattr = generic_removexattr, .permission = reiserfs_permission, .get_acl = reiserfs_get_acl, .set_acl = reiserfs_set_acl, diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 57e0b23..02137bb 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -764,60 +764,6 @@ find_xattr_handler_prefix(const struct xattr_handler **handlers, return xah; } - -/* - * Inode operation getxattr() - */ -ssize_t -reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer, - size_t size) -{ - const struct xattr_handler *handler; - - handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); - - if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) - return -EOPNOTSUPP; - - return handler->get(handler, dentry, name, buffer, size); -} - -/* - * Inode operation setxattr() - * - * d_inode(dentry)->i_mutex down - */ -int -reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value, - size_t size, int flags) -{ - const struct xattr_handler *handler; - - handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); - - if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) - return -EOPNOTSUPP; - - return handler->set(handler, dentry, name, value, size, flags); -} - -/* - * Inode operation removexattr() - * - * d_inode(dentry)->i_mutex down - */ -int reiserfs_removexattr(struct dentry *dentry, const char *name) -{ - const struct xattr_handler *handler; - - handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name); - - if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1) - return -EOPNOTSUPP; - - return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE); -} - struct listxattr_buf { struct dir_context ctx; size_t size; diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h index 15dde62..613ff5a 100644 --- a/fs/reiserfs/xattr.h +++ b/fs/reiserfs/xattr.h @@ -2,6 +2,7 @@ #include #include #include +#include struct inode; struct dentry; @@ -18,12 +19,7 @@ int reiserfs_permission(struct inode *inode, int mask); #ifdef CONFIG_REISERFS_FS_XATTR #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir) -ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size); -int reiserfs_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size); -int reiserfs_removexattr(struct dentry *dentry, const char *name); int reiserfs_xattr_get(struct inode *, const char *, void *, size_t); int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int); @@ -92,10 +88,7 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode) #else -#define reiserfs_getxattr NULL -#define reiserfs_setxattr NULL #define reiserfs_listxattr NULL -#define reiserfs_removexattr NULL static inline void reiserfs_init_xattr_rwsem(struct inode *inode) { diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index ab0217d..ac7e104 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -12,26 +12,24 @@ static int security_get(const struct xattr_handler *handler, struct dentry *dentry, const char *name, void *buffer, size_t size) { - if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) - return -EINVAL; - if (IS_PRIVATE(d_inode(dentry))) return -EPERM; - return reiserfs_xattr_get(d_inode(dentry), name, buffer, size); + return reiserfs_xattr_get(d_inode(dentry), + xattr_full_name(handler, name), + buffer, size); } static int security_set(const struct xattr_handler *handler, struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags) { - if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX)) - return -EINVAL; - if (IS_PRIVATE(d_inode(dentry))) return -EPERM; - return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags); + return reiserfs_xattr_set(d_inode(dentry), + xattr_full_name(handler, name), + buffer, size, flags); } static bool security_list(struct dentry *dentry) diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index 64b67aa..cc248a5 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -11,26 +11,24 @@ static int trusted_get(const struct xattr_handler *handler, struct dentry *dentry, const char *name, void *buffer, size_t size) { - if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) - return -EINVAL; - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry))) return -EPERM; - return reiserfs_xattr_get(d_inode(dentry), name, buffer, size); + return reiserfs_xattr_get(d_inode(dentry), + xattr_full_name(handler, name), + buffer, size); } static int trusted_set(const struct xattr_handler *handler, struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags) { - if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX)) - return -EINVAL; - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry))) return -EPERM; - return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags); + return reiserfs_xattr_set(d_inode(dentry), + xattr_full_name(handler, name), + buffer, size, flags); } static bool trusted_list(struct dentry *dentry) diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index 12e6306..caad583 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -10,24 +10,22 @@ static int user_get(const struct xattr_handler *handler, struct dentry *dentry, const char *name, void *buffer, size_t size) { - - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) - return -EINVAL; if (!reiserfs_xattrs_user(dentry->d_sb)) return -EOPNOTSUPP; - return reiserfs_xattr_get(d_inode(dentry), name, buffer, size); + return reiserfs_xattr_get(d_inode(dentry), + xattr_full_name(handler, name), + buffer, size); } static int user_set(const struct xattr_handler *handler, struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags) { - if (strlen(name) < sizeof(XATTR_USER_PREFIX)) - return -EINVAL; - if (!reiserfs_xattrs_user(dentry->d_sb)) return -EOPNOTSUPP; - return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags); + return reiserfs_xattr_set(d_inode(dentry), + xattr_full_name(handler, name), + buffer, size, flags); } static bool user_list(struct dentry *dentry) -- cgit v0.10.2 From b296821a7c42fa58baa17513b2b7b30ae66f3336 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 10 Apr 2016 20:48:24 -0400 Subject: xattr_handler: pass dentry and inode as separate arguments of ->get() ... and do not assume they are already attached to each other Signed-off-by: Al Viro diff --git a/fs/9p/acl.c b/fs/9p/acl.c index 2d94e94..eb3589e 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -213,8 +213,8 @@ int v9fs_acl_mode(struct inode *dir, umode_t *modep, } static int v9fs_xattr_get_acl(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { struct v9fs_session_info *v9ses; struct posix_acl *acl; @@ -227,7 +227,7 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler, if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) return v9fs_xattr_get(dentry, handler->name, buffer, size); - acl = v9fs_get_cached_acl(d_inode(dentry), handler->flags); + acl = v9fs_get_cached_acl(inode, handler->flags); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c index 9dd9b47..18c62ba 100644 --- a/fs/9p/xattr.c +++ b/fs/9p/xattr.c @@ -138,8 +138,8 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) } static int v9fs_xattr_handler_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { const char *full_name = xattr_full_name(handler, name); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 145d2b8..03224b0 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -369,11 +369,9 @@ err: } static int btrfs_xattr_handler_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct inode *inode = d_inode(dentry); - name = xattr_full_name(handler, name); return __btrfs_getxattr(inode, name, buffer, size); } diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index ba97f24..7fd3b86 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -9,10 +9,10 @@ static int ext2_xattr_security_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_SECURITY, name, + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name, buffer, size); } diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 2c94d19..0f85705 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -16,10 +16,10 @@ ext2_xattr_trusted_list(struct dentry *dentry) static int ext2_xattr_trusted_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_TRUSTED, name, + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name, buffer, size); } diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 72a2a96..1fafd27 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -18,12 +18,12 @@ ext2_xattr_user_list(struct dentry *dentry) static int ext2_xattr_user_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext2_xattr_get(d_inode(dentry), EXT2_XATTR_INDEX_USER, + return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size); } diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 3e81bdc..123a7d0 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -13,10 +13,10 @@ static int ext4_xattr_security_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_SECURITY, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, buffer, size); } diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 2a3c6f9..60652fa 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -20,10 +20,10 @@ ext4_xattr_trusted_list(struct dentry *dentry) static int ext4_xattr_trusted_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, void *buffer, - size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_TRUSTED, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, buffer, size); } diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index d152f43..17a446f 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -19,12 +19,12 @@ ext4_xattr_user_list(struct dentry *dentry) static int ext4_xattr_user_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - if (!test_opt(dentry->d_sb, XATTR_USER)) + if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext4_xattr_get(d_inode(dentry), EXT4_XATTR_INDEX_USER, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); } diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 06a72dc..17fd2b1 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -26,10 +26,10 @@ #include "xattr.h" static int f2fs_xattr_generic_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, void *buffer, - size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb); + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); switch (handler->flags) { case F2FS_XATTR_INDEX_USER: @@ -45,7 +45,7 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler, default: return -EINVAL; } - return f2fs_getxattr(d_inode(dentry), handler->flags, name, + return f2fs_getxattr(inode, handler->flags, name, buffer, size, NULL); } @@ -86,11 +86,9 @@ static bool f2fs_xattr_trusted_list(struct dentry *dentry) } static int f2fs_xattr_advise_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, void *buffer, - size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct inode *inode = d_inode(dentry); - if (buffer) *((char *)buffer) = F2FS_I(inode)->i_advise; return sizeof(char); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index e8dfb47..619886b 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -584,10 +584,10 @@ out: * Returns: actual size of data on success, -errno on error */ static int gfs2_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct gfs2_inode *ip = GFS2_I(d_inode(dentry)); + struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_ea_location el; int type = handler->flags; int error; diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c index ab01530..45dc4ae 100644 --- a/fs/hfsplus/xattr.c +++ b/fs/hfsplus/xattr.c @@ -579,7 +579,7 @@ failed_getxattr_init: return res; } -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, +ssize_t hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size, const char *prefix, size_t prefixlen) { @@ -594,7 +594,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, strcpy(xattr_name, prefix); strcpy(xattr_name + prefixlen, name); - res = __hfsplus_getxattr(d_inode(dentry), xattr_name, value, size); + res = __hfsplus_getxattr(inode, xattr_name, value, size); kfree(xattr_name); return res; @@ -844,8 +844,8 @@ end_removexattr: } static int hfsplus_osx_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { /* * Don't allow retrieving properly prefixed attributes @@ -860,7 +860,7 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler, * creates), so we pass the name through unmodified (after * ensuring it doesn't conflict with another namespace). */ - return __hfsplus_getxattr(d_inode(dentry), name, buffer, size); + return __hfsplus_getxattr(inode, name, buffer, size); } static int hfsplus_osx_setxattr(const struct xattr_handler *handler, diff --git a/fs/hfsplus/xattr.h b/fs/hfsplus/xattr.h index f9b0955..d04ba6f 100644 --- a/fs/hfsplus/xattr.h +++ b/fs/hfsplus/xattr.h @@ -28,7 +28,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name, ssize_t __hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size); -ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, +ssize_t hfsplus_getxattr(struct inode *inode, const char *name, void *value, size_t size, const char *prefix, size_t prefixlen); diff --git a/fs/hfsplus/xattr_security.c b/fs/hfsplus/xattr_security.c index 72a68a3..ae2ca8c 100644 --- a/fs/hfsplus/xattr_security.c +++ b/fs/hfsplus/xattr_security.c @@ -14,10 +14,10 @@ #include "acl.h" static int hfsplus_security_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return hfsplus_getxattr(dentry, name, buffer, size, + return hfsplus_getxattr(inode, name, buffer, size, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN); } diff --git a/fs/hfsplus/xattr_trusted.c b/fs/hfsplus/xattr_trusted.c index 95a7704..eae2947 100644 --- a/fs/hfsplus/xattr_trusted.c +++ b/fs/hfsplus/xattr_trusted.c @@ -12,10 +12,10 @@ #include "xattr.h" static int hfsplus_trusted_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return hfsplus_getxattr(dentry, name, buffer, size, + return hfsplus_getxattr(inode, name, buffer, size, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN); } diff --git a/fs/hfsplus/xattr_user.c b/fs/hfsplus/xattr_user.c index 6fc269b..3c9eec3 100644 --- a/fs/hfsplus/xattr_user.c +++ b/fs/hfsplus/xattr_user.c @@ -12,11 +12,11 @@ #include "xattr.h" static int hfsplus_user_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return hfsplus_getxattr(dentry, name, buffer, size, + return hfsplus_getxattr(inode, name, buffer, size, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c index 7a28fac..3ed9a4b4 100644 --- a/fs/jffs2/security.c +++ b/fs/jffs2/security.c @@ -49,10 +49,10 @@ int jffs2_init_security(struct inode *inode, struct inode *dir, /* ---- XATTR Handler for "security.*" ----------------- */ static int jffs2_security_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_SECURITY, + return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size); } diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c index b2555ef..4ebecff 100644 --- a/fs/jffs2/xattr_trusted.c +++ b/fs/jffs2/xattr_trusted.c @@ -17,10 +17,10 @@ #include "nodelist.h" static int jffs2_trusted_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_TRUSTED, + return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size); } diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c index 539bd63..bce249e 100644 --- a/fs/jffs2/xattr_user.c +++ b/fs/jffs2/xattr_user.c @@ -17,10 +17,10 @@ #include "nodelist.h" static int jffs2_user_getxattr(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return do_jffs2_getxattr(d_inode(dentry), JFFS2_XPREFIX_USER, + return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 327b8c3..7a7ac1d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6263,10 +6263,10 @@ static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler, } static int nfs4_xattr_get_nfs4_acl(const struct xattr_handler *handler, - struct dentry *dentry, const char *key, - void *buf, size_t buflen) + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) { - return nfs4_proc_get_acl(d_inode(dentry), buf, buflen); + return nfs4_proc_get_acl(inode, buf, buflen); } static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry) @@ -6288,11 +6288,11 @@ static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler, } static int nfs4_xattr_get_nfs4_label(const struct xattr_handler *handler, - struct dentry *dentry, const char *key, - void *buf, size_t buflen) + struct dentry *unused, struct inode *inode, + const char *key, void *buf, size_t buflen) { if (security_ismaclabel(key)) - return nfs4_get_security_label(d_inode(dentry), buf, buflen); + return nfs4_get_security_label(inode, buf, buflen); return -EOPNOTSUPP; } diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 7d3d979..72eef4c 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -7250,10 +7250,10 @@ leave: * 'security' attributes support */ static int ocfs2_xattr_security_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_SECURITY, + return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name, buffer, size); } @@ -7321,10 +7321,10 @@ const struct xattr_handler ocfs2_xattr_security_handler = { * 'trusted' attributes support */ static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_TRUSTED, + return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, buffer, size); } @@ -7346,14 +7346,14 @@ const struct xattr_handler ocfs2_xattr_trusted_handler = { * 'user' attributes support */ static int ocfs2_xattr_user_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unusde, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb); + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) return -EOPNOTSUPP; - return ocfs2_xattr_get(d_inode(dentry), OCFS2_XATTR_INDEX_USER, name, + return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, buffer, size); } diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c index ef5da75..6a4c0f7 100644 --- a/fs/orangefs/xattr.c +++ b/fs/orangefs/xattr.c @@ -478,12 +478,13 @@ static int orangefs_xattr_set_default(const struct xattr_handler *handler, } static int orangefs_xattr_get_default(const struct xattr_handler *handler, - struct dentry *dentry, + struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - return orangefs_inode_getxattr(dentry->d_inode, + return orangefs_inode_getxattr(inode, ORANGEFS_XATTR_NAME_DEFAULT_PREFIX, name, buffer, @@ -507,12 +508,13 @@ static int orangefs_xattr_set_trusted(const struct xattr_handler *handler, } static int orangefs_xattr_get_trusted(const struct xattr_handler *handler, - struct dentry *dentry, + struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - return orangefs_inode_getxattr(dentry->d_inode, + return orangefs_inode_getxattr(inode, ORANGEFS_XATTR_NAME_TRUSTED_PREFIX, name, buffer, diff --git a/fs/posix_acl.c b/fs/posix_acl.c index db1fb0f..2c60f17 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -797,18 +797,18 @@ EXPORT_SYMBOL (posix_acl_to_xattr); static int posix_acl_xattr_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *value, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *value, size_t size) { struct posix_acl *acl; int error; - if (!IS_POSIXACL(d_backing_inode(dentry))) + if (!IS_POSIXACL(inode)) return -EOPNOTSUPP; - if (d_is_symlink(dentry)) + if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; - acl = get_acl(d_backing_inode(dentry), handler->flags); + acl = get_acl(inode, handler->flags); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c index ac7e104..86aeb9d 100644 --- a/fs/reiserfs/xattr_security.c +++ b/fs/reiserfs/xattr_security.c @@ -9,14 +9,13 @@ #include static int -security_get(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, void *buffer, size_t size) +security_get(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - if (IS_PRIVATE(d_inode(dentry))) + if (IS_PRIVATE(inode)) return -EPERM; - return reiserfs_xattr_get(d_inode(dentry), - xattr_full_name(handler, name), + return reiserfs_xattr_get(inode, xattr_full_name(handler, name), buffer, size); } diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c index cc248a5..31837f0 100644 --- a/fs/reiserfs/xattr_trusted.c +++ b/fs/reiserfs/xattr_trusted.c @@ -8,14 +8,13 @@ #include static int -trusted_get(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, void *buffer, size_t size) +trusted_get(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry))) + if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode)) return -EPERM; - return reiserfs_xattr_get(d_inode(dentry), - xattr_full_name(handler, name), + return reiserfs_xattr_get(inode, xattr_full_name(handler, name), buffer, size); } diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c index caad583..f7c3973 100644 --- a/fs/reiserfs/xattr_user.c +++ b/fs/reiserfs/xattr_user.c @@ -7,13 +7,12 @@ #include static int -user_get(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, void *buffer, size_t size) +user_get(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, void *buffer, size_t size) { - if (!reiserfs_xattrs_user(dentry->d_sb)) + if (!reiserfs_xattrs_user(inode->i_sb)) return -EOPNOTSUPP; - return reiserfs_xattr_get(d_inode(dentry), - xattr_full_name(handler, name), + return reiserfs_xattr_get(inode, xattr_full_name(handler, name), buffer, size); } diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c index 1e9de96..1548b37 100644 --- a/fs/squashfs/xattr.c +++ b/fs/squashfs/xattr.c @@ -214,10 +214,12 @@ failed: static int squashfs_xattr_handler_get(const struct xattr_handler *handler, - struct dentry *d, const char *name, + struct dentry *unused, + struct inode *inode, + const char *name, void *buffer, size_t size) { - return squashfs_xattr_get(d_inode(d), handler->flags, name, + return squashfs_xattr_get(inode, handler->flags, name, buffer, size); } diff --git a/fs/xattr.c b/fs/xattr.c index 4861322..461ba45 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -698,7 +698,8 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (IS_ERR(handler)) return PTR_ERR(handler); - return handler->get(handler, dentry, name, buffer, size); + return handler->get(handler, dentry, d_inode(dentry), + name, buffer, size); } /* diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 110f1d7..d111f69 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -32,11 +32,11 @@ static int -xfs_xattr_get(const struct xattr_handler *handler, struct dentry *dentry, - const char *name, void *value, size_t size) +xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, + struct inode *inode, const char *name, void *value, size_t size) { int xflags = handler->flags; - struct xfs_inode *ip = XFS_I(d_inode(dentry)); + struct xfs_inode *ip = XFS_I(inode); int error, asize = size; /* Convert Linux syscall to XFS internal ATTR flags */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 4457541..c11c022 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -30,7 +30,8 @@ struct xattr_handler { int flags; /* fs private flags */ bool (*list)(struct dentry *dentry); int (*get)(const struct xattr_handler *, struct dentry *dentry, - const char *name, void *buffer, size_t size); + struct inode *inode, const char *name, void *buffer, + size_t size); int (*set)(const struct xattr_handler *, struct dentry *dentry, const char *name, const void *buffer, size_t size, int flags); diff --git a/mm/shmem.c b/mm/shmem.c index 9428c51..00d5d02 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2646,10 +2646,10 @@ static int shmem_initxattrs(struct inode *inode, } static int shmem_xattr_handler_get(const struct xattr_handler *handler, - struct dentry *dentry, const char *name, - void *buffer, size_t size) + struct dentry *unused, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct shmem_inode_info *info = SHMEM_I(d_inode(dentry)); + struct shmem_inode_info *info = SHMEM_I(inode); name = xattr_full_name(handler, name); return simple_xattr_get(&info->xattrs, name, buffer, size); -- cgit v0.10.2 From ce23e640133484eebc20ca7b7668388213e11327 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Apr 2016 00:48:00 -0400 Subject: ->getxattr(): pass dentry and inode as separate arguments Signed-off-by: Al Viro diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index f1b87d8..57bb375 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -525,3 +525,9 @@ in your dentry operations instead. set_delayed_call() where it used to set *cookie. ->put_link() is gone - just give the destructor to set_delayed_call() in ->get_link(). +-- +[mandatory] + ->getxattr() and xattr_handler.get() get dentry and inode passed separately. + dentry might be yet to be attached to inode, so do _not_ use its ->d_inode + in the instances. Rationale: !@#!@# security_d_instantiate() needs to be + called before we attach dentry to inode. diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index 3e1572c..d28efd2 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -1042,8 +1042,8 @@ static inline __u64 ll_file_maxbytes(struct inode *inode) /* llite/xattr.c */ int ll_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size); +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size); ssize_t ll_listxattr(struct dentry *dentry, char *buffer, size_t size); int ll_removexattr(struct dentry *dentry, const char *name); diff --git a/drivers/staging/lustre/lustre/llite/xattr.c b/drivers/staging/lustre/lustre/llite/xattr.c index b68dcc9..c671f22 100644 --- a/drivers/staging/lustre/lustre/llite/xattr.c +++ b/drivers/staging/lustre/lustre/llite/xattr.c @@ -451,11 +451,9 @@ out: return rc; } -ssize_t ll_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size) +ssize_t ll_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { - struct inode *inode = d_inode(dentry); - LASSERT(inode); LASSERT(name); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 103f5d7..72e35b7 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -106,8 +106,8 @@ static int bad_inode_setxattr(struct dentry *dentry, const char *name, return -EIO; } -static ssize_t bad_inode_getxattr(struct dentry *dentry, const char *name, - void *buffer, size_t size) +static ssize_t bad_inode_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { return -EIO; } diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e705c4d..beb893b 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -795,7 +795,7 @@ extern int ceph_setxattr(struct dentry *, const char *, const void *, int __ceph_setxattr(struct dentry *, const char *, const void *, size_t, int); ssize_t __ceph_getxattr(struct inode *, const char *, void *, size_t); int __ceph_removexattr(struct dentry *, const char *); -extern ssize_t ceph_getxattr(struct dentry *, const char *, void *, size_t); +extern ssize_t ceph_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); extern ssize_t ceph_listxattr(struct dentry *, char *, size_t); extern int ceph_removexattr(struct dentry *, const char *); extern void __ceph_build_xattrs_blob(struct ceph_inode_info *ci); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9410abd..c6e917d 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -804,13 +804,13 @@ out: return err; } -ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, - size_t size) +ssize_t ceph_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) { if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) - return generic_getxattr(dentry, name, value, size); + return generic_getxattr(dentry, inode, name, value, size); - return __ceph_getxattr(d_inode(dentry), name, value, size); + return __ceph_getxattr(inode, name, value, size); } ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 83aac8b..c89ecd7 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -123,7 +123,7 @@ extern int cifs_symlink(struct inode *inode, struct dentry *direntry, extern int cifs_removexattr(struct dentry *, const char *); extern int cifs_setxattr(struct dentry *, const char *, const void *, size_t, int); -extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); +extern ssize_t cifs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); #ifdef CONFIG_CIFS_NFSD_EXPORT diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 159547c..5d57c85 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -213,8 +213,8 @@ set_ea_exit: return rc; } -ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, - void *ea_value, size_t buf_size) +ssize_t cifs_getxattr(struct dentry *direntry, struct inode *inode, + const char *ea_name, void *ea_value, size_t buf_size) { ssize_t rc = -EOPNOTSUPP; #ifdef CONFIG_CIFS_XATTR @@ -296,7 +296,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, goto get_ea_exit; /* rc already EOPNOTSUPP */ pacl = pTcon->ses->server->ops->get_acl(cifs_sb, - d_inode(direntry), full_path, &acllen); + inode, full_path, &acllen); if (IS_ERR(pacl)) { rc = PTR_ERR(pacl); cifs_dbg(VFS, "%s: error %zd getting sec desc\n", diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 64026e5..543a146 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1369,7 +1369,9 @@ int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode) ssize_t size; int rc = 0; - size = ecryptfs_getxattr_lower(lower_dentry, ECRYPTFS_XATTR_NAME, + size = ecryptfs_getxattr_lower(lower_dentry, + ecryptfs_inode_to_lower(ecryptfs_inode), + ECRYPTFS_XATTR_NAME, page_virt, ECRYPTFS_DEFAULT_EXTENT_SIZE); if (size < 0) { if (unlikely(ecryptfs_verbosity > 0)) @@ -1391,6 +1393,7 @@ int ecryptfs_read_and_validate_xattr_region(struct dentry *dentry, int rc; rc = ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), + ecryptfs_inode_to_lower(inode), ECRYPTFS_XATTR_NAME, file_size, ECRYPTFS_SIZE_AND_MARKER_BYTES); if (rc < ECRYPTFS_SIZE_AND_MARKER_BYTES) diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index d123fba..6ff907f 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -607,8 +607,8 @@ ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat, unsigned char *src, struct dentry *ecryptfs_dentry); int ecryptfs_truncate(struct dentry *dentry, loff_t new_length); ssize_t -ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, - void *value, size_t size); +ecryptfs_getxattr_lower(struct dentry *lower_dentry, struct inode *lower_inode, + const char *name, void *value, size_t size); int ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 121114e..1ac631c 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -1033,29 +1033,30 @@ out: } ssize_t -ecryptfs_getxattr_lower(struct dentry *lower_dentry, const char *name, - void *value, size_t size) +ecryptfs_getxattr_lower(struct dentry *lower_dentry, struct inode *lower_inode, + const char *name, void *value, size_t size) { int rc = 0; - if (!d_inode(lower_dentry)->i_op->getxattr) { + if (!lower_inode->i_op->getxattr) { rc = -EOPNOTSUPP; goto out; } - inode_lock(d_inode(lower_dentry)); - rc = d_inode(lower_dentry)->i_op->getxattr(lower_dentry, name, value, - size); - inode_unlock(d_inode(lower_dentry)); + inode_lock(lower_inode); + rc = lower_inode->i_op->getxattr(lower_dentry, lower_inode, + name, value, size); + inode_unlock(lower_inode); out: return rc; } static ssize_t -ecryptfs_getxattr(struct dentry *dentry, const char *name, void *value, - size_t size) +ecryptfs_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) { - return ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), name, - value, size); + return ecryptfs_getxattr_lower(ecryptfs_dentry_to_lower(dentry), + ecryptfs_inode_to_lower(inode), + name, value, size); } static ssize_t diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 1f58652..39e4381 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -436,7 +436,8 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode) goto out; } inode_lock(lower_inode); - size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME, + size = lower_inode->i_op->getxattr(lower_dentry, lower_inode, + ECRYPTFS_XATTR_NAME, xattr_virt, PAGE_CACHE_SIZE); if (size < 0) size = 8; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 4b855b6..b618527 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1759,10 +1759,9 @@ static int fuse_setxattr(struct dentry *entry, const char *name, return err; } -static ssize_t fuse_getxattr(struct dentry *entry, const char *name, - void *value, size_t size) +static ssize_t fuse_getxattr(struct dentry *entry, struct inode *inode, + const char *name, void *value, size_t size) { - struct inode *inode = d_inode(entry); struct fuse_conn *fc = get_fuse_conn(inode); FUSE_ARGS(args); struct fuse_getxattr_in inarg; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index bb30f9a..45f516ca 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1968,22 +1968,21 @@ static int gfs2_setxattr(struct dentry *dentry, const char *name, return ret; } -static ssize_t gfs2_getxattr(struct dentry *dentry, const char *name, - void *data, size_t size) +static ssize_t gfs2_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *data, size_t size) { - struct inode *inode = d_inode(dentry); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int ret; /* For selinux during lookup */ if (gfs2_glock_is_locked_by_me(ip->i_gl)) - return generic_getxattr(dentry, name, data, size); + return generic_getxattr(dentry, inode, name, data, size); gfs2_holder_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); ret = gfs2_glock_nq(&gh); if (ret == 0) { - ret = generic_getxattr(dentry, name, data, size); + ret = generic_getxattr(dentry, inode, name, data, size); gfs2_glock_dq(&gh); } gfs2_holder_uninit(&gh); diff --git a/fs/hfs/attr.c b/fs/hfs/attr.c index 8d931b1..064f92f 100644 --- a/fs/hfs/attr.c +++ b/fs/hfs/attr.c @@ -56,10 +56,9 @@ out: return res; } -ssize_t hfs_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size) +ssize_t hfs_getxattr(struct dentry *unused, struct inode *inode, + const char *name, void *value, size_t size) { - struct inode *inode = d_inode(dentry); struct hfs_find_data fd; hfs_cat_rec rec; struct hfs_cat_file *file; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 1f1c7dc..79daa09 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -213,8 +213,8 @@ extern void hfs_delete_inode(struct inode *); /* attr.c */ extern int hfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -extern ssize_t hfs_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size); +extern ssize_t hfs_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size); extern ssize_t hfs_listxattr(struct dentry *dentry, char *buffer, size_t size); /* mdb.c */ diff --git a/fs/jfs/jfs_xattr.h b/fs/jfs/jfs_xattr.h index e8d717d..e69e14f 100644 --- a/fs/jfs/jfs_xattr.h +++ b/fs/jfs/jfs_xattr.h @@ -57,7 +57,7 @@ extern int __jfs_setxattr(tid_t, struct inode *, const char *, const void *, extern int jfs_setxattr(struct dentry *, const char *, const void *, size_t, int); extern ssize_t __jfs_getxattr(struct inode *, const char *, void *, size_t); -extern ssize_t jfs_getxattr(struct dentry *, const char *, void *, size_t); +extern ssize_t jfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t); extern ssize_t jfs_listxattr(struct dentry *, char *, size_t); extern int jfs_removexattr(struct dentry *, const char *); diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 48b15a6..5becc6a 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -933,8 +933,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data, return size; } -ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, - size_t buf_size) +ssize_t jfs_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *data, size_t buf_size) { int err; @@ -944,7 +944,7 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, * for it via sb->s_xattr. */ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) - return generic_getxattr(dentry, name, data, buf_size); + return generic_getxattr(dentry, inode, name, data, buf_size); if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { /* @@ -959,7 +959,7 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data, return -EOPNOTSUPP; } - err = __jfs_getxattr(d_inode(dentry), name, data, buf_size); + err = __jfs_getxattr(inode, name, data, buf_size); return err; } diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 16405ae..b524722 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -208,10 +208,10 @@ int kernfs_iop_removexattr(struct dentry *dentry, const char *name) return simple_xattr_set(&attrs->xattrs, name, NULL, 0, XATTR_REPLACE); } -ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf, - size_t size) +ssize_t kernfs_iop_getxattr(struct dentry *unused, struct inode *inode, + const char *name, void *buf, size_t size) { - struct kernfs_node *kn = dentry->d_fsdata; + struct kernfs_node *kn = inode->i_private; struct kernfs_iattrs *attrs; attrs = kernfs_iattrs(kn); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 6762bfb..45c9192 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -84,8 +84,8 @@ int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry, int kernfs_iop_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int kernfs_iop_removexattr(struct dentry *dentry, const char *name); -ssize_t kernfs_iop_getxattr(struct dentry *dentry, const char *name, void *buf, - size_t size); +ssize_t kernfs_iop_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buf, size_t size); ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); /* diff --git a/fs/libfs.c b/fs/libfs.c index 0ca80b2..03332f4 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1127,8 +1127,8 @@ static int empty_dir_setxattr(struct dentry *dentry, const char *name, return -EOPNOTSUPP; } -static ssize_t empty_dir_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size) +static ssize_t empty_dir_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) { return -EOPNOTSUPP; } diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index a4ff5d0..c7b31a0 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -246,8 +246,8 @@ static bool ovl_need_xattr_filter(struct dentry *dentry, return false; } -ssize_t ovl_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size) +ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size) { struct path realpath; enum ovl_path_type type = ovl_path_real(dentry, &realpath); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6a7090f..99ec4b0 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -173,8 +173,8 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr); int ovl_permission(struct inode *inode, int mask); int ovl_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -ssize_t ovl_getxattr(struct dentry *dentry, const char *name, - void *value, size_t size); +ssize_t ovl_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *value, size_t size); ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size); int ovl_removexattr(struct dentry *dentry, const char *name); struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index ef64984..14cab38 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -274,7 +274,7 @@ static bool ovl_is_opaquedir(struct dentry *dentry) if (!S_ISDIR(inode->i_mode) || !inode->i_op->getxattr) return false; - res = inode->i_op->getxattr(dentry, OVL_XATTR_OPAQUE, &val, 1); + res = inode->i_op->getxattr(dentry, inode, OVL_XATTR_OPAQUE, &val, 1); if (res == 1 && val == 'y') return true; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index c2a57e1..536fb49 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1734,8 +1734,8 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, /* xattr.c */ int ubifs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, - size_t size); +ssize_t ubifs_getxattr(struct dentry *dentry, struct inode *host, + const char *name, void *buf, size_t size); ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); int ubifs_removexattr(struct dentry *dentry, const char *name); int ubifs_init_security(struct inode *dentry, struct inode *inode, diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index b043e04..413d650 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -372,10 +372,10 @@ int ubifs_setxattr(struct dentry *dentry, const char *name, return setxattr(d_inode(dentry), name, value, size, flags); } -ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf, - size_t size) +ssize_t ubifs_getxattr(struct dentry *dentry, struct inode *host, + const char *name, void *buf, size_t size) { - struct inode *inode, *host = d_inode(dentry); + struct inode *inode; struct ubifs_info *c = host->i_sb->s_fs_info; struct qstr nm = QSTR_INIT(name, strlen(name)); struct ubifs_inode *ui; diff --git a/fs/xattr.c b/fs/xattr.c index 461ba45..b11945e 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -192,7 +192,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, if (!inode->i_op->getxattr) return -EOPNOTSUPP; - error = inode->i_op->getxattr(dentry, name, NULL, 0); + error = inode->i_op->getxattr(dentry, inode, name, NULL, 0); if (error < 0) return error; @@ -203,7 +203,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value, memset(value, 0, error + 1); } - error = inode->i_op->getxattr(dentry, name, value, error); + error = inode->i_op->getxattr(dentry, inode, name, value, error); *xattr_value = value; return error; } @@ -236,7 +236,7 @@ vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size) } nolsm: if (inode->i_op->getxattr) - error = inode->i_op->getxattr(dentry, name, value, size); + error = inode->i_op->getxattr(dentry, inode, name, value, size); else error = -EOPNOTSUPP; @@ -691,14 +691,15 @@ xattr_resolve_name(const struct xattr_handler **handlers, const char **name) * Find the handler for the prefix and dispatch its get() operation. */ ssize_t -generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) +generic_getxattr(struct dentry *dentry, struct inode *inode, + const char *name, void *buffer, size_t size) { const struct xattr_handler *handler; handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name); if (IS_ERR(handler)) return PTR_ERR(handler); - return handler->get(handler, dentry, d_inode(dentry), + return handler->get(handler, dentry, inode, name, buffer, size); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 329ed37..1b5fcae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1702,7 +1702,8 @@ struct inode_operations { int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); - ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + ssize_t (*getxattr) (struct dentry *, struct inode *, + const char *, void *, size_t); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, diff --git a/include/linux/xattr.h b/include/linux/xattr.h index c11c022..1cc4c57 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -52,7 +52,7 @@ int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, i int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); int vfs_removexattr(struct dentry *, const char *); -ssize_t generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size); +ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); int generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); int generic_removexattr(struct dentry *dentry, const char *name); diff --git a/net/socket.c b/net/socket.c index 5f77a8e..35e4523 100644 --- a/net/socket.c +++ b/net/socket.c @@ -466,7 +466,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) #define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname" #define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX) #define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1) -static ssize_t sockfs_getxattr(struct dentry *dentry, +static ssize_t sockfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *value, size_t size) { const char *proto_name; diff --git a/security/commoncap.c b/security/commoncap.c index 48071ed..a042077 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -313,7 +313,7 @@ int cap_inode_need_killpriv(struct dentry *dentry) if (!inode->i_op->getxattr) return 0; - error = inode->i_op->getxattr(dentry, XATTR_NAME_CAPS, NULL, 0); + error = inode->i_op->getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0); if (error <= 0) return 0; return 1; @@ -397,8 +397,8 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data if (!inode || !inode->i_op->getxattr) return -ENODATA; - size = inode->i_op->getxattr((struct dentry *)dentry, XATTR_NAME_CAPS, &caps, - XATTR_CAPS_SZ); + size = inode->i_op->getxattr((struct dentry *)dentry, inode, + XATTR_NAME_CAPS, &caps, XATTR_CAPS_SZ); if (size == -ENODATA || size == -EOPNOTSUPP) /* no data, that's ok */ return -ENODATA; diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index 84c6d11..b9e2628 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -82,7 +82,7 @@ static int evm_find_protected_xattrs(struct dentry *dentry) return -EOPNOTSUPP; for (xattr = evm_config_xattrnames; *xattr != NULL; xattr++) { - error = inode->i_op->getxattr(dentry, *xattr, NULL, 0); + error = inode->i_op->getxattr(dentry, inode, *xattr, NULL, 0); if (error < 0) { if (error == -ENODATA) continue; diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 889cd59..469f5c7 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -506,7 +506,8 @@ static int sb_finish_set_opts(struct super_block *sb) rc = -EOPNOTSUPP; goto out; } - rc = root_inode->i_op->getxattr(root, XATTR_NAME_SELINUX, NULL, 0); + rc = root_inode->i_op->getxattr(root, root_inode, + XATTR_NAME_SELINUX, NULL, 0); if (rc < 0 && rc != -ENODATA) { if (rc == -EOPNOTSUPP) printk(KERN_WARNING "SELinux: (dev %s, type " @@ -1412,13 +1413,13 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent goto out_unlock; } context[len] = '\0'; - rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, + rc = inode->i_op->getxattr(dentry, inode, XATTR_NAME_SELINUX, context, len); if (rc == -ERANGE) { kfree(context); /* Need a larger buffer. Query for the right size. */ - rc = inode->i_op->getxattr(dentry, XATTR_NAME_SELINUX, + rc = inode->i_op->getxattr(dentry, inode, XATTR_NAME_SELINUX, NULL, 0); if (rc < 0) { dput(dentry); @@ -1432,7 +1433,7 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent goto out_unlock; } context[len] = '\0'; - rc = inode->i_op->getxattr(dentry, + rc = inode->i_op->getxattr(dentry, inode, XATTR_NAME_SELINUX, context, len); } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 50bcca2..ff2b8c3 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -272,7 +272,7 @@ static struct smack_known *smk_fetch(const char *name, struct inode *ip, if (buffer == NULL) return ERR_PTR(-ENOMEM); - rc = ip->i_op->getxattr(dp, name, buffer, SMK_LONGLABEL); + rc = ip->i_op->getxattr(dp, ip, name, buffer, SMK_LONGLABEL); if (rc < 0) skp = ERR_PTR(rc); else if (rc == 0) @@ -3519,7 +3519,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) TRANS_TRUE, TRANS_TRUE_SIZE, 0); } else { - rc = inode->i_op->getxattr(dp, + rc = inode->i_op->getxattr(dp, inode, XATTR_NAME_SMACKTRANSMUTE, trattr, TRANS_TRUE_SIZE); if (rc >= 0 && strncmp(trattr, TRANS_TRUE, -- cgit v0.10.2 From b96809173e94ea2fa8c19c2e40e8545a1821bf57 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Apr 2016 00:53:26 -0400 Subject: security_d_instantiate(): move to the point prior to attaching dentry to inode Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index d5ecc6e..4962a7b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1775,11 +1775,11 @@ void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); if (inode) { + security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); __d_instantiate(entry, inode); spin_unlock(&inode->i_lock); } - security_d_instantiate(entry, inode); } EXPORT_SYMBOL(d_instantiate); @@ -1796,6 +1796,7 @@ int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) { BUG_ON(!hlist_unhashed(&entry->d_u.d_alias)); + security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode) && !hlist_empty(&inode->i_dentry)) { spin_unlock(&inode->i_lock); @@ -1804,7 +1805,6 @@ int d_instantiate_no_diralias(struct dentry *entry, struct inode *inode) } __d_instantiate(entry, inode); spin_unlock(&inode->i_lock); - security_d_instantiate(entry, inode); return 0; } @@ -1878,6 +1878,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) goto out_iput; } + security_d_instantiate(tmp, inode); spin_lock(&inode->i_lock); res = __d_find_any_alias(inode); if (res) { @@ -1900,13 +1901,10 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) hlist_bl_unlock(&tmp->d_sb->s_anon); spin_unlock(&tmp->d_lock); spin_unlock(&inode->i_lock); - security_d_instantiate(tmp, inode); return tmp; out_iput: - if (res && !IS_ERR(res)) - security_d_instantiate(res, inode); iput(inode); return res; } @@ -2372,7 +2370,6 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) __d_instantiate(dentry, inode); spin_unlock(&inode->i_lock); } - security_d_instantiate(dentry, inode); d_rehash(dentry); } @@ -2387,8 +2384,10 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) void d_add(struct dentry *entry, struct inode *inode) { - if (inode) + if (inode) { + security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); + } __d_add(entry, inode); } EXPORT_SYMBOL(d_add); @@ -2782,6 +2781,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (!inode) goto out; + security_d_instantiate(dentry, inode); spin_lock(&inode->i_lock); if (S_ISDIR(inode->i_mode)) { struct dentry *new = __d_find_any_alias(inode); @@ -2809,7 +2809,6 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) } else { __d_move(new, dentry, false); write_sequnlock(&rename_lock); - security_d_instantiate(new, inode); } iput(inode); return new; -- cgit v0.10.2 From 779b839133d7ab23229be7e601cabab7678ceab1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 11 Apr 2016 08:42:55 -0400 Subject: kernfs: use lookup_one_len_unlocked() Signed-off-by: Al Viro diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index f73541f..b5b5f07 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -120,9 +120,8 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn, kntmp = find_next_ancestor(kn, knparent); if (WARN_ON(!kntmp)) return ERR_PTR(-EINVAL); - mutex_lock(&d_inode(dentry)->i_mutex); - dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name)); - mutex_unlock(&d_inode(dentry)->i_mutex); + dtmp = lookup_one_len_unlocked(kntmp->name, dentry, + strlen(kntmp->name)); dput(dentry); if (IS_ERR(dtmp)) return dtmp; -- cgit v0.10.2 From 48f35b7b7367c6c2594f9c209ee2e1362519a899 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Apr 2016 00:37:59 -0400 Subject: configfs_detach_prep(): make sure that wait_mutex won't go away grab a reference to dentry we'd got the sucker from, and return that dentry via *wait, rather than just returning the address of ->i_mutex. Signed-off-by: Al Viro diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index ea59c89..48929c4 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -494,7 +494,7 @@ out: * If there is an error, the caller will reset the flags via * configfs_detach_rollback(). */ -static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex) +static int configfs_detach_prep(struct dentry *dentry, struct dentry **wait) { struct configfs_dirent *parent_sd = dentry->d_fsdata; struct configfs_dirent *sd; @@ -515,8 +515,8 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex if (sd->s_type & CONFIGFS_USET_DEFAULT) { /* Abort if racing with mkdir() */ if (sd->s_type & CONFIGFS_USET_IN_MKDIR) { - if (wait_mutex) - *wait_mutex = &d_inode(sd->s_dentry)->i_mutex; + if (wait) + *wait= dget(sd->s_dentry); return -EAGAIN; } @@ -524,7 +524,7 @@ static int configfs_detach_prep(struct dentry *dentry, struct mutex **wait_mutex * Yup, recursive. If there's a problem, blame * deep nesting of default_groups */ - ret = configfs_detach_prep(sd->s_dentry, wait_mutex); + ret = configfs_detach_prep(sd->s_dentry, wait); if (!ret) continue; } else @@ -1458,7 +1458,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) * the new link is temporarily attached */ do { - struct mutex *wait_mutex; + struct dentry *wait; mutex_lock(&configfs_symlink_mutex); spin_lock(&configfs_dirent_lock); @@ -1469,7 +1469,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) */ ret = sd->s_dependent_count ? -EBUSY : 0; if (!ret) { - ret = configfs_detach_prep(dentry, &wait_mutex); + ret = configfs_detach_prep(dentry, &wait); if (ret) configfs_detach_rollback(dentry); } @@ -1483,8 +1483,9 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) } /* Wait until the racing operation terminates */ - mutex_lock(wait_mutex); - mutex_unlock(wait_mutex); + inode_lock(d_inode(wait)); + inode_unlock(d_inode(wait)); + dput(wait); } } while (ret == -EAGAIN); -- cgit v0.10.2 From 7b9743eb8932cee1bc646725a6364ccb29d86a2c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Apr 2016 00:41:01 -0400 Subject: ocfs2: don't open-code inode_lock/inode_unlock Signed-off-by: Al Viro diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index ad15773..abb0b0b 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2311,7 +2311,7 @@ static void ocfs2_dio_end_io_write(struct inode *inode, /* ocfs2_file_write_iter will get i_mutex, so we need not lock if we * are in that context. */ if (dwc->dw_writer_pid != task_pid_nr(current)) { - mutex_lock(&inode->i_mutex); + inode_lock(inode); locked = 1; } @@ -2390,7 +2390,7 @@ out: ocfs2_free_alloc_context(meta_ac); ocfs2_run_deallocs(osb, &dealloc); if (locked) - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); ocfs2_dio_free_write_ctx(inode, dwc); } -- cgit v0.10.2 From 5ecfcb265f1e77d5de0140d21de3f8ab25441df7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 12 Apr 2016 00:43:20 -0400 Subject: orangefs: don't open-code inode_lock/inode_unlock Signed-off-by: Al Viro diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c index ae92795..491e82c 100644 --- a/fs/orangefs/file.c +++ b/fs/orangefs/file.c @@ -445,7 +445,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n"); - mutex_lock(&file->f_mapping->host->i_mutex); + inode_lock(file->f_mapping->host); /* Make sure generic_write_checks sees an up to date inode size. */ if (file->f_flags & O_APPEND) { @@ -492,7 +492,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite out: - mutex_unlock(&file->f_mapping->host->i_mutex); + inode_unlock(file->f_mapping->host); return rc; } diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index a9925e2..2281882 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -612,11 +612,11 @@ do { \ static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - mutex_lock(&inode->i_mutex); + inode_lock(inode); #endif i_size_write(inode, i_size); #if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - mutex_unlock(&inode->i_mutex); + inode_unlock(inode); #endif } -- cgit v0.10.2 From 1ae1f3f64735905d1751a64ae4b05b9509486612 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Apr 2016 14:44:31 -0400 Subject: reiserfs: open-code reiserfs_mutex_lock_safe() in reiserfs_unpack() ... and have it use inode_lock() Signed-off-by: Al Viro diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 57045f4..2f1ddc9 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -187,7 +187,11 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) } /* we need to make sure nobody is changing the file size beneath us */ - reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb); +{ + int depth = reiserfs_write_unlock_nested(inode->i_sb); + inode_lock(inode); + reiserfs_write_lock_nested(inode->i_sb, depth); +} reiserfs_write_lock(inode->i_sb); -- cgit v0.10.2 From 383d4e8ab02c02f7b31f90174c644d935c223c85 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Apr 2016 19:12:42 -0400 Subject: reconnect_one(): use lookup_one_len_unlocked() ... and explain the non-obvious logics in case when lookup yields a different dentry. Signed-off-by: Al Viro diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index c46f1a1..402c5ca 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -143,14 +143,18 @@ static struct dentry *reconnect_one(struct vfsmount *mnt, if (err) goto out_err; dprintk("%s: found name: %s\n", __func__, nbuf); - inode_lock(parent->d_inode); - tmp = lookup_one_len(nbuf, parent, strlen(nbuf)); - inode_unlock(parent->d_inode); + tmp = lookup_one_len_unlocked(nbuf, parent, strlen(nbuf)); if (IS_ERR(tmp)) { dprintk("%s: lookup failed: %d\n", __func__, PTR_ERR(tmp)); goto out_err; } if (tmp != dentry) { + /* + * Somebody has renamed it since exportfs_get_name(); + * great, since it could've only been renamed if it + * got looked up and thus connected, and it would + * remain connected afterwards. We are done. + */ dput(tmp); goto out_reconnected; } -- cgit v0.10.2 From b9e1d435fdf4ae0b925070d44b65d608f2707688 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Apr 2016 19:20:29 -0400 Subject: ovl_lookup_real(): use lookup_one_len_unlocked() Signed-off-by: Al Viro diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 29c4d32..8784691 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -411,9 +411,7 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir, { struct dentry *dentry; - inode_lock(dir->d_inode); - dentry = lookup_one_len(name->name, dir, name->len); - inode_unlock(dir->d_inode); + dentry = lookup_one_len_unlocked(name->name, dir, name->len); if (IS_ERR(dentry)) { if (PTR_ERR(dentry) == -ENOENT) -- cgit v0.10.2 From be5b82dbfec2a900925da4437af3c60b61f4c53d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 22 Apr 2016 15:06:44 -0400 Subject: make ext2_get_page() and friends work without external serialization Right now ext2_get_page() (and its analogues in a bunch of other filesystems) relies upon the directory being locked - the way it sets and tests Checked and Error bits would be racy without that. Switch to a slightly different scheme, _not_ setting Checked in case of failure. That way the logics becomes if Checked => OK else if Error => fail else if !validate => fail else => OK with validation setting Checked or Error on success and failure resp. and returning which one had happened. Equivalent to the current logics, but unlike the current logics not sensitive to the order of set_bit, test_bit getting reordered by CPU, etc. Signed-off-by: Al Viro diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5fda2bc..cdf8fbb 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -128,7 +128,7 @@ struct afs_lookup_cookie { /* * check that a directory page is valid */ -static inline void afs_dir_check_page(struct inode *dir, struct page *page) +static inline bool afs_dir_check_page(struct inode *dir, struct page *page) { struct afs_dir_page *dbuf; loff_t latter; @@ -168,11 +168,11 @@ static inline void afs_dir_check_page(struct inode *dir, struct page *page) } SetPageChecked(page); - return; + return true; error: - SetPageChecked(page); SetPageError(page); + return false; } /* @@ -196,10 +196,10 @@ static struct page *afs_dir_get_page(struct inode *dir, unsigned long index, page = read_cache_page(dir->i_mapping, index, afs_page_filler, key); if (!IS_ERR(page)) { kmap(page); - if (!PageChecked(page)) - afs_dir_check_page(dir, page); - if (PageError(page)) - goto fail; + if (unlikely(!PageChecked(page))) { + if (PageError(page) || !afs_dir_check_page(dir, page)) + goto fail; + } } return page; diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 547b93c..a7661aa 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -79,7 +79,7 @@ static int exofs_commit_chunk(struct page *page, loff_t pos, unsigned len) return err; } -static void exofs_check_page(struct page *page) +static bool exofs_check_page(struct page *page) { struct inode *dir = page->mapping->host; unsigned chunk_size = exofs_chunk_size(dir); @@ -114,7 +114,7 @@ static void exofs_check_page(struct page *page) goto Eend; out: SetPageChecked(page); - return; + return true; Ebadsize: EXOFS_ERR("ERROR [exofs_check_page]: " @@ -150,8 +150,8 @@ Eend: dir->i_ino, (page->index<inode_no))); fail: - SetPageChecked(page); SetPageError(page); + return false; } static struct page *exofs_get_page(struct inode *dir, unsigned long n) @@ -161,10 +161,10 @@ static struct page *exofs_get_page(struct inode *dir, unsigned long n) if (!IS_ERR(page)) { kmap(page); - if (!PageChecked(page)) - exofs_check_page(page); - if (PageError(page)) - goto fail; + if (unlikely(!PageChecked(page))) { + if (PageError(page) || !exofs_check_page(page)) + goto fail; + } } return page; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 7ff6fcf..f0f4363 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -110,7 +110,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) return err; } -static void ext2_check_page(struct page *page, int quiet) +static bool ext2_check_page(struct page *page, int quiet) { struct inode *dir = page->mapping->host; struct super_block *sb = dir->i_sb; @@ -148,7 +148,7 @@ static void ext2_check_page(struct page *page, int quiet) goto Eend; out: SetPageChecked(page); - return; + return true; /* Too bad, we had an error */ @@ -190,8 +190,8 @@ Eend: (unsigned long) le32_to_cpu(p->inode)); } fail: - SetPageChecked(page); SetPageError(page); + return false; } static struct page * ext2_get_page(struct inode *dir, unsigned long n, @@ -201,10 +201,10 @@ static struct page * ext2_get_page(struct inode *dir, unsigned long n, struct page *page = read_mapping_page(mapping, n, NULL); if (!IS_ERR(page)) { kmap(page); - if (!PageChecked(page)) - ext2_check_page(page, quiet); - if (PageError(page)) - goto fail; + if (unlikely(!PageChecked(page))) { + if (PageError(page) || !ext2_check_page(page, quiet)) + goto fail; + } } return page; diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index e08f064..2eaed92 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -102,7 +102,7 @@ static void nilfs_commit_chunk(struct page *page, unlock_page(page); } -static void nilfs_check_page(struct page *page) +static bool nilfs_check_page(struct page *page) { struct inode *dir = page->mapping->host; struct super_block *sb = dir->i_sb; @@ -137,7 +137,7 @@ static void nilfs_check_page(struct page *page) goto Eend; out: SetPageChecked(page); - return; + return true; /* Too bad, we had an error */ @@ -173,8 +173,8 @@ Eend: dir->i_ino, (page->index<inode)); fail: - SetPageChecked(page); SetPageError(page); + return false; } static struct page *nilfs_get_page(struct inode *dir, unsigned long n) @@ -184,10 +184,10 @@ static struct page *nilfs_get_page(struct inode *dir, unsigned long n) if (!IS_ERR(page)) { kmap(page); - if (!PageChecked(page)) - nilfs_check_page(page); - if (PageError(page)) - goto fail; + if (unlikely(!PageChecked(page))) { + if (PageError(page) || !nilfs_check_page(page)) + goto fail; + } } return page; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 0b14572..4c07421 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -105,7 +105,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de, } -static void ufs_check_page(struct page *page) +static bool ufs_check_page(struct page *page) { struct inode *dir = page->mapping->host; struct super_block *sb = dir->i_sb; @@ -143,7 +143,7 @@ static void ufs_check_page(struct page *page) goto Eend; out: SetPageChecked(page); - return; + return true; /* Too bad, we had an error */ @@ -180,8 +180,8 @@ Eend: "offset=%lu", dir->i_ino, (page->index< Date: Sat, 30 Apr 2016 16:38:39 -0400 Subject: nfs: missing wakeup in nfs_unblock_sillyrename() will be needed as soon as lookups are not serialized by ->i_mutex Signed-off-by: Al Viro diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index fa538b2..7d6deac 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -229,6 +229,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry) struct nfs_unlinkdata *data; atomic_inc(&nfsi->silly_count); + wake_up(&nfsi->waitqueue); spin_lock(&dir->i_lock); while (!hlist_empty(&nfsi->silly_list)) { if (!atomic_inc_not_zero(&nfsi->silly_count)) -- cgit v0.10.2 From 1936386ea96591c27ae4b70caef9591c41e6290f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Apr 2016 19:33:34 -0400 Subject: lookup_slow(): bugger off on IS_DEADDIR() from the very beginning Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index bdcea8a..c275635 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1603,8 +1603,15 @@ static struct dentry *lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { - struct dentry *dentry; - inode_lock(dir->d_inode); + struct dentry *dentry, *old; + struct inode *inode = dir->d_inode; + + inode_lock(inode); + /* Don't go there if it's already dead */ + if (unlikely(IS_DEADDIR(inode))) { + inode_unlock(inode); + return ERR_PTR(-ENOENT); + } dentry = d_lookup(dir, name); if (unlikely(dentry)) { if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && @@ -1618,17 +1625,21 @@ static struct dentry *lookup_slow(const struct qstr *name, } } if (dentry) { - inode_unlock(dir->d_inode); + inode_unlock(inode); return dentry; } } dentry = d_alloc(dir, name); if (unlikely(!dentry)) { - inode_unlock(dir->d_inode); + inode_unlock(inode); return ERR_PTR(-ENOMEM); } - dentry = lookup_real(dir->d_inode, dentry, flags); - inode_unlock(dir->d_inode); + old = inode->i_op->lookup(inode, dentry, flags); + if (unlikely(old)) { + dput(dentry); + dentry = old; + } + inode_unlock(inode); return dentry; } -- cgit v0.10.2 From 0568d705b0087751f0c085c0a665aa3e954c810d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Apr 2016 19:40:56 -0400 Subject: __d_add(): don't drop/regain ->d_lock Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 4962a7b..20394fb 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2366,11 +2366,19 @@ EXPORT_SYMBOL(d_rehash); static inline void __d_add(struct dentry *dentry, struct inode *inode) { + spin_lock(&dentry->d_lock); if (inode) { - __d_instantiate(dentry, inode); + unsigned add_flags = d_flags_for_inode(inode); + hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); + raw_write_seqcount_begin(&dentry->d_seq); + __d_set_inode_and_type(dentry, inode, add_flags); + raw_write_seqcount_end(&dentry->d_seq); + __fsnotify_d_instantiate(dentry); + } + _d_rehash(dentry); + spin_unlock(&dentry->d_lock); + if (inode) spin_unlock(&inode->i_lock); - } - d_rehash(dentry); } /** -- cgit v0.10.2 From 85c7f81041d57cfe9dc97f4680d5586b54534a39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Apr 2016 19:52:13 -0400 Subject: beginning of transition to parallel lookups - marking in-lookup dentries marked as such when (would be) parallel lookup is about to pass them to actual ->lookup(); unmarked when * __d_add() is about to make it hashed, positive or not. * __d_move() (from d_splice_alias(), directly or via __d_unalias()) puts a preexisting dentry in its place * in caller of ->lookup() if it has escaped all of the above. Bug (WARN_ON, actually) if it reaches the final dput() or d_instantiate() while still marked such. As the result, we are guaranteed that for as long as the flag is set, dentry will * remain negative unhashed with positive refcount * never have its ->d_alias looked at * never have its ->d_lru looked at * never have its ->d_parent and ->d_name changed Right now we have at most one such for any given parent directory. With parallel lookups that restriction will weaken to * only exist when parent is locked shared * at most one with given (parent,name) pair (comparison of names is according to ->d_compare()) * only exist when there's no hashed dentry with the same (parent,name) Transition will take the next several commits; unfortunately, we'll only be able to switch to rwsem at the end of this series. The reason for not making it a single patch is to simplify review. New primitives: d_in_lookup() (a predicate checking if dentry is in the in-lookup state) and d_lookup_done() (tells the system that we are done with lookup and if it's still marked as in-lookup, it should cease to be such). Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 20394fb..0f1d938 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -761,6 +761,8 @@ repeat: /* Slow case: now with the dentry lock held */ rcu_read_unlock(); + WARN_ON(d_in_lookup(dentry)); + /* Unreachable? Get rid of it */ if (unlikely(d_unhashed(dentry))) goto kill_it; @@ -1746,6 +1748,7 @@ type_determined: static void __d_instantiate(struct dentry *dentry, struct inode *inode) { unsigned add_flags = d_flags_for_inode(inode); + WARN_ON(d_in_lookup(dentry)); spin_lock(&dentry->d_lock); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); @@ -2361,12 +2364,20 @@ void d_rehash(struct dentry * entry) } EXPORT_SYMBOL(d_rehash); +void __d_lookup_done(struct dentry *dentry) +{ + dentry->d_flags &= ~DCACHE_PAR_LOOKUP; + /* more stuff will land here */ +} +EXPORT_SYMBOL(__d_lookup_done); /* inode->i_lock held if inode is non-NULL */ static inline void __d_add(struct dentry *dentry, struct inode *inode) { spin_lock(&dentry->d_lock); + if (unlikely(d_in_lookup(dentry))) + __d_lookup_done(dentry); if (inode) { unsigned add_flags = d_flags_for_inode(inode); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); @@ -2612,6 +2623,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target, BUG_ON(d_ancestor(target, dentry)); dentry_lock_for_move(dentry, target); + if (unlikely(d_in_lookup(target))) + __d_lookup_done(target); write_seqcount_begin(&dentry->d_seq); write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); diff --git a/fs/namei.c b/fs/namei.c index c275635..26e5f84 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1634,7 +1634,11 @@ static struct dentry *lookup_slow(const struct qstr *name, inode_unlock(inode); return ERR_PTR(-ENOMEM); } + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_PAR_LOOKUP; + spin_unlock(&dentry->d_lock); old = inode->i_op->lookup(inode, dentry, flags); + d_lookup_done(dentry); if (unlikely(old)) { dput(dentry); dentry = old; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4bb4de8..9a7aa89 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -232,6 +232,8 @@ struct dentry_operations { #define DCACHE_ENCRYPTED_WITH_KEY 0x04000000 /* dir is encrypted with a valid key */ #define DCACHE_OP_REAL 0x08000000 +#define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ + extern seqlock_t rename_lock; /* @@ -367,6 +369,22 @@ static inline void dont_mount(struct dentry *dentry) spin_unlock(&dentry->d_lock); } +extern void __d_lookup_done(struct dentry *); + +static inline int d_in_lookup(struct dentry *dentry) +{ + return dentry->d_flags & DCACHE_PAR_LOOKUP; +} + +static inline void d_lookup_done(struct dentry *dentry) +{ + if (unlikely(d_in_lookup(dentry))) { + spin_lock(&dentry->d_lock); + __d_lookup_done(dentry); + spin_unlock(&dentry->d_lock); + } +} + extern void dput(struct dentry *); static inline bool d_managed(const struct dentry *dentry) -- cgit v0.10.2 From 84e710da2a1dfacfc87f604869a4d22df91ce6cd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Apr 2016 00:58:55 -0400 Subject: parallel lookups machinery, part 2 We'll need to verify that there's neither a hashed nor in-lookup dentry with desired parent/name before adding to in-lookup set. One possible solution would be to hold the parent's ->d_lock through both checks, but while the in-lookup set is relatively small at any time, dcache is not. And holding the parent's ->d_lock through something like __d_lookup_rcu() would suck too badly. So we leave the parent's ->d_lock alone, which means that we watch out for the following scenario: * we verify that there's no hashed match * existing in-lookup match gets hashed by another process * we verify that there's no in-lookup matches and decide that everything's fine. Solution: per-directory kinda-sorta seqlock, bumped around the times we hash something that used to be in-lookup or move (and hash) something in place of in-lookup. Then the above would turn into * read the counter * do dcache lookup * if no matches found, check for in-lookup matches * if there had been none of those either, check if the counter has changed; repeat if it has. The "kinda-sorta" part is due to the fact that we don't have much spare space in inode. There is a spare word (shared with i_bdev/i_cdev/i_pipe), so the counter part is not a problem, but spinlock is a different story. We could use the parent's ->d_lock, and it would be less painful in terms of contention, for __d_add() it would be rather inconvenient to grab; we could do that (using lock_parent()), but... Fortunately, we can get serialization on the counter itself, and it might be a good idea in general; we can use cmpxchg() in a loop to get from even to odd and smp_store_release() from odd to even. This commit adds the counter and updating logics; the readers will be added in the next commit. Signed-off-by: Al Viro diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 57bb375..8810e23 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -531,3 +531,11 @@ in your dentry operations instead. dentry might be yet to be attached to inode, so do _not_ use its ->d_inode in the instances. Rationale: !@#!@# security_d_instantiate() needs to be called before we attach dentry to inode. +-- +[mandatory] + symlinks are no longer the only inodes that do *not* have i_bdev/i_cdev/ + i_pipe/i_link union zeroed out at inode eviction. As the result, you can't + assume that non-NULL value in ->i_nlink at ->destroy_inode() implies that + it's a symlink. Checking ->i_mode is really needed now. In-tree we had + to fix shmem_destroy_callback() that used to take that kind of shortcut; + watch out, since that shortcut is no longer valid. diff --git a/fs/dcache.c b/fs/dcache.c index 0f1d938..10988f7 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2364,6 +2364,22 @@ void d_rehash(struct dentry * entry) } EXPORT_SYMBOL(d_rehash); +static inline unsigned start_dir_add(struct inode *dir) +{ + + for (;;) { + unsigned n = dir->i_dir_seq; + if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n) + return n; + cpu_relax(); + } +} + +static inline void end_dir_add(struct inode *dir, unsigned n) +{ + smp_store_release(&dir->i_dir_seq, n + 2); +} + void __d_lookup_done(struct dentry *dentry) { dentry->d_flags &= ~DCACHE_PAR_LOOKUP; @@ -2375,9 +2391,14 @@ EXPORT_SYMBOL(__d_lookup_done); static inline void __d_add(struct dentry *dentry, struct inode *inode) { + struct inode *dir = NULL; + unsigned n; spin_lock(&dentry->d_lock); - if (unlikely(d_in_lookup(dentry))) + if (unlikely(d_in_lookup(dentry))) { + dir = dentry->d_parent->d_inode; + n = start_dir_add(dir); __d_lookup_done(dentry); + } if (inode) { unsigned add_flags = d_flags_for_inode(inode); hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); @@ -2387,6 +2408,8 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) __fsnotify_d_instantiate(dentry); } _d_rehash(dentry); + if (dir) + end_dir_add(dir, n); spin_unlock(&dentry->d_lock); if (inode) spin_unlock(&inode->i_lock); @@ -2616,6 +2639,8 @@ static void dentry_unlock_for_move(struct dentry *dentry, struct dentry *target) static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) { + struct inode *dir = NULL; + unsigned n; if (!dentry->d_inode) printk(KERN_WARNING "VFS: moving negative dcache entry\n"); @@ -2623,8 +2648,11 @@ static void __d_move(struct dentry *dentry, struct dentry *target, BUG_ON(d_ancestor(target, dentry)); dentry_lock_for_move(dentry, target); - if (unlikely(d_in_lookup(target))) + if (unlikely(d_in_lookup(target))) { + dir = target->d_parent->d_inode; + n = start_dir_add(dir); __d_lookup_done(target); + } write_seqcount_begin(&dentry->d_seq); write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); @@ -2674,6 +2702,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target, write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); + if (dir) + end_dir_add(dir, n); dentry_unlock_for_move(dentry, target); } diff --git a/fs/inode.c b/fs/inode.c index 4202aac..4b884f7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -151,6 +151,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_bdev = NULL; inode->i_cdev = NULL; inode->i_link = NULL; + inode->i_dir_seq = 0; inode->i_rdev = 0; inode->dirtied_when = 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index 6d0fa91..00cecc5 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -684,6 +684,7 @@ struct inode { struct block_device *i_bdev; struct cdev *i_cdev; char *i_link; + unsigned i_dir_seq; }; __u32 i_generation; diff --git a/mm/shmem.c b/mm/shmem.c index 4640699..e684a91 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3123,7 +3123,8 @@ static struct inode *shmem_alloc_inode(struct super_block *sb) static void shmem_destroy_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); - kfree(inode->i_link); + if (S_ISLNK(inode->i_mode)) + kfree(inode->i_link); kmem_cache_free(shmem_inode_cachep, SHMEM_I(inode)); } -- cgit v0.10.2 From 94bdd655caba2080ae81d83d756d325abdffcb9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Apr 2016 02:42:04 -0400 Subject: parallel lookups machinery, part 3 We will need to be able to check if there is an in-lookup dentry with matching parent/name. Right now it's impossible, but as soon as start locking directories shared such beasts will appear. Add a secondary hash for locating those. Hash chains go through the same space where d_alias will be once it's not in-lookup anymore. Search is done under the same bitlock we use for modifications - with the primary hash we can rely on d_rehash() into the wrong chain being the worst that could happen, but here the pointers are buggered once it's removed from the chain. On the other hand, the chains are not going to be long and normally we'll end up adding to the chain anyway. That allows us to avoid bothering with ->d_lock when doing the comparisons - everything is stable until removed from chain. New helper: d_alloc_parallel(). Right now it allocates, verifies that no hashed and in-lookup matches exist and adds to in-lookup hash. Returns ERR_PTR() for error, hashed match (in the unlikely case it's been found) or new dentry. In-lookup matches trigger BUG() for now; that will change in the next commit when we introduce waiting for ongoing lookup to finish. Note that in-lookup matches won't be possible until we actually go for shared locking. lookup_slow() switched to use of d_alloc_parallel(). Again, these commits are separated only for making it easier to review. All this machinery will start doing something useful only when we go for shared locking; it's just that the combination is too large for my taste. Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 10988f7..ea2de7c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -111,6 +111,17 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent, return dentry_hashtable + hash_32(hash, d_hash_shift); } +#define IN_LOOKUP_SHIFT 10 +static struct hlist_bl_head in_lookup_hashtable[1 << IN_LOOKUP_SHIFT]; + +static inline struct hlist_bl_head *in_lookup_hash(const struct dentry *parent, + unsigned int hash) +{ + hash += (unsigned long) parent / L1_CACHE_BYTES; + return in_lookup_hashtable + hash_32(hash, IN_LOOKUP_SHIFT); +} + + /* Statistics gathering. */ struct dentry_stat_t dentry_stat = { .age_limit = 45, @@ -2380,9 +2391,102 @@ static inline void end_dir_add(struct inode *dir, unsigned n) smp_store_release(&dir->i_dir_seq, n + 2); } +struct dentry *d_alloc_parallel(struct dentry *parent, + const struct qstr *name) +{ + unsigned int len = name->len; + unsigned int hash = name->hash; + const unsigned char *str = name->name; + struct hlist_bl_head *b = in_lookup_hash(parent, hash); + struct hlist_bl_node *node; + struct dentry *new = d_alloc(parent, name); + struct dentry *dentry; + unsigned seq, r_seq, d_seq; + + if (unlikely(!new)) + return ERR_PTR(-ENOMEM); + +retry: + rcu_read_lock(); + seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1; + r_seq = read_seqbegin(&rename_lock); + dentry = __d_lookup_rcu(parent, name, &d_seq); + if (unlikely(dentry)) { + if (!lockref_get_not_dead(&dentry->d_lockref)) { + rcu_read_unlock(); + goto retry; + } + if (read_seqcount_retry(&dentry->d_seq, d_seq)) { + rcu_read_unlock(); + dput(dentry); + goto retry; + } + rcu_read_unlock(); + dput(new); + return dentry; + } + if (unlikely(read_seqretry(&rename_lock, r_seq))) { + rcu_read_unlock(); + goto retry; + } + hlist_bl_lock(b); + if (unlikely(parent->d_inode->i_dir_seq != seq)) { + hlist_bl_unlock(b); + rcu_read_unlock(); + goto retry; + } + rcu_read_unlock(); + /* + * No changes for the parent since the beginning of d_lookup(). + * Since all removals from the chain happen with hlist_bl_lock(), + * any potential in-lookup matches are going to stay here until + * we unlock the chain. All fields are stable in everything + * we encounter. + */ + hlist_bl_for_each_entry(dentry, node, b, d_u.d_in_lookup_hash) { + if (dentry->d_name.hash != hash) + continue; + if (dentry->d_parent != parent) + continue; + if (d_unhashed(dentry)) + continue; + if (parent->d_flags & DCACHE_OP_COMPARE) { + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; + if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) + continue; + } else { + if (dentry->d_name.len != len) + continue; + if (dentry_cmp(dentry, str, len)) + continue; + } + dget(dentry); + hlist_bl_unlock(b); + /* impossible until we actually enable parallel lookups */ + BUG(); + /* and this will be "wait for it to stop being in-lookup" */ + /* this one will be handled in the next commit */ + dput(new); + return dentry; + } + /* we can't take ->d_lock here; it's OK, though. */ + new->d_flags |= DCACHE_PAR_LOOKUP; + hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b); + hlist_bl_unlock(b); + return new; +} +EXPORT_SYMBOL(d_alloc_parallel); + void __d_lookup_done(struct dentry *dentry) { + struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent, + dentry->d_name.hash); + hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; + __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); + hlist_bl_unlock(b); + INIT_HLIST_NODE(&dentry->d_u.d_alias); /* more stuff will land here */ } EXPORT_SYMBOL(__d_lookup_done); diff --git a/fs/namei.c b/fs/namei.c index 26e5f84..aa04320 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1603,46 +1603,40 @@ static struct dentry *lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { - struct dentry *dentry, *old; + struct dentry *dentry = ERR_PTR(-ENOENT), *old; struct inode *inode = dir->d_inode; inode_lock(inode); /* Don't go there if it's already dead */ - if (unlikely(IS_DEADDIR(inode))) { - inode_unlock(inode); - return ERR_PTR(-ENOENT); - } - dentry = d_lookup(dir, name); - if (unlikely(dentry)) { + if (unlikely(IS_DEADDIR(inode))) + goto out; +again: + dentry = d_alloc_parallel(dir, name); + if (IS_ERR(dentry)) + goto out; + if (unlikely(!d_in_lookup(dentry))) { if ((dentry->d_flags & DCACHE_OP_REVALIDATE) && !(flags & LOOKUP_NO_REVAL)) { int error = d_revalidate(dentry, flags); if (unlikely(error <= 0)) { - if (!error) + if (!error) { d_invalidate(dentry); + dput(dentry); + goto again; + } dput(dentry); dentry = ERR_PTR(error); } } - if (dentry) { - inode_unlock(inode); - return dentry; + } else { + old = inode->i_op->lookup(inode, dentry, flags); + d_lookup_done(dentry); + if (unlikely(old)) { + dput(dentry); + dentry = old; } } - dentry = d_alloc(dir, name); - if (unlikely(!dentry)) { - inode_unlock(inode); - return ERR_PTR(-ENOMEM); - } - spin_lock(&dentry->d_lock); - dentry->d_flags |= DCACHE_PAR_LOOKUP; - spin_unlock(&dentry->d_lock); - old = inode->i_op->lookup(inode, dentry, flags); - d_lookup_done(dentry); - if (unlikely(old)) { - dput(dentry); - dentry = old; - } +out: inode_unlock(inode); return dentry; } diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 9a7aa89..3eea562 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -131,6 +131,7 @@ struct dentry { */ union { struct hlist_node d_alias; /* inode alias list */ + struct hlist_bl_node d_in_lookup_hash; /* only for in-lookup ones */ struct rcu_head d_rcu; } d_u; }; @@ -250,6 +251,7 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); +extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); -- cgit v0.10.2 From d9171b9345261e0d941d92fdda5672b5db67f968 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Apr 2016 03:33:13 -0400 Subject: parallel lookups machinery, part 4 (and last) If we *do* run into an in-lookup match, we need to wait for it to cease being in-lookup. Fortunately, we do have unused space in in-lookup dentries - d_lru is never looked at until it stops being in-lookup. So we can stash a pointer to wait_queue_head from stack frame of the caller of ->lookup(). Some precautions are needed while waiting, but it's not that hard - we do hold a reference to dentry we are waiting for, so it can't go away. If it's found to be in-lookup the wait_queue_head is still alive and will remain so at least while ->d_lock is held. Moreover, the condition we are waiting for becomes true at the same point where everything on that wq gets woken up, so we can just add ourselves to the queue once. d_alloc_parallel() gets a pointer to wait_queue_head_t from its caller; lookup_slow() adjusted, d_add_ci() taught to use d_alloc_parallel() if the dentry passed to it happens to be in-lookup one (i.e. if it's been called from the parallel lookup). That's pretty much it - all that remains is to switch ->i_mutex to rwsem and have lookup_slow() take it shared. Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index ea2de7c..59fcffc 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1987,28 +1987,36 @@ EXPORT_SYMBOL(d_obtain_root); struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, struct qstr *name) { - struct dentry *found; - struct dentry *new; + struct dentry *found, *res; /* * First check if a dentry matching the name already exists, * if not go ahead and create it now. */ found = d_hash_and_lookup(dentry->d_parent, name); - if (!found) { - new = d_alloc(dentry->d_parent, name); - if (!new) { - found = ERR_PTR(-ENOMEM); - } else { - found = d_splice_alias(inode, new); - if (found) { - dput(new); - return found; - } - return new; + if (found) { + iput(inode); + return found; + } + if (d_in_lookup(dentry)) { + found = d_alloc_parallel(dentry->d_parent, name, + dentry->d_wait); + if (IS_ERR(found) || !d_in_lookup(found)) { + iput(inode); + return found; } + } else { + found = d_alloc(dentry->d_parent, name); + if (!found) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + } + res = d_splice_alias(inode, found); + if (res) { + dput(found); + return res; } - iput(inode); return found; } EXPORT_SYMBOL(d_add_ci); @@ -2391,8 +2399,23 @@ static inline void end_dir_add(struct inode *dir, unsigned n) smp_store_release(&dir->i_dir_seq, n + 2); } +static void d_wait_lookup(struct dentry *dentry) +{ + if (d_in_lookup(dentry)) { + DECLARE_WAITQUEUE(wait, current); + add_wait_queue(dentry->d_wait, &wait); + do { + set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&dentry->d_lock); + schedule(); + spin_lock(&dentry->d_lock); + } while (d_in_lookup(dentry)); + } +} + struct dentry *d_alloc_parallel(struct dentry *parent, - const struct qstr *name) + const struct qstr *name, + wait_queue_head_t *wq) { unsigned int len = name->len; unsigned int hash = name->hash; @@ -2463,18 +2486,47 @@ retry: } dget(dentry); hlist_bl_unlock(b); - /* impossible until we actually enable parallel lookups */ - BUG(); - /* and this will be "wait for it to stop being in-lookup" */ - /* this one will be handled in the next commit */ + /* somebody is doing lookup for it right now; wait for it */ + spin_lock(&dentry->d_lock); + d_wait_lookup(dentry); + /* + * it's not in-lookup anymore; in principle we should repeat + * everything from dcache lookup, but it's likely to be what + * d_lookup() would've found anyway. If it is, just return it; + * otherwise we really have to repeat the whole thing. + */ + if (unlikely(dentry->d_name.hash != hash)) + goto mismatch; + if (unlikely(dentry->d_parent != parent)) + goto mismatch; + if (unlikely(d_unhashed(dentry))) + goto mismatch; + if (parent->d_flags & DCACHE_OP_COMPARE) { + int tlen = dentry->d_name.len; + const char *tname = dentry->d_name.name; + if (parent->d_op->d_compare(parent, dentry, tlen, tname, name)) + goto mismatch; + } else { + if (unlikely(dentry->d_name.len != len)) + goto mismatch; + if (unlikely(dentry_cmp(dentry, str, len))) + goto mismatch; + } + /* OK, it *is* a hashed match; return it */ + spin_unlock(&dentry->d_lock); dput(new); return dentry; } /* we can't take ->d_lock here; it's OK, though. */ new->d_flags |= DCACHE_PAR_LOOKUP; + new->d_wait = wq; hlist_bl_add_head_rcu(&new->d_u.d_in_lookup_hash, b); hlist_bl_unlock(b); return new; +mismatch: + spin_unlock(&dentry->d_lock); + dput(dentry); + goto retry; } EXPORT_SYMBOL(d_alloc_parallel); @@ -2485,9 +2537,11 @@ void __d_lookup_done(struct dentry *dentry) hlist_bl_lock(b); dentry->d_flags &= ~DCACHE_PAR_LOOKUP; __hlist_bl_del(&dentry->d_u.d_in_lookup_hash); + wake_up_all(dentry->d_wait); + dentry->d_wait = NULL; hlist_bl_unlock(b); INIT_HLIST_NODE(&dentry->d_u.d_alias); - /* more stuff will land here */ + INIT_LIST_HEAD(&dentry->d_lru); } EXPORT_SYMBOL(__d_lookup_done); diff --git a/fs/namei.c b/fs/namei.c index aa04320..7babb5e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1605,13 +1605,14 @@ static struct dentry *lookup_slow(const struct qstr *name, { struct dentry *dentry = ERR_PTR(-ENOENT), *old; struct inode *inode = dir->d_inode; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); inode_lock(inode); /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) goto out; again: - dentry = d_alloc_parallel(dir, name); + dentry = d_alloc_parallel(dir, name, &wq); if (IS_ERR(dentry)) goto out; if (unlikely(!d_in_lookup(dentry))) { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 3eea562..6b1d8bb 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -123,7 +123,10 @@ struct dentry { unsigned long d_time; /* used by d_revalidate */ void *d_fsdata; /* fs-specific data */ - struct list_head d_lru; /* LRU list */ + union { + struct list_head d_lru; /* LRU list */ + wait_queue_head_t *d_wait; /* in-lookup ones only */ + }; struct list_head d_child; /* child of parent list */ struct list_head d_subdirs; /* our children */ /* @@ -251,7 +254,8 @@ extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op /* allocate/de-allocate */ extern struct dentry * d_alloc(struct dentry *, const struct qstr *); extern struct dentry * d_alloc_pseudo(struct super_block *, const struct qstr *); -extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *); +extern struct dentry * d_alloc_parallel(struct dentry *, const struct qstr *, + wait_queue_head_t *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern struct dentry * d_add_ci(struct dentry *, struct inode *, struct qstr *); extern struct dentry * d_exact_alias(struct dentry *, struct inode *); -- cgit v0.10.2 From 9902af79c01a8e39bb99b922fa3eef6d4ea23d69 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 15 Apr 2016 15:08:36 -0400 Subject: parallel lookups: actual switch to rwsem ta-da! The main issue is the lack of down_write_killable(), so the places like readdir.c switched to plain inode_lock(); once killable variants of rwsem primitives appear, that'll be dealt with. lockdep side also might need more work Signed-off-by: Al Viro diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 8810e23..1567a53 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -539,3 +539,21 @@ in your dentry operations instead. it's a symlink. Checking ->i_mode is really needed now. In-tree we had to fix shmem_destroy_callback() that used to take that kind of shortcut; watch out, since that shortcut is no longer valid. +-- +[mandatory] + ->i_mutex is replaced with ->i_rwsem now. inode_lock() et.al. work as + they used to - they just take it exclusive. However, ->lookup() may be + called with parent locked shared. Its instances must not + * use d_instantiate) and d_rehash() separately - use d_add() or + d_splice_alias() instead. + * use d_rehash() alone - call d_add(new_dentry, NULL) instead. + * in the unlikely case when (read-only) access to filesystem + data structures needs exclusion for some reason, arrange it + yourself. None of the in-tree filesystems needed that. + * rely on ->d_parent and ->d_name not changing after dentry has + been fed to d_add() or d_splice_alias(). Again, none of the + in-tree instances relied upon that. + We are guaranteed that lookups of the same name in the same directory + will not happen in parallel ("same" in the sense of your ->d_compare()). + Lookups on different names in the same directory can and do happen in + parallel now. diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5a23806..0b8ba71 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -837,9 +837,11 @@ static noinline int btrfs_mksubvol(struct path *parent, struct dentry *dentry; int error; - error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (error == -EINTR) - return error; + inode_lock_nested(dir, I_MUTEX_PARENT); + // XXX: should've been + // mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); + // if (error == -EINTR) + // return error; dentry = lookup_one_len(name, parent->dentry, namelen); error = PTR_ERR(dentry); @@ -2366,9 +2368,11 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, goto out; - err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); - if (err == -EINTR) - goto out_drop_write; + inode_lock_nested(dir, I_MUTEX_PARENT); + // XXX: should've been + // err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); + // if (err == -EINTR) + // goto out_drop_write; dentry = lookup_one_len(vol_args->name, parent, namelen); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -2558,7 +2562,7 @@ out_dput: dput(dentry); out_unlock_dir: inode_unlock(dir); -out_drop_write: +//out_drop_write: mnt_drop_write_file(file); out: kfree(vol_args); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 03d124a..0387968 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -156,7 +156,7 @@ static void configfs_set_inode_lock_class(struct configfs_dirent *sd, if (depth > 0) { if (depth <= ARRAY_SIZE(default_group_class)) { - lockdep_set_class(&inode->i_mutex, + lockdep_set_class(&inode->i_rwsem, &default_group_class[depth - 1]); } else { /* diff --git a/fs/dcache.c b/fs/dcache.c index 59fcffc..e49ba7d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2932,7 +2932,8 @@ struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) static int __d_unalias(struct inode *inode, struct dentry *dentry, struct dentry *alias) { - struct mutex *m1 = NULL, *m2 = NULL; + struct mutex *m1 = NULL; + struct rw_semaphore *m2 = NULL; int ret = -ESTALE; /* If alias and dentry share a parent, then no extra locks required */ @@ -2943,15 +2944,15 @@ static int __d_unalias(struct inode *inode, if (!mutex_trylock(&dentry->d_sb->s_vfs_rename_mutex)) goto out_err; m1 = &dentry->d_sb->s_vfs_rename_mutex; - if (!inode_trylock(alias->d_parent->d_inode)) + if (!inode_trylock_shared(alias->d_parent->d_inode)) goto out_err; - m2 = &alias->d_parent->d_inode->i_mutex; + m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: __d_move(alias, dentry, false); ret = 0; out_err: if (m2) - mutex_unlock(m2); + up_read(m2); if (m1) mutex_unlock(m1); return ret; diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index c09c63d..4546360 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -824,7 +824,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) * i_mutex on quota files is special. Since this inode is hidden system * file, we are safe to define locking ourselves. */ - lockdep_set_class(&sdp->sd_quota_inode->i_mutex, + lockdep_set_class(&sdp->sd_quota_inode->i_rwsem, &gfs2_quota_imutex_key); error = gfs2_rindex_update(sdp); diff --git a/fs/inode.c b/fs/inode.c index 4b884f7..4ccbc21 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -166,8 +166,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) spin_lock_init(&inode->i_lock); lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); + init_rwsem(&inode->i_rwsem); + lockdep_set_class(&inode->i_rwsem, &sb->s_type->i_mutex_key); atomic_set(&inode->i_dio_count, 0); @@ -925,13 +925,13 @@ void lockdep_annotate_inode_mutex_key(struct inode *inode) struct file_system_type *type = inode->i_sb->s_type; /* Set new key only if filesystem hasn't already changed it */ - if (lockdep_match_class(&inode->i_mutex, &type->i_mutex_key)) { + if (lockdep_match_class(&inode->i_rwsem, &type->i_mutex_key)) { /* * ensure nobody is actually holding i_mutex */ - mutex_destroy(&inode->i_mutex); - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, + // mutex_destroy(&inode->i_mutex); + init_rwsem(&inode->i_rwsem); + lockdep_set_class(&inode->i_rwsem, &type->i_mutex_dir_key); } } diff --git a/fs/namei.c b/fs/namei.c index 7babb5e..8249852 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1607,7 +1607,7 @@ static struct dentry *lookup_slow(const struct qstr *name, struct inode *inode = dir->d_inode; DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - inode_lock(inode); + inode_lock_shared(inode); /* Don't go there if it's already dead */ if (unlikely(IS_DEADDIR(inode))) goto out; @@ -1638,7 +1638,7 @@ again: } } out: - inode_unlock(inode); + inode_unlock_shared(inode); return dentry; } diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 12f4a9e..0748777 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -262,7 +262,7 @@ static int ocfs2_init_locked_inode(struct inode *inode, void *opaque) inode->i_ino = args->fi_ino; OCFS2_I(inode)->ip_blkno = args->fi_blkno; if (args->fi_sysfile_type != 0) - lockdep_set_class(&inode->i_mutex, + lockdep_set_class(&inode->i_rwsem, &ocfs2_sysfile_lock_key[args->fi_sysfile_type]); if (args->fi_sysfile_type == USER_QUOTA_SYSTEM_INODE || args->fi_sysfile_type == GROUP_QUOTA_SYSTEM_INODE || diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c index 6ec1e43..da186ee 100644 --- a/fs/overlayfs/readdir.c +++ b/fs/overlayfs/readdir.c @@ -218,7 +218,9 @@ static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd) cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE); old_cred = override_creds(override_cred); - err = mutex_lock_killable(&dir->d_inode->i_mutex); + inode_lock(dir->d_inode); + err = 0; + // XXX: err = mutex_lock_killable(&dir->d_inode->i_mutex); if (!err) { while (rdd->first_maybe_whiteout) { p = rdd->first_maybe_whiteout; diff --git a/fs/readdir.c b/fs/readdir.c index e69ef3b..bf583e8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -32,9 +32,10 @@ int iterate_dir(struct file *file, struct dir_context *ctx) if (res) goto out; - res = mutex_lock_killable(&inode->i_mutex); - if (res) - goto out; + inode_lock(inode); + // res = mutex_lock_killable(&inode->i_mutex); + // if (res) + // goto out; res = -ENOENT; if (!IS_DEADDIR(inode)) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 00cecc5..3018f31 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -647,7 +647,7 @@ struct inode { /* Misc */ unsigned long i_state; - struct mutex i_mutex; + struct rw_semaphore i_rwsem; unsigned long dirtied_when; /* jiffies of first dirtying */ unsigned long dirtied_time_when; @@ -734,27 +734,42 @@ enum inode_i_mutex_lock_class static inline void inode_lock(struct inode *inode) { - mutex_lock(&inode->i_mutex); + down_write(&inode->i_rwsem); } static inline void inode_unlock(struct inode *inode) { - mutex_unlock(&inode->i_mutex); + up_write(&inode->i_rwsem); +} + +static inline void inode_lock_shared(struct inode *inode) +{ + down_read(&inode->i_rwsem); +} + +static inline void inode_unlock_shared(struct inode *inode) +{ + up_read(&inode->i_rwsem); } static inline int inode_trylock(struct inode *inode) { - return mutex_trylock(&inode->i_mutex); + return down_write_trylock(&inode->i_rwsem); +} + +static inline int inode_trylock_shared(struct inode *inode) +{ + return down_read_trylock(&inode->i_rwsem); } static inline int inode_is_locked(struct inode *inode) { - return mutex_is_locked(&inode->i_mutex); + return rwsem_is_locked(&inode->i_rwsem); } static inline void inode_lock_nested(struct inode *inode, unsigned subclass) { - mutex_lock_nested(&inode->i_mutex, subclass); + down_write_nested(&inode->i_rwsem, subclass); } void lock_two_nondirectories(struct inode *, struct inode*); -- cgit v0.10.2 From 63b6df14134ddd048984c8afadb46e721815bfc6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 17:08:21 -0400 Subject: give readdir(2)/getdents(2)/etc. uniform exclusion with lseek() same as read() on regular files has, and for the same reason. Signed-off-by: Al Viro diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 6cc0816..ffb93f49 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -147,7 +147,7 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, long __user *, basep) { int error; - struct fd arg = fdget(fd); + struct fd arg = fdget_pos(fd); struct osf_dirent_callback buf = { .ctx.actor = osf_filldir, .dirent = dirent, @@ -164,7 +164,7 @@ SYSCALL_DEFINE4(osf_getdirentries, unsigned int, fd, if (count != buf.count) error = count - buf.count; - fdput(arg); + fdput_pos(arg); return error; } diff --git a/fs/compat.c b/fs/compat.c index a71936a..8754e9a 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -884,7 +884,7 @@ COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, struct compat_old_linux_dirent __user *, dirent, unsigned int, count) { int error; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); struct compat_readdir_callback buf = { .ctx.actor = compat_fillonedir, .dirent = dirent @@ -897,7 +897,7 @@ COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, if (buf.result) error = buf.result; - fdput(f); + fdput_pos(f); return error; } @@ -975,7 +975,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - f = fdget(fd); + f = fdget_pos(fd); if (!f.file) return -EBADF; @@ -989,7 +989,7 @@ COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, else error = count - buf.count; } - fdput(f); + fdput_pos(f); return error; } @@ -1062,7 +1062,7 @@ COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - f = fdget(fd); + f = fdget_pos(fd); if (!f.file) return -EBADF; @@ -1077,7 +1077,7 @@ COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, else error = count - buf.count; } - fdput(f); + fdput_pos(f); return error; } #endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ diff --git a/fs/file.c b/fs/file.c index 1fbc5c0..6b1acdf 100644 --- a/fs/file.c +++ b/fs/file.c @@ -784,6 +784,11 @@ unsigned long __fdget_pos(unsigned int fd) return v; } +void __f_unlock_pos(struct file *f) +{ + mutex_unlock(&f->f_pos_lock); +} + /* * We only lock f_pos if we have threads or if the file might be * shared with another process. In both cases we'll have an elevated diff --git a/fs/open.c b/fs/open.c index 17cb6b1..938a658 100644 --- a/fs/open.c +++ b/fs/open.c @@ -713,7 +713,7 @@ static int do_dentry_open(struct file *f, } /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ - if (S_ISREG(inode->i_mode)) + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) f->f_mode |= FMODE_ATOMIC_POS; f->f_op = fops_get(inode->i_fop); diff --git a/fs/read_write.c b/fs/read_write.c index cf377cf..69c7c3c 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -302,18 +302,6 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); -static inline struct fd fdget_pos(int fd) -{ - return __to_fd(__fdget_pos(fd)); -} - -static inline void fdput_pos(struct fd f) -{ - if (f.flags & FDPUT_POS_UNLOCK) - mutex_unlock(&f.file->f_pos_lock); - fdput(f); -} - SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) { off_t retval; diff --git a/fs/readdir.c b/fs/readdir.c index bf583e8..d7308b8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -112,7 +112,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, struct old_linux_dirent __user *, dirent, unsigned int, count) { int error; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); struct readdir_callback buf = { .ctx.actor = fillonedir, .dirent = dirent @@ -125,7 +125,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, if (buf.result) error = buf.result; - fdput(f); + fdput_pos(f); return error; } @@ -209,7 +209,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - f = fdget(fd); + f = fdget_pos(fd); if (!f.file) return -EBADF; @@ -223,7 +223,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, else error = count - buf.count; } - fdput(f); + fdput_pos(f); return error; } @@ -290,7 +290,7 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, if (!access_ok(VERIFY_WRITE, dirent, count)) return -EFAULT; - f = fdget(fd); + f = fdget_pos(fd); if (!f.file) return -EBADF; @@ -305,6 +305,6 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, else error = count - buf.count; } - fdput(f); + fdput_pos(f); return error; } diff --git a/include/linux/file.h b/include/linux/file.h index f87d308..7444f5f 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -44,6 +44,7 @@ extern struct file *fget_raw(unsigned int fd); extern unsigned long __fdget(unsigned int fd); extern unsigned long __fdget_raw(unsigned int fd); extern unsigned long __fdget_pos(unsigned int fd); +extern void __f_unlock_pos(struct file *); static inline struct fd __to_fd(unsigned long v) { @@ -60,6 +61,18 @@ static inline struct fd fdget_raw(unsigned int fd) return __to_fd(__fdget_raw(fd)); } +static inline struct fd fdget_pos(int fd) +{ + return __to_fd(__fdget_pos(fd)); +} + +static inline void fdput_pos(struct fd f) +{ + if (f.flags & FDPUT_POS_UNLOCK) + __f_unlock_pos(f.file); + fdput(f); +} + extern int f_dupfd(unsigned int from, struct file *file, unsigned flags); extern int replace_fd(unsigned fd, struct file *file, unsigned flags); extern void set_close_on_exec(unsigned int fd, int flag); -- cgit v0.10.2 From 6192269444ebfbfb42e23c7a6a93c76ffe4b5e51 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 23:08:32 -0400 Subject: introduce a parallel variant of ->iterate() New method: ->iterate_shared(). Same arguments as in ->iterate(), called with the directory locked only shared. Once all filesystems switch, the old one will be gone. Signed-off-by: Al Viro diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 1567a53..12c57ab 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -557,3 +557,21 @@ in your dentry operations instead. will not happen in parallel ("same" in the sense of your ->d_compare()). Lookups on different names in the same directory can and do happen in parallel now. +-- +[recommended] + ->iterate_shared() is added; it's a parallel variant of ->iterate(). + Exclusion on struct file level is still provided (as well as that + between it and lseek on the same struct file), but if your directory + has been opened several times, you can get these called in parallel. + Exclusion between that method and all directory-modifying ones is + still provided, of course. + + Often enough ->iterate() can serve as ->iterate_shared() without any + changes - it is a read-only operation, after all. If you have any + per-inode or per-dentry in-core data structures modified by ->iterate(), + you might need something to serialize the access to them. If you + do dcache pre-seeding, you'll need to switch to d_alloc_parallel() for + that; look for in-tree examples. + + Old method is only used if the new one is absent; eventually it will + be removed. Switch while you still can; the old one won't stay. diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 42e731b..6fb8672 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -424,16 +424,22 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx) BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); host_file = cfi->cfi_container; - if (host_file->f_op->iterate) { + if (host_file->f_op->iterate || host_file->f_op->iterate_shared) { struct inode *host_inode = file_inode(host_file); - - inode_lock(host_inode); ret = -ENOENT; if (!IS_DEADDIR(host_inode)) { - ret = host_file->f_op->iterate(host_file, ctx); - file_accessed(host_file); + if (host_file->f_op->iterate_shared) { + inode_lock_shared(host_inode); + ret = host_file->f_op->iterate_shared(host_file, ctx); + file_accessed(host_file); + inode_unlock_shared(host_inode); + } else { + inode_lock(host_inode); + ret = host_file->f_op->iterate(host_file, ctx); + file_accessed(host_file); + inode_unlock(host_inode); + } } - inode_unlock(host_inode); return ret; } /* Venus: we must read Venus dirents from a file */ diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 402c5ca..207ba8d 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -312,7 +312,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child) goto out; error = -EINVAL; - if (!file->f_op->iterate) + if (!file->f_op->iterate && !file->f_op->iterate_shared) goto out_close; buffer.sequence = 0; diff --git a/fs/readdir.c b/fs/readdir.c index d7308b8..a86c6c0 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -24,15 +24,21 @@ int iterate_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); + bool shared = false; int res = -ENOTDIR; - if (!file->f_op->iterate) + if (file->f_op->iterate_shared) + shared = true; + else if (!file->f_op->iterate) goto out; res = security_file_permission(file, MAY_READ); if (res) goto out; - inode_lock(inode); + if (shared) + inode_lock_shared(inode); + else + inode_lock(inode); // res = mutex_lock_killable(&inode->i_mutex); // if (res) // goto out; @@ -40,12 +46,18 @@ int iterate_dir(struct file *file, struct dir_context *ctx) res = -ENOENT; if (!IS_DEADDIR(inode)) { ctx->pos = file->f_pos; - res = file->f_op->iterate(file, ctx); + if (shared) + res = file->f_op->iterate_shared(file, ctx); + else + res = file->f_op->iterate(file, ctx); file->f_pos = ctx->pos; fsnotify_access(file); file_accessed(file); } - inode_unlock(inode); + if (shared) + inode_unlock_shared(inode); + else + inode_unlock(inode); out: return res; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 3018f31..3dc0258 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1674,6 +1674,7 @@ struct file_operations { ssize_t (*read_iter) (struct kiocb *, struct iov_iter *); ssize_t (*write_iter) (struct kiocb *, struct iov_iter *); int (*iterate) (struct file *, struct dir_context *); + int (*iterate_shared) (struct file *, struct dir_context *); unsigned int (*poll) (struct file *, struct poll_table_struct *); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); -- cgit v0.10.2 From 3781764b5cb109152af180a4f17cd3a24d5bc51f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 16:31:31 -0400 Subject: proc_fill_cache(): switch to d_alloc_parallel() ... making it usable with directory locked shared Signed-off-by: Al Viro diff --git a/fs/proc/base.c b/fs/proc/base.c index b1755b2..15b60f1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1819,12 +1819,17 @@ bool proc_fill_cache(struct file *file, struct dir_context *ctx, child = d_hash_and_lookup(dir, &qname); if (!child) { - child = d_alloc(dir, &qname); - if (!child) - goto end_instantiate; - if (instantiate(d_inode(dir), child, task, ptr) < 0) { - dput(child); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + child = d_alloc_parallel(dir, &qname, &wq); + if (IS_ERR(child)) goto end_instantiate; + if (d_in_lookup(child)) { + int err = instantiate(d_inode(dir), child, task, ptr); + d_lookup_done(child); + if (err < 0) { + dput(child); + goto end_instantiate; + } } } inode = d_inode(child); -- cgit v0.10.2 From 76aab3ab61f3051362f20fc26e2c50a65d6ba904 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 16:36:09 -0400 Subject: proc_sys_fill_cache(): switch to d_alloc_parallel() make it usable with directory locked shared Signed-off-by: Al Viro diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index fe5b6e6..d69e238 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -627,18 +627,19 @@ static bool proc_sys_fill_cache(struct file *file, child = d_lookup(dir, &qname); if (!child) { - child = d_alloc(dir, &qname); - if (child) { + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); + child = d_alloc_parallel(dir, &qname, &wq); + if (IS_ERR(child)) + return false; + if (d_in_lookup(child)) { inode = proc_sys_make_inode(dir->d_sb, head, table); if (!inode) { + d_lookup_done(child); dput(child); return false; - } else { - d_set_d_op(child, &proc_sys_dentry_operations); - d_add(child, inode); } - } else { - return false; + d_set_d_op(child, &proc_sys_dentry_operations); + d_add(child, inode); } } inode = d_inode(child); -- cgit v0.10.2 From f50752eaa0b0be05431897a973b2ab05229cc2f5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 17:13:54 -0400 Subject: switch all procfs directories ->iterate_shared() Signed-off-by: Al Viro diff --git a/fs/proc/base.c b/fs/proc/base.c index 15b60f1..beb45d4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2159,8 +2159,8 @@ out: static const struct file_operations proc_map_files_operations = { .read = generic_read_dir, - .iterate = proc_map_files_readdir, - .llseek = default_llseek, + .iterate_shared = proc_map_files_readdir, + .llseek = generic_file_llseek, }; #ifdef CONFIG_CHECKPOINT_RESTORE @@ -2507,8 +2507,8 @@ static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) static const struct file_operations proc_attr_dir_operations = { .read = generic_read_dir, - .iterate = proc_attr_dir_readdir, - .llseek = default_llseek, + .iterate_shared = proc_attr_dir_readdir, + .llseek = generic_file_llseek, }; static struct dentry *proc_attr_dir_lookup(struct inode *dir, @@ -2915,8 +2915,8 @@ static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) static const struct file_operations proc_tgid_base_operations = { .read = generic_read_dir, - .iterate = proc_tgid_base_readdir, - .llseek = default_llseek, + .iterate_shared = proc_tgid_base_readdir, + .llseek = generic_file_llseek, }; static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) @@ -3263,8 +3263,8 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den static const struct file_operations proc_tid_base_operations = { .read = generic_read_dir, - .iterate = proc_tid_base_readdir, - .llseek = default_llseek, + .iterate_shared = proc_tid_base_readdir, + .llseek = generic_file_llseek, }; static const struct inode_operations proc_tid_base_inode_operations = { @@ -3474,6 +3474,6 @@ static const struct inode_operations proc_task_inode_operations = { static const struct file_operations proc_task_operations = { .read = generic_read_dir, - .iterate = proc_task_readdir, - .llseek = default_llseek, + .iterate_shared = proc_task_readdir, + .llseek = generic_file_llseek, }; diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 56afa5e..01df23c 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -276,8 +276,8 @@ static int proc_readfd(struct file *file, struct dir_context *ctx) const struct file_operations proc_fd_operations = { .read = generic_read_dir, - .iterate = proc_readfd, - .llseek = default_llseek, + .iterate_shared = proc_readfd, + .llseek = generic_file_llseek, }; static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry, @@ -361,6 +361,6 @@ const struct inode_operations proc_fdinfo_inode_operations = { const struct file_operations proc_fdinfo_operations = { .read = generic_read_dir, - .iterate = proc_readfdinfo, - .llseek = default_llseek, + .iterate_shared = proc_readfdinfo, + .llseek = generic_file_llseek, }; diff --git a/fs/proc/generic.c b/fs/proc/generic.c index ff3ffc7..c633476 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -318,7 +318,7 @@ int proc_readdir(struct file *file, struct dir_context *ctx) static const struct file_operations proc_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = proc_readdir, + .iterate_shared = proc_readdir, }; /* diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 72cb26f..51b8b0a 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -139,7 +139,8 @@ out: const struct file_operations proc_ns_dir_operations = { .read = generic_read_dir, - .iterate = proc_ns_dir_readdir, + .iterate_shared = proc_ns_dir_readdir, + .llseek = generic_file_llseek, }; static struct dentry *proc_ns_dir_lookup(struct inode *dir, diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 350984a..c8bbc68 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -179,7 +179,7 @@ static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx) const struct file_operations proc_net_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = proc_tgid_net_readdir, + .iterate_shared = proc_tgid_net_readdir, }; static __net_init int proc_net_ns_init(struct net *net) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d69e238..5e57c3e 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -790,7 +790,7 @@ static const struct file_operations proc_sys_file_operations = { static const struct file_operations proc_sys_dir_file_operations = { .read = generic_read_dir, - .iterate = proc_sys_readdir, + .iterate_shared = proc_sys_readdir, .llseek = generic_file_llseek, }; diff --git a/fs/proc/root.c b/fs/proc/root.c index 361ab4e..55bc7d6 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -226,8 +226,8 @@ static int proc_root_readdir(struct file *file, struct dir_context *ctx) */ static const struct file_operations proc_root_operations = { .read = generic_read_dir, - .iterate = proc_root_readdir, - .llseek = default_llseek, + .iterate_shared = proc_root_readdir, + .llseek = generic_file_llseek, }; /* -- cgit v0.10.2 From d9b3dbdcfd6213731c3eb985a3a83537e3894e12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 17:30:32 -0400 Subject: fuse: switch to ->iterate_shared() Switch dcache pre-seeding on readdir to d_alloc_parallel(); nothing else is needed. Signed-off-by: Al Viro diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b618527..b941905 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1162,7 +1162,6 @@ static int fuse_direntplus_link(struct file *file, struct fuse_direntplus *direntplus, u64 attr_version) { - int err; struct fuse_entry_out *o = &direntplus->entry_out; struct fuse_dirent *dirent = &direntplus->dirent; struct dentry *parent = file->f_path.dentry; @@ -1172,6 +1171,7 @@ static int fuse_direntplus_link(struct file *file, struct inode *dir = d_inode(parent); struct fuse_conn *fc; struct inode *inode; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); if (!o->nodeid) { /* @@ -1204,65 +1204,61 @@ static int fuse_direntplus_link(struct file *file, name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); - if (dentry) { + if (!dentry) { +retry: + dentry = d_alloc_parallel(parent, &name, &wq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + } + if (!d_in_lookup(dentry)) { + struct fuse_inode *fi; inode = d_inode(dentry); - if (!inode) { - d_drop(dentry); - } else if (get_node_id(inode) != o->nodeid || - ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { + if (!inode || + get_node_id(inode) != o->nodeid || + ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { d_invalidate(dentry); - } else if (is_bad_inode(inode)) { - err = -EIO; - goto out; - } else { - struct fuse_inode *fi; - fi = get_fuse_inode(inode); - spin_lock(&fc->lock); - fi->nlookup++; - spin_unlock(&fc->lock); - - fuse_change_attributes(inode, &o->attr, - entry_attr_timeout(o), - attr_version); - - /* - * The other branch to 'found' comes via fuse_iget() - * which bumps nlookup inside - */ - goto found; + dput(dentry); + goto retry; + } + if (is_bad_inode(inode)) { + dput(dentry); + return -EIO; } - dput(dentry); - } - - dentry = d_alloc(parent, &name); - err = -ENOMEM; - if (!dentry) - goto out; - inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, - &o->attr, entry_attr_timeout(o), attr_version); - if (!inode) - goto out; + fi = get_fuse_inode(inode); + spin_lock(&fc->lock); + fi->nlookup++; + spin_unlock(&fc->lock); - alias = d_splice_alias(inode, dentry); - err = PTR_ERR(alias); - if (IS_ERR(alias)) - goto out; + fuse_change_attributes(inode, &o->attr, + entry_attr_timeout(o), + attr_version); + /* + * The other branch comes via fuse_iget() + * which bumps nlookup inside + */ + } else { + inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, + &o->attr, entry_attr_timeout(o), + attr_version); + if (!inode) + inode = ERR_PTR(-ENOMEM); - if (alias) { - dput(dentry); - dentry = alias; + alias = d_splice_alias(inode, dentry); + d_lookup_done(dentry); + if (alias) { + dput(dentry); + dentry = alias; + } + if (IS_ERR(dentry)) + return PTR_ERR(dentry); } - -found: if (fc->readdirplus_auto) set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state); fuse_change_entry_timeout(dentry, o); - err = 0; -out: dput(dentry); - return err; + return 0; } static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, @@ -1892,7 +1888,7 @@ static const struct inode_operations fuse_dir_inode_operations = { static const struct file_operations fuse_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = fuse_readdir, + .iterate_shared = fuse_readdir, .open = fuse_dir_open, .release = fuse_dir_release, .fsync = fuse_dir_fsync, -- cgit v0.10.2 From 3125d2650cae97d8f313ab696cd0ed66916e767a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 17:40:47 -0400 Subject: cifs: switch to ->iterate_shared() Signed-off-by: Al Viro diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 8920156..aadb593 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1083,7 +1083,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { }; const struct file_operations cifs_dir_ops = { - .iterate = cifs_readdir, + .iterate_shared = cifs_readdir, .release = cifs_closedir, .read = generic_read_dir, .unlocked_ioctl = cifs_ioctl, diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 4cfb4d9f..867439c 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -80,18 +80,32 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, struct inode *inode; struct super_block *sb = parent->d_sb; struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); cifs_dbg(FYI, "%s: for %s\n", __func__, name->name); dentry = d_hash_and_lookup(parent, name); + if (!dentry) { + /* + * If we know that the inode will need to be revalidated + * immediately, then don't create a new dentry for it. + * We'll end up doing an on the wire call either way and + * this spares us an invalidation. + */ + if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) + return; +retry: + dentry = d_alloc_parallel(parent, name, &wq); + } if (IS_ERR(dentry)) return; - - if (dentry) { + if (!d_in_lookup(dentry)) { inode = d_inode(dentry); if (inode) { - if (d_mountpoint(dentry)) - goto out; + if (d_mountpoint(dentry)) { + dput(dentry); + return; + } /* * If we're generating inode numbers, then we don't * want to clobber the existing one with the one that @@ -106,33 +120,22 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, (inode->i_mode & S_IFMT) == (fattr->cf_mode & S_IFMT)) { cifs_fattr_to_inode(inode, fattr); - goto out; + dput(dentry); + return; } } d_invalidate(dentry); dput(dentry); + goto retry; + } else { + inode = cifs_iget(sb, fattr); + if (!inode) + inode = ERR_PTR(-ENOMEM); + alias = d_splice_alias(inode, dentry); + d_lookup_done(dentry); + if (alias && !IS_ERR(alias)) + dput(alias); } - - /* - * If we know that the inode will need to be revalidated immediately, - * then don't create a new dentry for it. We'll end up doing an on - * the wire call either way and this spares us an invalidation. - */ - if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) - return; - - dentry = d_alloc(parent, name); - if (!dentry) - return; - - inode = cifs_iget(sb, fattr); - if (!inode) - goto out; - - alias = d_splice_alias(inode, dentry); - if (alias && !IS_ERR(alias)) - dput(alias); -out: dput(dentry); } -- cgit v0.10.2 From 4e82901cd6d1af21ae232ae835c36d8230c809e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 19:52:15 -0400 Subject: dcache_{readdir,dir_lseek}() users: switch to ->iterate_shared no need to lock directory in dcache_dir_lseek(), while we are at it - per-struct file exclusion is enough. Signed-off-by: Al Viro diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 6ca5f05..5be15cf 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -238,7 +238,7 @@ const struct file_operations spufs_context_fops = { .release = spufs_dir_close, .llseek = dcache_dir_lseek, .read = generic_read_dir, - .iterate = dcache_readdir, + .iterate_shared = dcache_readdir, .fsync = noop_fsync, }; EXPORT_SYMBOL_GPL(spufs_context_fops); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 7ab9239..78bd802 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -39,7 +39,7 @@ const struct file_operations autofs4_root_operations = { .open = dcache_dir_open, .release = dcache_dir_close, .read = generic_read_dir, - .iterate = dcache_readdir, + .iterate_shared = dcache_readdir, .llseek = dcache_dir_lseek, .unlocked_ioctl = autofs4_root_ioctl, #ifdef CONFIG_COMPAT @@ -51,7 +51,7 @@ const struct file_operations autofs4_dir_operations = { .open = autofs4_dir_open, .release = dcache_dir_close, .read = generic_read_dir, - .iterate = dcache_readdir, + .iterate_shared = dcache_readdir, .llseek = dcache_dir_lseek, }; diff --git a/fs/libfs.c b/fs/libfs.c index 43dd280..dd8a074 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -89,7 +89,6 @@ EXPORT_SYMBOL(dcache_dir_close); loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry *dentry = file->f_path.dentry; - inode_lock(d_inode(dentry)); switch (whence) { case 1: offset += file->f_pos; @@ -97,7 +96,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) if (offset >= 0) break; default: - inode_unlock(d_inode(dentry)); return -EINVAL; } if (offset != file->f_pos) { @@ -124,7 +122,6 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence) spin_unlock(&dentry->d_lock); } } - inode_unlock(d_inode(dentry)); return offset; } EXPORT_SYMBOL(dcache_dir_lseek); @@ -190,7 +187,7 @@ const struct file_operations simple_dir_operations = { .release = dcache_dir_close, .llseek = dcache_dir_lseek, .read = generic_read_dir, - .iterate = dcache_readdir, + .iterate_shared = dcache_readdir, .fsync = noop_fsync, }; EXPORT_SYMBOL(simple_dir_operations); -- cgit v0.10.2 From 3b0a3c1ac1598722fc289da19219d14f2a37b31f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 23:42:46 -0400 Subject: simple local filesystems: switch to ->iterate_shared() no changes needed (XFS isn't simple, but it has the same parallelism in the interesting parts exercised from CXFS). Signed-off-by: Al Viro diff --git a/fs/affs/dir.c b/fs/affs/dir.c index ac4f318..f1e7294 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -20,7 +20,7 @@ static int affs_readdir(struct file *, struct dir_context *); const struct file_operations affs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, - .iterate = affs_readdir, + .iterate_shared = affs_readdir, .fsync = affs_file_fsync, }; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index f0f4363..19efd11 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -716,7 +716,7 @@ not_empty: const struct file_operations ext2_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = ext2_readdir, + .iterate_shared = ext2_readdir, .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, diff --git a/fs/minix/dir.c b/fs/minix/dir.c index 33957c0..31dcd51 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -21,7 +21,7 @@ static int minix_readdir(struct file *, struct dir_context *); const struct file_operations minix_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = minix_readdir, + .iterate_shared = minix_readdir, .fsync = generic_file_fsync, }; diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index c0f0a3e..2661b77 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -23,7 +23,7 @@ static int sysv_readdir(struct file *, struct dir_context *); const struct file_operations sysv_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = sysv_readdir, + .iterate_shared = sysv_readdir, .fsync = generic_file_fsync, }; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 4c07421..57dcced 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -653,7 +653,7 @@ not_empty: const struct file_operations ufs_dir_operations = { .read = generic_read_dir, - .iterate = ufs_readdir, + .iterate_shared = ufs_readdir, .fsync = generic_file_fsync, .llseek = generic_file_llseek, }; diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 569938a..345fd85 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1714,7 +1714,7 @@ const struct file_operations xfs_file_operations = { const struct file_operations xfs_dir_file_operations = { .open = xfs_dir_open, .read = generic_read_dir, - .iterate = xfs_file_readdir, + .iterate_shared = xfs_file_readdir, .llseek = generic_file_llseek, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT -- cgit v0.10.2 From 6ac087099edf09ca357e2f765e3e24677543897c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Apr 2016 00:02:50 -0400 Subject: path_openat(): take O_PATH handling out of do_last() do_last() and lookup_open() simpler that way and so does O_PATH itself. As it bloody well should: we find what the pathname resolves to, same way as in stat() et.al. and associate it with FMODE_PATH struct file. Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 8249852..8145b41 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3019,7 +3019,7 @@ static int lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; } - if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) { + if (dir_inode->i_op->atomic_open) { return atomic_open(nd, dentry, path, file, op, got_write, need_lookup, opened); } @@ -3219,7 +3219,7 @@ finish_open: return error; } audit_inode(nd->name, nd->path.dentry, 0); - if (unlikely(d_is_symlink(nd->path.dentry)) && !(open_flag & O_PATH)) { + if (unlikely(d_is_symlink(nd->path.dentry))) { error = -ELOOP; goto out; } @@ -3239,11 +3239,9 @@ finish_open: got_write = true; } finish_open_created: - if (likely(!(open_flag & O_PATH))) { - error = may_open(&nd->path, acc_mode, open_flag); - if (error) - goto out; - } + error = may_open(&nd->path, acc_mode, open_flag); + if (error) + goto out; BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */ error = vfs_open(&nd->path, file, current_cred()); if (!error) { @@ -3357,6 +3355,18 @@ out: return error; } +static int do_o_path(struct nameidata *nd, unsigned flags, struct file *file) +{ + struct path path; + int error = path_lookupat(nd, flags, &path); + if (!error) { + audit_inode(nd->name, path.dentry, 0); + error = vfs_open(&path, file, current_cred()); + path_put(&path); + } + return error; +} + static struct file *path_openat(struct nameidata *nd, const struct open_flags *op, unsigned flags) { @@ -3376,6 +3386,13 @@ static struct file *path_openat(struct nameidata *nd, goto out2; } + if (unlikely(file->f_flags & O_PATH)) { + error = do_o_path(nd, flags, file); + if (!error) + opened |= FILE_OPENED; + goto out2; + } + s = path_init(nd, flags); if (IS_ERR(s)) { put_filp(file); -- cgit v0.10.2 From ce8644fcadc52da72e19ae9f0866fb3eb15d3df4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Apr 2016 14:17:56 -0400 Subject: lookup_open(): expand the call of vfs_create() Lift IS_DEADDIR handling up into the part common with atomic_open(), remove it from the latter. Collapse permission checks into the call of may_o_create(), getting it closer to atomic_open() case. Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 8145b41..f2d55c8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2838,12 +2838,6 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, BUG_ON(dentry->d_inode); - /* Don't create child dentry for a dead directory. */ - if (unlikely(IS_DEADDIR(dir))) { - error = -ENOENT; - goto out; - } - mode = op->mode; if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) mode &= ~current_umask(); @@ -3004,6 +2998,9 @@ static int lookup_open(struct nameidata *nd, struct path *path, int error; bool need_lookup = false; + if (unlikely(IS_DEADDIR(dir_inode))) + return -ENOENT; + *opened &= ~FILE_CREATED; dentry = lookup_dcache(&nd->last, dir, nd->flags); if (IS_ERR(dentry)) @@ -3049,13 +3046,19 @@ static int lookup_open(struct nameidata *nd, struct path *path, goto out_dput; } *opened |= FILE_CREATED; - error = security_path_mknod(&nd->path, dentry, mode, 0); + audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE); + error = may_o_create(&nd->path, dentry, mode); if (error) goto out_dput; - error = vfs_create(dir->d_inode, dentry, mode, - nd->flags & LOOKUP_EXCL); + if (!dir_inode->i_op->create) { + error = -EACCES; + goto out_dput; + } + error = dir_inode->i_op->create(dir_inode, dentry, mode, + op->open_flag & O_EXCL); if (error) goto out_dput; + fsnotify_create(dir_inode, dentry); } out_no_open: path->dentry = dentry; -- cgit v0.10.2 From 5249e411b4d456762641d98eb6aad22da4e0a30d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Apr 2016 13:25:33 -0400 Subject: atomic_open(): don't bother with EEXIST check - it's done in do_last() Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 85d19e6..eec7c0a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2914,11 +2914,6 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, error = create_error; goto out; } - } else { - if (excl && !(*opened & FILE_CREATED)) { - error = -EEXIST; - goto out; - } } goto looked_up; } -- cgit v0.10.2 From 9d0728e16e9269f368a6217b34166999c8ba2f3e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Apr 2016 16:52:17 -0400 Subject: atomic_open(): consolidate "overridden ENOENT" in open-yourself cases Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index eec7c0a..36520fc 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2908,13 +2908,6 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (*opened & FILE_CREATED) fsnotify_create(dir, dentry); - if (!dentry->d_inode) { - WARN_ON(*opened & FILE_CREATED); - if (create_error) { - error = create_error; - goto out; - } - } goto looked_up; } @@ -2942,11 +2935,11 @@ no_open: if (IS_ERR(dentry)) return PTR_ERR(dentry); } +looked_up: if (create_error && !dentry->d_inode) { error = create_error; goto out; } -looked_up: path->dentry = dentry; path->mnt = nd->path.mnt; return 1; -- cgit v0.10.2 From 55db2fd9361424a6a5815e7796bcf03b19df437c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Apr 2016 02:36:39 -0400 Subject: atomic_open(): massage the create_error logics a bit Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 36520fc..ed48178 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2855,33 +2855,30 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, * Another problem is returing the "right" error value (e.g. for an * O_EXCL open we want to return EEXIST not EROFS). */ - if (((open_flag & (O_CREAT | O_TRUNC)) || - (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) { - if (!(open_flag & O_CREAT)) { - /* - * No O_CREATE -> atomicity not a requirement -> fall - * back to lookup + open - */ - goto no_open; - } else if (open_flag & (O_EXCL | O_TRUNC)) { - /* Fall back and fail with the right error */ - create_error = -EROFS; - goto no_open; - } else { - /* No side effects, safe to clear O_CREAT */ - create_error = -EROFS; - open_flag &= ~O_CREAT; - } - } - if (open_flag & O_CREAT) { - error = may_o_create(&nd->path, dentry, mode); - if (error) { - create_error = error; - if (open_flag & O_EXCL) + if (unlikely(!got_write)) { + create_error = -EROFS; + if (open_flag & (O_EXCL | O_TRUNC)) { + /* Fall back and fail with the right error */ goto no_open; + } + /* No side effects, safe to clear O_CREAT */ open_flag &= ~O_CREAT; + } else { + create_error = may_o_create(&nd->path, dentry, mode); + if (create_error) { + if (open_flag & O_EXCL) + goto no_open; + open_flag &= ~O_CREAT; + } } + } else if ((open_flag & (O_TRUNC|O_WRONLY|O_RDWR)) && + unlikely(!got_write)) { + /* + * No O_CREATE -> atomicity not a requirement -> fall + * back to lookup + open + */ + goto no_open; } if (nd->flags & LOOKUP_DIRECTORY) -- cgit v0.10.2 From 47f9dbd3877d9d9757366278a65d9fb3de5c1c4e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Apr 2016 02:52:55 -0400 Subject: do_last(): get rid of duplicate ELOOP check may_open() will catch it Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index ed48178..6503767 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3195,10 +3195,6 @@ finish_open: return error; } audit_inode(nd->name, nd->path.dentry, 0); - if (unlikely(d_is_symlink(nd->path.dentry))) { - error = -ELOOP; - goto out; - } error = -EISDIR; if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) goto out; -- cgit v0.10.2 From fe9ec8291fcaa74badf0a67e3a82741b3ee40a9e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Apr 2016 03:14:20 -0400 Subject: do_last(): take fput() on error after opening to out: make it conditional on *opened & FILE_OPENED; in addition to getting rid of exit_fput: thing, it simplifies atomic_open() cleanup on may_open() failure. Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 6503767..38eab15 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2919,9 +2919,6 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, acc_mode = 0; } error = may_open(&file->f_path, acc_mode, open_flag); - if (error) - fput(file); - out: dput(dentry); return error; @@ -3225,18 +3222,13 @@ finish_open_created: } opened: error = open_check_o_direct(file); - if (error) - goto exit_fput; - error = ima_file_check(file, op->acc_mode, *opened); - if (error) - goto exit_fput; - - if (will_truncate) { + if (!error) + error = ima_file_check(file, op->acc_mode, *opened); + if (!error && will_truncate) error = handle_truncate(file); - if (error) - goto exit_fput; - } out: + if (unlikely(error) && (*opened & FILE_OPENED)) + fput(file); if (unlikely(error > 0)) { WARN_ON(1); error = -EINVAL; @@ -3246,10 +3238,6 @@ out: path_put(&save_parent); return error; -exit_fput: - fput(file); - goto out; - stale_open: /* If no saved parent or already retried then can't retry */ if (!save_parent.dentry || retried) -- cgit v0.10.2 From 0fb1ea0933153180df666f6694855ddfe9408e4a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Apr 2016 14:13:10 -0400 Subject: atomic_open(): delay open_to_namei_flags() until the method call nobody else needs that transformation. Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 38eab15..3951cc4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2828,7 +2828,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, int *opened) { struct inode *dir = nd->path.dentry->d_inode; - unsigned open_flag = open_to_namei_flags(op->open_flag); + unsigned open_flag = op->open_flag; umode_t mode; int error; int acc_mode; @@ -2886,8 +2886,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, file->f_path.dentry = DENTRY_NOT_SET; file->f_path.mnt = nd->path.mnt; - error = dir->i_op->atomic_open(dir, dentry, file, open_flag, mode, - opened); + error = dir->i_op->atomic_open(dir, dentry, file, + open_to_namei_flags(open_flag), + mode, opened); if (error < 0) { if (create_error && error == -ENOENT) error = create_error; -- cgit v0.10.2 From b3d58eaffb98e1b5bbf2d6756c59398213caba57 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Apr 2016 15:47:45 -0400 Subject: atomic_open(): be paranoid about may_open() return value It should never return positives; however, with Linux S&M crowd involved, no bogosity is impossible. Results would be unpleasant... Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 3951cc4..8d562a7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2920,6 +2920,8 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, acc_mode = 0; } error = may_open(&file->f_path, acc_mode, open_flag); + if (WARN_ON(error > 0)) + error = -EINVAL; out: dput(dentry); return error; -- cgit v0.10.2 From 1643b43fbd0524e7da7259075032936c8fb68a05 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Apr 2016 19:14:10 -0400 Subject: lookup_open(): lift the "fallback to !O_CREAT" logics from atomic_open() Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 8d562a7..4359d22 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2824,63 +2824,19 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode) static int atomic_open(struct nameidata *nd, struct dentry *dentry, struct path *path, struct file *file, const struct open_flags *op, - bool got_write, bool need_lookup, + int open_flag, umode_t mode, int *opened) { struct inode *dir = nd->path.dentry->d_inode; - unsigned open_flag = op->open_flag; - umode_t mode; int error; int acc_mode; - int create_error = 0; struct dentry *const DENTRY_NOT_SET = (void *) -1UL; bool excl; - BUG_ON(dentry->d_inode); - - mode = op->mode; - if ((open_flag & O_CREAT) && !IS_POSIXACL(dir)) - mode &= ~current_umask(); - excl = (open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT); if (excl) open_flag &= ~O_TRUNC; - /* - * Checking write permission is tricky, bacuse we don't know if we are - * going to actually need it: O_CREAT opens should work as long as the - * file exists. But checking existence breaks atomicity. The trick is - * to check access and if not granted clear O_CREAT from the flags. - * - * Another problem is returing the "right" error value (e.g. for an - * O_EXCL open we want to return EEXIST not EROFS). - */ - if (open_flag & O_CREAT) { - if (unlikely(!got_write)) { - create_error = -EROFS; - if (open_flag & (O_EXCL | O_TRUNC)) { - /* Fall back and fail with the right error */ - goto no_open; - } - /* No side effects, safe to clear O_CREAT */ - open_flag &= ~O_CREAT; - } else { - create_error = may_o_create(&nd->path, dentry, mode); - if (create_error) { - if (open_flag & O_EXCL) - goto no_open; - open_flag &= ~O_CREAT; - } - } - } else if ((open_flag & (O_TRUNC|O_WRONLY|O_RDWR)) && - unlikely(!got_write)) { - /* - * No O_CREATE -> atomicity not a requirement -> fall - * back to lookup + open - */ - goto no_open; - } - if (nd->flags & LOOKUP_DIRECTORY) open_flag |= O_DIRECTORY; @@ -2889,11 +2845,8 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, error = dir->i_op->atomic_open(dir, dentry, file, open_to_namei_flags(open_flag), mode, opened); - if (error < 0) { - if (create_error && error == -ENOENT) - error = create_error; + if (error < 0) goto out; - } if (error) { /* returned 1, that is */ if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { @@ -2906,7 +2859,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, } if (*opened & FILE_CREATED) fsnotify_create(dir, dentry); - goto looked_up; + path->dentry = dentry; + path->mnt = nd->path.mnt; + return 1; } /* @@ -2925,21 +2880,6 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, out: dput(dentry); return error; - -no_open: - if (need_lookup) { - dentry = lookup_real(dir, dentry, nd->flags); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); - } -looked_up: - if (create_error && !dentry->d_inode) { - error = create_error; - goto out; - } - path->dentry = dentry; - path->mnt = nd->path.mnt; - return 1; } /* @@ -2967,9 +2907,11 @@ static int lookup_open(struct nameidata *nd, struct path *path, { struct dentry *dir = nd->path.dentry; struct inode *dir_inode = dir->d_inode; + int open_flag = op->open_flag; struct dentry *dentry; - int error; + int error, create_error = 0; bool need_lookup = false; + umode_t mode = op->mode; if (unlikely(IS_DEADDIR(dir_inode))) return -ENOENT; @@ -2989,50 +2931,74 @@ static int lookup_open(struct nameidata *nd, struct path *path, goto out_no_open; } + /* + * Checking write permission is tricky, bacuse we don't know if we are + * going to actually need it: O_CREAT opens should work as long as the + * file exists. But checking existence breaks atomicity. The trick is + * to check access and if not granted clear O_CREAT from the flags. + * + * Another problem is returing the "right" error value (e.g. for an + * O_EXCL open we want to return EEXIST not EROFS). + */ + if (open_flag & O_CREAT) { + if (!IS_POSIXACL(dir->d_inode)) + mode &= ~current_umask(); + if (unlikely(!got_write)) { + create_error = -EROFS; + open_flag &= ~O_CREAT; + if (open_flag & (O_EXCL | O_TRUNC)) + goto no_open; + /* No side effects, safe to clear O_CREAT */ + } else { + create_error = may_o_create(&nd->path, dentry, mode); + if (create_error) { + open_flag &= ~O_CREAT; + if (open_flag & O_EXCL) + goto no_open; + } + } + } else if ((open_flag & (O_TRUNC|O_WRONLY|O_RDWR)) && + unlikely(!got_write)) { + /* + * No O_CREATE -> atomicity not a requirement -> fall + * back to lookup + open + */ + goto no_open; + } + if (dir_inode->i_op->atomic_open) { - return atomic_open(nd, dentry, path, file, op, got_write, - need_lookup, opened); + error = atomic_open(nd, dentry, path, file, op, open_flag, + mode, opened); + if (unlikely(error == -ENOENT) && create_error) + error = create_error; + return error; } +no_open: if (need_lookup) { - BUG_ON(dentry->d_inode); - dentry = lookup_real(dir_inode, dentry, nd->flags); if (IS_ERR(dentry)) return PTR_ERR(dentry); } /* Negative dentry, just create the file */ - if (!dentry->d_inode && (op->open_flag & O_CREAT)) { - umode_t mode = op->mode; - if (!IS_POSIXACL(dir->d_inode)) - mode &= ~current_umask(); - /* - * This write is needed to ensure that a - * rw->ro transition does not occur between - * the time when the file is created and when - * a permanent write count is taken through - * the 'struct file' in finish_open(). - */ - if (!got_write) { - error = -EROFS; - goto out_dput; - } + if (!dentry->d_inode && (open_flag & O_CREAT)) { *opened |= FILE_CREATED; audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE); - error = may_o_create(&nd->path, dentry, mode); - if (error) - goto out_dput; if (!dir_inode->i_op->create) { error = -EACCES; goto out_dput; } error = dir_inode->i_op->create(dir_inode, dentry, mode, - op->open_flag & O_EXCL); + open_flag & O_EXCL); if (error) goto out_dput; fsnotify_create(dir_inode, dentry); } + if (unlikely(create_error) && !dentry->d_inode) { + error = create_error; + goto out_dput; + } out_no_open: path->dentry = dentry; path->mnt = nd->path.mnt; -- cgit v0.10.2 From 384f26e28fe4ddc75e7cf8e0e21595964d03d8eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Apr 2016 02:03:55 -0400 Subject: atomic_open(): reorder and clean up a bit Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 4359d22..8d62a89 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2827,14 +2827,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, int open_flag, umode_t mode, int *opened) { + struct dentry *const DENTRY_NOT_SET = (void *) -1UL; struct inode *dir = nd->path.dentry->d_inode; int error; - int acc_mode; - struct dentry *const DENTRY_NOT_SET = (void *) -1UL; - bool excl; - excl = (open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT); - if (excl) + if (!(~open_flag & (O_EXCL | O_CREAT))) /* both O_EXCL and O_CREAT */ open_flag &= ~O_TRUNC; if (nd->flags & LOOKUP_DIRECTORY) @@ -2845,39 +2842,35 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, error = dir->i_op->atomic_open(dir, dentry, file, open_to_namei_flags(open_flag), mode, opened); - if (error < 0) - goto out; - - if (error) { /* returned 1, that is */ + if (!error) { + /* + * We didn't have the inode before the open, so check open + * permission here. + */ + int acc_mode = op->acc_mode; + if (*opened & FILE_CREATED) { + WARN_ON(!(open_flag & O_CREAT)); + fsnotify_create(dir, dentry); + acc_mode = 0; + } + error = may_open(&file->f_path, acc_mode, open_flag); + if (WARN_ON(error > 0)) + error = -EINVAL; + } else if (error > 0) { if (WARN_ON(file->f_path.dentry == DENTRY_NOT_SET)) { error = -EIO; - goto out; - } - if (file->f_path.dentry) { - dput(dentry); - dentry = file->f_path.dentry; + } else { + if (file->f_path.dentry) { + dput(dentry); + dentry = file->f_path.dentry; + } + if (*opened & FILE_CREATED) + fsnotify_create(dir, dentry); + path->dentry = dentry; + path->mnt = nd->path.mnt; + return 1; } - if (*opened & FILE_CREATED) - fsnotify_create(dir, dentry); - path->dentry = dentry; - path->mnt = nd->path.mnt; - return 1; } - - /* - * We didn't have the inode before the open, so check open permission - * here. - */ - acc_mode = op->acc_mode; - if (*opened & FILE_CREATED) { - WARN_ON(!(open_flag & O_CREAT)); - fsnotify_create(dir, dentry); - acc_mode = 0; - } - error = may_open(&file->f_path, acc_mode, open_flag); - if (WARN_ON(error > 0)) - error = -EINVAL; -out: dput(dentry); return error; } -- cgit v0.10.2 From 12fa5e240419237d848abe3d40809eabe9e8f4ca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Apr 2016 11:19:43 -0400 Subject: lookup_open(): expand the call of real_lookup() ... and lose the duplicate IS_DEADDIR() - we'd already checked that. Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 8d62a89..b9dde84 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2969,9 +2969,16 @@ static int lookup_open(struct nameidata *nd, struct path *path, no_open: if (need_lookup) { - dentry = lookup_real(dir_inode, dentry, nd->flags); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); + struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry, + nd->flags); + if (unlikely(res)) { + if (IS_ERR(res)) { + error = PTR_ERR(res); + goto out_dput; + } + dput(dentry); + dentry = res; + } } /* Negative dentry, just create the file */ -- cgit v0.10.2 From 6fbd07146d99239547cd4970622c96e9cb0f3213 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Apr 2016 11:50:59 -0400 Subject: lookup_open(): put the dentry fed to ->lookup() or ->atomic_open() into in-lookup hash Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index b9dde84..b84e6b2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2842,6 +2842,7 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry, error = dir->i_op->atomic_open(dir, dentry, file, open_to_namei_flags(open_flag), mode, opened); + d_lookup_done(dentry); if (!error) { /* * We didn't have the inode before the open, so check open @@ -2903,23 +2904,36 @@ static int lookup_open(struct nameidata *nd, struct path *path, int open_flag = op->open_flag; struct dentry *dentry; int error, create_error = 0; - bool need_lookup = false; umode_t mode = op->mode; + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); if (unlikely(IS_DEADDIR(dir_inode))) return -ENOENT; *opened &= ~FILE_CREATED; - dentry = lookup_dcache(&nd->last, dir, nd->flags); - if (IS_ERR(dentry)) - return PTR_ERR(dentry); + dentry = d_lookup(dir, &nd->last); + for (;;) { + if (!dentry) { + dentry = d_alloc_parallel(dir, &nd->last, &wq); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + } + if (d_in_lookup(dentry)) + break; - if (!dentry) { - dentry = d_alloc(dir, &nd->last); - if (unlikely(!dentry)) - return -ENOMEM; - need_lookup = true; - } else if (dentry->d_inode) { + if (!(dentry->d_flags & DCACHE_OP_REVALIDATE)) + break; + + error = d_revalidate(dentry, nd->flags); + if (likely(error > 0)) + break; + if (error) + goto out_dput; + d_invalidate(dentry); + dput(dentry); + dentry = NULL; + } + if (dentry->d_inode) { /* Cached positive dentry: will open in f_op->open */ goto out_no_open; } @@ -2968,9 +2982,10 @@ static int lookup_open(struct nameidata *nd, struct path *path, } no_open: - if (need_lookup) { + if (d_in_lookup(dentry)) { struct dentry *res = dir_inode->i_op->lookup(dir_inode, dentry, nd->flags); + d_lookup_done(dentry); if (unlikely(res)) { if (IS_ERR(res)) { error = PTR_ERR(res); -- cgit v0.10.2 From 9cf843e3f47c41440367062e92ab32e59ecb6a87 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Apr 2016 19:35:16 -0400 Subject: lookup_open(): lock the parent shared unless O_CREAT is given Signed-off-by: Al Viro diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 12c57ab..46f3bb7 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -575,3 +575,6 @@ in your dentry operations instead. Old method is only used if the new one is absent; eventually it will be removed. Switch while you still can; the old one won't stay. +-- +[mandatory] + ->atomic_open() calls without O_CREAT may happen in parallel. diff --git a/fs/namei.c b/fs/namei.c index b84e6b2..01069dd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3084,7 +3084,7 @@ static int do_last(struct nameidata *nd, } retry_lookup: - if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { + if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { error = mnt_want_write(nd->path.mnt); if (!error) got_write = true; @@ -3094,9 +3094,15 @@ retry_lookup: * dropping this one anyway. */ } - inode_lock(dir->d_inode); + if (open_flag & O_CREAT) + inode_lock(dir->d_inode); + else + inode_lock_shared(dir->d_inode); error = lookup_open(nd, &path, file, op, got_write, opened); - inode_unlock(dir->d_inode); + if (open_flag & O_CREAT) + inode_unlock(dir->d_inode); + else + inode_unlock_shared(dir->d_inode); if (error <= 0) { if (error) -- cgit v0.10.2 From 9ac3d3e8460e3fa6f3a9a39c2049904005016db6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Apr 2016 19:52:56 -0400 Subject: nfs: switch to ->iterate_shared() aside of the usual care about seeding dcache from readdir, we need to be careful about the pagecache evictions here. Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 33eb817..cd3c754 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -57,7 +57,7 @@ static void nfs_readdir_clear_array(struct page*); const struct file_operations nfs_dir_operations = { .llseek = nfs_llseek_dir, .read = generic_read_dir, - .iterate = nfs_readdir, + .iterate_shared = nfs_readdir, .open = nfs_opendir, .release = nfs_closedir, .fsync = nfs_fsync_dir, @@ -145,6 +145,7 @@ struct nfs_cache_array_entry { }; struct nfs_cache_array { + atomic_t refcount; int size; int eof_index; u64 last_cookie; @@ -200,11 +201,20 @@ void nfs_readdir_clear_array(struct page *page) int i; array = kmap_atomic(page); - for (i = 0; i < array->size; i++) - kfree(array->array[i].string.name); + if (atomic_dec_and_test(&array->refcount)) + for (i = 0; i < array->size; i++) + kfree(array->array[i].string.name); kunmap_atomic(array); } +static bool grab_page(struct page *page) +{ + struct nfs_cache_array *array = kmap_atomic(page); + bool res = atomic_inc_not_zero(&array->refcount); + kunmap_atomic(array); + return res; +} + /* * the caller is responsible for freeing qstr.name * when called by nfs_readdir_add_to_array, the strings will be freed in @@ -470,6 +480,7 @@ static void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) { struct qstr filename = QSTR_INIT(entry->name, entry->len); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct dentry *dentry; struct dentry *alias; struct inode *dir = d_inode(parent); @@ -489,7 +500,13 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) filename.hash = full_name_hash(filename.name, filename.len); dentry = d_lookup(parent, &filename); - if (dentry != NULL) { +again: + if (!dentry) { + dentry = d_alloc_parallel(parent, &filename, &wq); + if (IS_ERR(dentry)) + return; + } + if (!d_in_lookup(dentry)) { /* Is there a mountpoint here? If so, just exit */ if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid, &entry->fattr->fsid)) @@ -503,26 +520,21 @@ void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) } else { d_invalidate(dentry); dput(dentry); + dentry = NULL; + goto again; } } - dentry = d_alloc(parent, &filename); - if (dentry == NULL) - return; - inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr, entry->label); - if (IS_ERR(inode)) - goto out; - alias = d_splice_alias(inode, dentry); - if (IS_ERR(alias)) - goto out; - else if (alias) { - nfs_set_verifier(alias, nfs_save_change_attribute(dir)); - dput(alias); - } else - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - + d_lookup_done(dentry); + if (alias) { + if (IS_ERR(alias)) + goto out; + dput(dentry); + dentry = alias; + } + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out: dput(dentry); } @@ -643,6 +655,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, goto out_label_free; } memset(array, 0, sizeof(struct nfs_cache_array)); + atomic_set(&array->refcount, 1); array->eof_index = -1; status = nfs_readdir_alloc_pages(pages, array_size); @@ -705,8 +718,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) static void cache_page_release(nfs_readdir_descriptor_t *desc) { - if (!desc->page->mapping) - nfs_readdir_clear_array(desc->page); + nfs_readdir_clear_array(desc->page); put_page(desc->page); desc->page = NULL; } @@ -714,8 +726,16 @@ void cache_page_release(nfs_readdir_descriptor_t *desc) static struct page *get_cache_page(nfs_readdir_descriptor_t *desc) { - return read_cache_page(file_inode(desc->file)->i_mapping, + struct page *page; + + for (;;) { + page = read_cache_page(file_inode(desc->file)->i_mapping, desc->page_index, (filler_t *)nfs_readdir_filler, desc); + if (IS_ERR(page) || grab_page(page)) + break; + put_page(page); + } + return page; } /* @@ -934,13 +954,11 @@ out: static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) { - struct inode *inode = file_inode(filp); struct nfs_open_dir_context *dir_ctx = filp->private_data; dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", filp, offset, whence); - inode_lock(inode); switch (whence) { case 1: offset += filp->f_pos; @@ -948,16 +966,13 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) if (offset >= 0) break; default: - offset = -EINVAL; - goto out; + return -EINVAL; } if (offset != filp->f_pos) { filp->f_pos = offset; dir_ctx->dir_cookie = 0; dir_ctx->duped = 0; } -out: - inode_unlock(inode); return offset; } -- cgit v0.10.2 From 884be175351e73c515303118150f195dd611787c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Apr 2016 23:56:31 -0400 Subject: nfs: per-name sillyunlink exclusion use d_alloc_parallel() for sillyunlink/lookup exclusion and explicit rwsem (nfs_rmdir() being a writer and nfs_call_unlink() - a reader) for rmdir/sillyunlink one. That ought to make lookup/readdir/!O_CREAT atomic_open really parallel on NFS. Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index cd3c754..aaf7bd0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -909,7 +909,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; - nfs_block_sillyrename(dentry); if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) res = nfs_revalidate_mapping(inode, file->f_mapping); if (res < 0) @@ -945,7 +944,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; } while (!desc->eof); out: - nfs_unblock_sillyrename(dentry); if (res > 0) res = 0; dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); @@ -1398,7 +1396,6 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ trace_nfs_lookup_enter(dir, dentry, flags); - nfs_block_sillyrename(parent); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label); if (error == -ENOENT) goto no_entry; @@ -1423,7 +1420,6 @@ no_entry: } nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unblock_sillyrename: - nfs_unblock_sillyrename(parent); trace_nfs_lookup_exit(dir, dentry, flags, error); nfs4_label_free(label); out: @@ -1535,9 +1531,7 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, goto out; trace_nfs_atomic_open_enter(dir, ctx, open_flags); - nfs_block_sillyrename(dentry->d_parent); inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, opened); - nfs_unblock_sillyrename(dentry->d_parent); if (IS_ERR(inode)) { err = PTR_ERR(inode); trace_nfs_atomic_open_exit(dir, ctx, open_flags, err); @@ -1781,7 +1775,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) trace_nfs_rmdir_enter(dir, dentry); if (d_really_is_positive(dentry)) { - nfs_wait_on_sillyrename(dentry); + down_write(&NFS_I(d_inode(dentry))->rmdir_sem); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ switch (error) { @@ -1791,6 +1785,7 @@ int nfs_rmdir(struct inode *dir, struct dentry *dentry) case -ENOENT: nfs_dentry_handle_enoent(dentry); } + up_write(&NFS_I(d_inode(dentry))->rmdir_sem); } else error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); trace_nfs_rmdir_exit(dir, dentry, error); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 738c84a..52e7d68 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1958,9 +1958,7 @@ static void init_once(void *foo) nfsi->nrequests = 0; nfsi->commit_info.ncommit = 0; atomic_set(&nfsi->commit_info.rpcs_out, 0); - atomic_set(&nfsi->silly_count, 1); - INIT_HLIST_HEAD(&nfsi->silly_list); - init_waitqueue_head(&nfsi->waitqueue); + init_rwsem(&nfsi->rmdir_sem); nfs4_init_once(nfsi); } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7a7ac1d..084e857 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3777,7 +3777,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) static void nfs4_proc_unlink_rpc_prepare(struct rpc_task *task, struct nfs_unlinkdata *data) { - nfs4_setup_sequence(NFS_SERVER(data->dir), + nfs4_setup_sequence(NFS_SB(data->dentry->d_sb), &data->args.seq_args, &data->res.seq_res, task); diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 9f80a08..0b9e5cc 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -702,7 +702,7 @@ TRACE_EVENT(nfs_sillyrename_unlink, ), TP_fast_assign( - struct inode *dir = data->dir; + struct inode *dir = d_inode(data->dentry->d_parent); size_t len = data->args.name.len; __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 7d6deac..1868246 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -30,45 +30,11 @@ static void nfs_free_unlinkdata(struct nfs_unlinkdata *data) { - iput(data->dir); put_rpccred(data->cred); kfree(data->args.name.name); kfree(data); } -#define NAME_ALLOC_LEN(len) ((len+16) & ~15) -/** - * nfs_copy_dname - copy dentry name to data structure - * @dentry: pointer to dentry - * @data: nfs_unlinkdata - */ -static int nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data) -{ - char *str; - int len = dentry->d_name.len; - - str = kmemdup(dentry->d_name.name, NAME_ALLOC_LEN(len), GFP_KERNEL); - if (!str) - return -ENOMEM; - data->args.name.len = len; - data->args.name.name = str; - return 0; -} - -static void nfs_free_dname(struct nfs_unlinkdata *data) -{ - kfree(data->args.name.name); - data->args.name.name = NULL; - data->args.name.len = 0; -} - -static void nfs_dec_sillycount(struct inode *dir) -{ - struct nfs_inode *nfsi = NFS_I(dir); - if (atomic_dec_return(&nfsi->silly_count) == 1) - wake_up(&nfsi->waitqueue); -} - /** * nfs_async_unlink_done - Sillydelete post-processing * @task: rpc_task of the sillydelete @@ -78,7 +44,7 @@ static void nfs_dec_sillycount(struct inode *dir) static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; - struct inode *dir = data->dir; + struct inode *dir = d_inode(data->dentry->d_parent); trace_nfs_sillyrename_unlink(data, task->tk_status); if (!NFS_PROTO(dir)->unlink_done(task, dir)) @@ -95,17 +61,21 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) static void nfs_async_unlink_release(void *calldata) { struct nfs_unlinkdata *data = calldata; - struct super_block *sb = data->dir->i_sb; + struct dentry *dentry = data->dentry; + struct super_block *sb = dentry->d_sb; - nfs_dec_sillycount(data->dir); + up_read_non_owner(&NFS_I(d_inode(dentry->d_parent))->rmdir_sem); + d_lookup_done(dentry); nfs_free_unlinkdata(data); + dput(dentry); nfs_sb_deactive(sb); } static void nfs_unlink_prepare(struct rpc_task *task, void *calldata) { struct nfs_unlinkdata *data = calldata; - NFS_PROTO(data->dir)->unlink_rpc_prepare(task, data); + struct inode *dir = d_inode(data->dentry->d_parent); + NFS_PROTO(dir)->unlink_rpc_prepare(task, data); } static const struct rpc_call_ops nfs_unlink_ops = { @@ -114,7 +84,7 @@ static const struct rpc_call_ops nfs_unlink_ops = { .rpc_call_prepare = nfs_unlink_prepare, }; -static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data) +static void nfs_do_call_unlink(struct nfs_unlinkdata *data) { struct rpc_message msg = { .rpc_argp = &data->args, @@ -129,10 +99,31 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n .flags = RPC_TASK_ASYNC, }; struct rpc_task *task; + struct inode *dir = d_inode(data->dentry->d_parent); + nfs_sb_active(dir->i_sb); + data->args.fh = NFS_FH(dir); + nfs_fattr_init(data->res.dir_attr); + + NFS_PROTO(dir)->unlink_setup(&msg, dir); + + task_setup_data.rpc_client = NFS_CLIENT(dir); + task = rpc_run_task(&task_setup_data); + if (!IS_ERR(task)) + rpc_put_task_async(task); +} + +static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) +{ + struct inode *dir = d_inode(dentry->d_parent); struct dentry *alias; - alias = d_lookup(parent, &data->args.name); - if (alias != NULL) { + down_read_non_owner(&NFS_I(dir)->rmdir_sem); + alias = d_alloc_parallel(dentry->d_parent, &data->args.name, &data->wq); + if (IS_ERR(alias)) { + up_read_non_owner(&NFS_I(dir)->rmdir_sem); + return 0; + } + if (!d_in_lookup(alias)) { int ret; void *devname_garbage = NULL; @@ -140,10 +131,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * Hey, we raced with lookup... See if we need to transfer * the sillyrename information to the aliased dentry. */ - nfs_free_dname(data); - ret = nfs_copy_dname(alias, data); spin_lock(&alias->d_lock); - if (ret == 0 && d_really_is_positive(alias) && + if (d_really_is_positive(alias) && !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { devname_garbage = alias->d_fsdata; alias->d_fsdata = data; @@ -152,8 +141,8 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n } else ret = 0; spin_unlock(&alias->d_lock); - nfs_dec_sillycount(dir); dput(alias); + up_read_non_owner(&NFS_I(dir)->rmdir_sem); /* * If we'd displaced old cached devname, free it. At that * point dentry is definitely not a root, so we won't need @@ -162,95 +151,18 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n kfree(devname_garbage); return ret; } - data->dir = igrab(dir); - if (!data->dir) { - nfs_dec_sillycount(dir); - return 0; - } - nfs_sb_active(dir->i_sb); - data->args.fh = NFS_FH(dir); - nfs_fattr_init(data->res.dir_attr); - - NFS_PROTO(dir)->unlink_setup(&msg, dir); - - task_setup_data.rpc_client = NFS_CLIENT(dir); - task = rpc_run_task(&task_setup_data); - if (!IS_ERR(task)) - rpc_put_task_async(task); + data->dentry = alias; + nfs_do_call_unlink(data); return 1; } -static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) -{ - struct dentry *parent; - struct inode *dir; - int ret = 0; - - - parent = dget_parent(dentry); - if (parent == NULL) - goto out_free; - dir = d_inode(parent); - /* Non-exclusive lock protects against concurrent lookup() calls */ - spin_lock(&dir->i_lock); - if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { - /* Deferred delete */ - hlist_add_head(&data->list, &NFS_I(dir)->silly_list); - spin_unlock(&dir->i_lock); - ret = 1; - goto out_dput; - } - spin_unlock(&dir->i_lock); - ret = nfs_do_call_unlink(parent, dir, data); -out_dput: - dput(parent); -out_free: - return ret; -} - -void nfs_wait_on_sillyrename(struct dentry *dentry) -{ - struct nfs_inode *nfsi = NFS_I(d_inode(dentry)); - - wait_event(nfsi->waitqueue, atomic_read(&nfsi->silly_count) <= 1); -} - -void nfs_block_sillyrename(struct dentry *dentry) -{ - struct nfs_inode *nfsi = NFS_I(d_inode(dentry)); - - wait_event(nfsi->waitqueue, atomic_cmpxchg(&nfsi->silly_count, 1, 0) == 1); -} - -void nfs_unblock_sillyrename(struct dentry *dentry) -{ - struct inode *dir = d_inode(dentry); - struct nfs_inode *nfsi = NFS_I(dir); - struct nfs_unlinkdata *data; - - atomic_inc(&nfsi->silly_count); - wake_up(&nfsi->waitqueue); - spin_lock(&dir->i_lock); - while (!hlist_empty(&nfsi->silly_list)) { - if (!atomic_inc_not_zero(&nfsi->silly_count)) - break; - data = hlist_entry(nfsi->silly_list.first, struct nfs_unlinkdata, list); - hlist_del(&data->list); - spin_unlock(&dir->i_lock); - if (nfs_do_call_unlink(dentry, dir, data) == 0) - nfs_free_unlinkdata(data); - spin_lock(&dir->i_lock); - } - spin_unlock(&dir->i_lock); -} - /** * nfs_async_unlink - asynchronous unlinking of a file * @dir: parent directory of dentry * @dentry: dentry to unlink */ static int -nfs_async_unlink(struct inode *dir, struct dentry *dentry) +nfs_async_unlink(struct dentry *dentry, struct qstr *name) { struct nfs_unlinkdata *data; int status = -ENOMEM; @@ -259,13 +171,18 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) data = kzalloc(sizeof(*data), GFP_KERNEL); if (data == NULL) goto out; + data->args.name.name = kstrdup(name->name, GFP_KERNEL); + if (!data->args.name.name) + goto out_free; + data->args.name.len = name->len; data->cred = rpc_lookup_cred(); if (IS_ERR(data->cred)) { status = PTR_ERR(data->cred); - goto out_free; + goto out_free_name; } data->res.dir_attr = &data->dir_attr; + init_waitqueue_head(&data->wq); status = -EBUSY; spin_lock(&dentry->d_lock); @@ -285,6 +202,8 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) out_unlock: spin_unlock(&dentry->d_lock); put_rpccred(data->cred); +out_free_name: + kfree(data->args.name.name); out_free: kfree(data); out: @@ -303,17 +222,15 @@ out: void nfs_complete_unlink(struct dentry *dentry, struct inode *inode) { - struct nfs_unlinkdata *data = NULL; + struct nfs_unlinkdata *data; spin_lock(&dentry->d_lock); - if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { - dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; - data = dentry->d_fsdata; - dentry->d_fsdata = NULL; - } + dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; + data = dentry->d_fsdata; + dentry->d_fsdata = NULL; spin_unlock(&dentry->d_lock); - if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) + if (NFS_STALE(inode) || !nfs_call_unlink(dentry, data)) nfs_free_unlinkdata(data); } @@ -560,18 +477,10 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) /* queue unlink first. Can't do this from rpc_release as it * has to allocate memory */ - error = nfs_async_unlink(dir, dentry); + error = nfs_async_unlink(dentry, &sdentry->d_name); if (error) goto out_dput; - /* populate unlinkdata with the right dname */ - error = nfs_copy_dname(sdentry, - (struct nfs_unlinkdata *)dentry->d_fsdata); - if (error) { - nfs_cancel_async_unlink(dentry); - goto out_dput; - } - /* run the rename task, undo unlink if it fails */ task = nfs_async_rename(dir, dir, dentry, sdentry, nfs_complete_sillyrename); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 67300f8..fa167f2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -163,11 +163,9 @@ struct nfs_inode { /* Open contexts for shared mmap writes */ struct list_head open_files; - /* Number of in-flight sillydelete RPC calls */ - atomic_t silly_count; - /* List of deferred sillydelete requests */ - struct hlist_head silly_list; - wait_queue_head_t waitqueue; + /* Readers: in-flight sillydelete RPC calls */ + /* Writers: rmdir */ + struct rw_semaphore rmdir_sem; #if IS_ENABLED(CONFIG_NFS_V4) struct nfs4_cached_acl *nfs4_acl; @@ -492,9 +490,6 @@ extern void nfs_release_automount_timer(void); * linux/fs/nfs/unlink.c */ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); -extern void nfs_wait_on_sillyrename(struct dentry *dentry); -extern void nfs_block_sillyrename(struct dentry *dentry); -extern void nfs_unblock_sillyrename(struct dentry *dentry); /* * linux/fs/nfs/write.c diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index d320906..ee8491d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1468,10 +1468,10 @@ struct nfs_pgio_completion_ops { }; struct nfs_unlinkdata { - struct hlist_node list; struct nfs_removeargs args; struct nfs_removeres res; - struct inode *dir; + struct dentry *dentry; + wait_queue_head_t wq; struct rpc_cred *cred; struct nfs_fattr dir_attr; long timeout; -- cgit v0.10.2 From a01b3007ffb9be0180e744f0d15b130b0a76a29f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 19:50:05 -0400 Subject: configfs_readdir(): make safe under shared lock Signed-off-by: Al Viro diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 48929c4..56fb261 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1633,11 +1633,9 @@ static int configfs_readdir(struct file *file, struct dir_context *ctx) if (!dir_emit_dots(file, ctx)) return 0; - if (ctx->pos == 2) { - spin_lock(&configfs_dirent_lock); + spin_lock(&configfs_dirent_lock); + if (ctx->pos == 2) list_move(q, &parent_sd->s_children); - spin_unlock(&configfs_dirent_lock); - } for (p = q->next; p != &parent_sd->s_children; p = p->next) { struct configfs_dirent *next; const char *name; @@ -1648,9 +1646,6 @@ static int configfs_readdir(struct file *file, struct dir_context *ctx) if (!next->s_element) continue; - name = configfs_get_name(next); - len = strlen(name); - /* * We'll have a dentry and an inode for * PINNED items and for open attribute @@ -1664,7 +1659,6 @@ static int configfs_readdir(struct file *file, struct dir_context *ctx) * they close it. Beyond that, we don't * care. */ - spin_lock(&configfs_dirent_lock); dentry = next->s_dentry; if (dentry) inode = d_inode(dentry); @@ -1674,15 +1668,18 @@ static int configfs_readdir(struct file *file, struct dir_context *ctx) if (!inode) ino = iunique(sb, 2); + name = configfs_get_name(next); + len = strlen(name); + if (!dir_emit(ctx, name, len, ino, dt_type(next))) return 0; spin_lock(&configfs_dirent_lock); list_move(q, p); - spin_unlock(&configfs_dirent_lock); p = q; ctx->pos++; } + spin_unlock(&configfs_dirent_lock); return 0; } @@ -1690,7 +1687,6 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) { struct dentry * dentry = file->f_path.dentry; - inode_lock(d_inode(dentry)); switch (whence) { case 1: offset += file->f_pos; @@ -1698,7 +1694,6 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) if (offset >= 0) break; default: - inode_unlock(d_inode(dentry)); return -EINVAL; } if (offset != file->f_pos) { @@ -1724,7 +1719,6 @@ static loff_t configfs_dir_lseek(struct file *file, loff_t offset, int whence) spin_unlock(&configfs_dirent_lock); } } - inode_unlock(d_inode(dentry)); return offset; } @@ -1733,7 +1727,7 @@ const struct file_operations configfs_dir_operations = { .release = configfs_dir_close, .llseek = configfs_dir_lseek, .read = generic_read_dir, - .iterate = configfs_readdir, + .iterate_shared = configfs_readdir, }; /** -- cgit v0.10.2 From 8cb0d2c1c7dea61e9b976cadedeae228f1180cbd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 19:59:01 -0400 Subject: kernfs: no point locking directory around that generic_file_llseek() Signed-off-by: Al Viro diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index 03b688d..4f61d19 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -1643,22 +1643,9 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx) return 0; } -static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset, - int whence) -{ - struct inode *inode = file_inode(file); - loff_t ret; - - inode_lock(inode); - ret = generic_file_llseek(file, offset, whence); - inode_unlock(inode); - - return ret; -} - const struct file_operations kernfs_dir_fops = { .read = generic_read_dir, - .iterate = kernfs_fop_readdir, + .iterate_shared = kernfs_fop_readdir, .release = kernfs_dir_fop_release, - .llseek = kernfs_dir_fop_llseek, + .llseek = generic_file_llseek, }; -- cgit v0.10.2 From 060ff688cadac398a17f09bd87b16fd0bf943899 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 20 Apr 2016 20:49:14 -0400 Subject: lustre: don't need to lock inode in directory lseek Note that lustre has its private mutex protecting directory pagecache; if they ever remove it, they'll need to be careful with PageChecked() use. Signed-off-by: Al Viro diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c index e4c8288..7a0a67f 100644 --- a/drivers/staging/lustre/lustre/llite/dir.c +++ b/drivers/staging/lustre/lustre/llite/dir.c @@ -1865,7 +1865,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) int api32 = ll_need_32bit_api(sbi); loff_t ret = -EINVAL; - inode_lock(inode); switch (origin) { case SEEK_SET: break; @@ -1903,7 +1902,6 @@ static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin) goto out; out: - inode_unlock(inode); return ret; } @@ -1922,7 +1920,7 @@ const struct file_operations ll_dir_operations = { .open = ll_dir_open, .release = ll_dir_release, .read = generic_read_dir, - .iterate = ll_readdir, + .iterate_shared = ll_readdir, .unlocked_ioctl = ll_dir_ioctl, .fsync = ll_fsync, }; -- cgit v0.10.2 From c51da20c48b76ef1114d14b6b6ff190e11afab0e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Apr 2016 22:37:34 -0400 Subject: more trivial ->iterate_shared conversions Signed-off-by: Al Viro diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 3ec6113..34a5bc2 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -70,7 +70,7 @@ static int bfs_readdir(struct file *f, struct dir_context *ctx) const struct file_operations bfs_dir_operations = { .read = generic_read_dir, - .iterate = bfs_readdir, + .iterate_shared = bfs_readdir, .fsync = generic_file_fsync, .llseek = generic_file_llseek, }; diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 3a32ddf..7919967 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -561,7 +561,7 @@ static const struct address_space_operations cramfs_aops = { static const struct file_operations cramfs_directory_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = cramfs_readdir, + .iterate_shared = cramfs_readdir, }; static const struct inode_operations cramfs_dir_inode_operations = { diff --git a/fs/efs/dir.c b/fs/efs/dir.c index ce63b24..a7be96e 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c @@ -12,7 +12,7 @@ static int efs_readdir(struct file *, struct dir_context *); const struct file_operations efs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = efs_readdir, + .iterate_shared = efs_readdir, }; const struct inode_operations efs_dir_inode_operations = { @@ -100,4 +100,3 @@ static int efs_readdir(struct file *file, struct dir_context *ctx) ctx->pos = (block << EFS_DIRBSIZE_BITS) | slot; return 0; } - diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index a7661aa..f69a1b5 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c @@ -657,5 +657,5 @@ not_empty: const struct file_operations exofs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = exofs_readdir, + .iterate_shared = exofs_readdir, }; diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index a49e0cf..6d576b9 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c @@ -58,7 +58,7 @@ const struct inode_operations vxfs_dir_inode_ops = { const struct file_operations vxfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = vxfs_readdir, + .iterate_shared = vxfs_readdir, }; static inline u_long diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 7a9579e..84c4bf3 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -40,7 +40,7 @@ static int jffs2_rename (struct inode *, struct dentry *, const struct file_operations jffs2_dir_operations = { .read = generic_read_dir, - .iterate = jffs2_readdir, + .iterate_shared=jffs2_readdir, .unlocked_ioctl=jffs2_ioctl, .fsync = jffs2_fsync, .llseek = generic_file_llseek, diff --git a/fs/libfs.c b/fs/libfs.c index dd8a074..8765ff1 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1166,7 +1166,7 @@ static int empty_dir_readdir(struct file *file, struct dir_context *ctx) static const struct file_operations empty_dir_operations = { .llseek = empty_dir_llseek, .read = generic_read_dir, - .iterate = empty_dir_readdir, + .iterate_shared = empty_dir_readdir, .fsync = noop_fsync, }; diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index ddbed2b..c65deca 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -791,7 +791,7 @@ const struct inode_operations logfs_dir_iops = { const struct file_operations logfs_dir_fops = { .fsync = logfs_fsync, .unlocked_ioctl = logfs_ioctl, - .iterate = logfs_readdir, + .iterate_shared = logfs_readdir, .read = generic_read_dir, .llseek = default_llseek, }; diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 2eaed92..6723d45 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -661,7 +661,7 @@ not_empty: const struct file_operations nilfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = nilfs_readdir, + .iterate_shared = nilfs_readdir, .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_compat_ioctl, diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index f833bf8..c8cbf3b 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -452,6 +452,6 @@ const struct inode_operations omfs_dir_inops = { const struct file_operations omfs_dir_operations = { .read = generic_read_dir, - .iterate = omfs_readdir, + .iterate_shared = omfs_readdir, .llseek = generic_file_llseek, }; diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index b61b883..c7a8699 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -166,7 +166,7 @@ static int openpromfs_readdir(struct file *, struct dir_context *); static const struct file_operations openprom_operations = { .read = generic_read_dir, - .iterate = openpromfs_readdir, + .iterate_shared = openpromfs_readdir, .llseek = generic_file_llseek, }; diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index b218f96..781056a 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -71,7 +71,7 @@ const struct file_operations qnx4_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = qnx4_readdir, + .iterate_shared = qnx4_readdir, .fsync = generic_file_fsync, }; diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c index 144ceda..27637e0 100644 --- a/fs/qnx6/dir.c +++ b/fs/qnx6/dir.c @@ -272,7 +272,7 @@ found: const struct file_operations qnx6_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = qnx6_readdir, + .iterate_shared = qnx6_readdir, .fsync = generic_file_fsync, }; diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 3abd400..45aa05e 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -20,7 +20,7 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, const struct file_operations reiserfs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = reiserfs_readdir, + .iterate_shared = reiserfs_readdir, .fsync = reiserfs_dir_fsync, .unlocked_ioctl = reiserfs_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 795992a..34a5356 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1195,7 +1195,7 @@ const struct file_operations ubifs_dir_operations = { .llseek = generic_file_llseek, .release = ubifs_dir_release, .read = generic_read_dir, - .iterate = ubifs_readdir, + .iterate_shared = ubifs_readdir, .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/udf/dir.c b/fs/udf/dir.c index b51b371..4c5593a 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -202,7 +202,7 @@ out: const struct file_operations udf_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = udf_readdir, + .iterate_shared = udf_readdir, .unlocked_ioctl = udf_ioctl, .fsync = generic_file_fsync, }; -- cgit v0.10.2 From d375570fa83761161bb3d9120e0260dec66f051d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Apr 2016 23:08:45 -0400 Subject: romfs, squashfs: switch to ->iterate_shared() don't need to lock directory in ->llseek(), either Signed-off-by: Al Viro diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 6b00ca3..d0f8a38 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c @@ -280,8 +280,8 @@ error: static const struct file_operations romfs_dir_operations = { .read = generic_read_dir, - .iterate = romfs_readdir, - .llseek = default_llseek, + .iterate_shared = romfs_readdir, + .llseek = generic_file_llseek, }; static const struct inode_operations romfs_dir_inode_operations = { diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index d8c2d74..a5845f9 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c @@ -231,6 +231,6 @@ failed_read: const struct file_operations squashfs_dir_ops = { .read = generic_read_dir, - .iterate = squashfs_readdir, - .llseek = default_llseek, + .iterate_shared = squashfs_readdir, + .llseek = generic_file_llseek, }; -- cgit v0.10.2 From 98d4b8d8f07128c127f767c0a8726db948b28e96 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 30 Apr 2016 23:26:25 -0400 Subject: fat: switch to ->iterate_shared() ... and make that weird ioctl lock directory only shared. Signed-off-by: Al Viro diff --git a/fs/fat/dir.c b/fs/fat/dir.c index d0b95c9..663e428 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -769,7 +769,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file, buf.dirent = dirent; buf.result = 0; - inode_lock(inode); + inode_lock_shared(inode); buf.ctx.pos = file->f_pos; ret = -ENOENT; if (!IS_DEADDIR(inode)) { @@ -777,7 +777,7 @@ static int fat_ioctl_readdir(struct inode *inode, struct file *file, short_only, both ? &buf : NULL); file->f_pos = buf.ctx.pos; } - inode_unlock(inode); + inode_unlock_shared(inode); if (ret >= 0) ret = buf.result; return ret; @@ -861,7 +861,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, const struct file_operations fat_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = fat_readdir, + .iterate_shared = fat_readdir, .unlocked_ioctl = fat_dir_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = fat_compat_dir_ioctl, -- cgit v0.10.2 From 5963ded8fecedbab6b99274126434c7a22c45c84 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 1 May 2016 00:08:03 -0400 Subject: 9p: switch to ->iterate_shared() Signed-off-by: Al Viro diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 5cc00e5..b0405d6 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -246,7 +246,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) const struct file_operations v9fs_dir_operations = { .read = generic_read_dir, .llseek = generic_file_llseek, - .iterate = v9fs_dir_readdir, + .iterate_shared = v9fs_dir_readdir, .open = v9fs_file_open, .release = v9fs_dir_release, }; @@ -254,7 +254,7 @@ const struct file_operations v9fs_dir_operations = { const struct file_operations v9fs_dir_operations_dotl = { .read = generic_read_dir, .llseek = generic_file_llseek, - .iterate = v9fs_dir_readdir_dotl, + .iterate_shared = v9fs_dir_readdir_dotl, .open = v9fs_file_open, .release = v9fs_dir_release, .fsync = v9fs_file_fsync_dotl, -- cgit v0.10.2 From 51a16a9cd538736784e8471602d867bf6a26d0d5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 4 May 2016 16:21:20 -0400 Subject: switch ecryptfs to ->iterate_shared Signed-off-by: Al Viro diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index f024040..7000b96 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -383,7 +383,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) #endif const struct file_operations ecryptfs_dir_fops = { - .iterate = ecryptfs_readdir, + .iterate_shared = ecryptfs_readdir, .read = generic_read_dir, .unlocked_ioctl = ecryptfs_unlocked_ioctl, #ifdef CONFIG_COMPAT -- cgit v0.10.2 From 5e261246ce337280087f5211fbc91764daa12286 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 4 May 2016 16:24:04 -0400 Subject: logfs: no need to lock directory in lseek Signed-off-by: Al Viro diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index c65deca..2d5336b 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -793,5 +793,5 @@ const struct file_operations logfs_dir_fops = { .unlocked_ioctl = logfs_ioctl, .iterate_shared = logfs_readdir, .read = generic_read_dir, - .llseek = default_llseek, + .llseek = generic_file_llseek, }; -- cgit v0.10.2 From 972b241f8441dc37a3f89dcd7e71d7f013873d13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 4 May 2016 16:24:53 -0400 Subject: btrfs: switch to ->iterate_shared() Signed-off-by: Al Viro diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2aaba58..3e2ada1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -10181,7 +10181,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { static const struct file_operations btrfs_dir_file_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = btrfs_real_readdir, + .iterate_shared = btrfs_real_readdir, .unlocked_ioctl = btrfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = btrfs_ioctl, -- cgit v0.10.2 From e17a21d3bb067e561fc0112a3957f579af26d9d9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 5 May 2016 10:48:47 -0400 Subject: get_acorn_filename(): deobfuscate a bit Lots of Idiotic Silly Parentheses is -> that way... What that condition checks is that there's exactly 32 bytes between the end of name and the end of entire drectory record. Signed-off-by: Al Viro diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index b943cbd..dfd14e1 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -58,7 +58,7 @@ int get_acorn_filename(struct iso_directory_record *de, std = sizeof(struct iso_directory_record) + de->name_len[0]; if (std & 1) std++; - if ((*((unsigned char *) de) - std) != 32) + if (de->length[0] - std != 32) return retnamlen; chr = ((unsigned char *) de) + std; if (strncmp(chr, "ARCHIMEDES", 10)) -- cgit v0.10.2 From e899108994540f09dae236571d100af6e0749905 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 9 May 2016 12:53:03 -0400 Subject: isofs: switch to ->iterate_shared() Signed-off-by: Al Viro diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index dfd14e1..e759961 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -269,7 +269,7 @@ const struct file_operations isofs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = isofs_readdir, + .iterate_shared = isofs_readdir, }; /* -- cgit v0.10.2 From 22341d8f33084c575ce5b7ea63b07d7290b68e2d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 May 2016 14:24:06 -0400 Subject: befs: constify stuff a bit Signed-off-by: Al Viro diff --git a/fs/befs/befs.h b/fs/befs/befs.h index 35d19e8..e0f59263a 100644 --- a/fs/befs/befs.h +++ b/fs/befs/befs.h @@ -116,7 +116,7 @@ BEFS_I(const struct inode *inode) } static inline befs_blocknr_t -iaddr2blockno(struct super_block *sb, befs_inode_addr * iaddr) +iaddr2blockno(struct super_block *sb, const befs_inode_addr *iaddr) { return ((iaddr->allocation_group << BEFS_SB(sb)->ag_shift) + iaddr->start); @@ -141,7 +141,7 @@ befs_iaddrs_per_block(struct super_block *sb) } static inline int -befs_iaddr_is_empty(befs_inode_addr * iaddr) +befs_iaddr_is_empty(const befs_inode_addr *iaddr) { return (!iaddr->allocation_group) && (!iaddr->start) && (!iaddr->len); } diff --git a/fs/befs/btree.c b/fs/befs/btree.c index 22c16628..307645f9 100644 --- a/fs/befs/btree.c +++ b/fs/befs/btree.c @@ -88,15 +88,15 @@ struct befs_btree_node { static const befs_off_t befs_bt_inval = 0xffffffffffffffffULL; /* local functions */ -static int befs_btree_seekleaf(struct super_block *sb, befs_data_stream * ds, +static int befs_btree_seekleaf(struct super_block *sb, const befs_data_stream *ds, befs_btree_super * bt_super, struct befs_btree_node *this_node, befs_off_t * node_off); -static int befs_bt_read_super(struct super_block *sb, befs_data_stream * ds, +static int befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds, befs_btree_super * sup); -static int befs_bt_read_node(struct super_block *sb, befs_data_stream * ds, +static int befs_bt_read_node(struct super_block *sb, const befs_data_stream *ds, struct befs_btree_node *node, befs_off_t node_off); @@ -134,7 +134,7 @@ static int befs_compare_strings(const void *key1, int keylen1, * On failure, BEFS_ERR is returned. */ static int -befs_bt_read_super(struct super_block *sb, befs_data_stream * ds, +befs_bt_read_super(struct super_block *sb, const befs_data_stream *ds, befs_btree_super * sup) { struct buffer_head *bh; @@ -193,7 +193,7 @@ befs_bt_read_super(struct super_block *sb, befs_data_stream * ds, */ static int -befs_bt_read_node(struct super_block *sb, befs_data_stream * ds, +befs_bt_read_node(struct super_block *sb, const befs_data_stream *ds, struct befs_btree_node *node, befs_off_t node_off) { uint off = 0; @@ -247,7 +247,7 @@ befs_bt_read_node(struct super_block *sb, befs_data_stream * ds, * actuall value stored with the key. */ int -befs_btree_find(struct super_block *sb, befs_data_stream * ds, +befs_btree_find(struct super_block *sb, const befs_data_stream *ds, const char *key, befs_off_t * value) { struct befs_btree_node *this_node; @@ -416,7 +416,7 @@ befs_find_key(struct super_block *sb, struct befs_btree_node *node, * until the (key_no)th key is found or the tree is out of keys. */ int -befs_btree_read(struct super_block *sb, befs_data_stream * ds, +befs_btree_read(struct super_block *sb, const befs_data_stream *ds, loff_t key_no, size_t bufsize, char *keybuf, size_t * keysize, befs_off_t * value) { @@ -548,7 +548,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, * Also checks for an empty tree. If there are no keys, returns BEFS_BT_EMPTY. */ static int -befs_btree_seekleaf(struct super_block *sb, befs_data_stream * ds, +befs_btree_seekleaf(struct super_block *sb, const befs_data_stream *ds, befs_btree_super *bt_super, struct befs_btree_node *this_node, befs_off_t * node_off) diff --git a/fs/befs/btree.h b/fs/befs/btree.h index 92e781e..f2a8f63 100644 --- a/fs/befs/btree.h +++ b/fs/befs/btree.h @@ -4,10 +4,10 @@ */ -int befs_btree_find(struct super_block *sb, befs_data_stream * ds, +int befs_btree_find(struct super_block *sb, const befs_data_stream *ds, const char *key, befs_off_t * value); -int befs_btree_read(struct super_block *sb, befs_data_stream * ds, +int befs_btree_read(struct super_block *sb, const befs_data_stream *ds, loff_t key_no, size_t bufsize, char *keybuf, size_t * keysize, befs_off_t * value); diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index ebd5071..dde0b79 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c @@ -21,16 +21,16 @@ const befs_inode_addr BAD_IADDR = { 0, 0, 0 }; static int befs_find_brun_direct(struct super_block *sb, - befs_data_stream * data, + const befs_data_stream *data, befs_blocknr_t blockno, befs_block_run * run); static int befs_find_brun_indirect(struct super_block *sb, - befs_data_stream * data, + const befs_data_stream *data, befs_blocknr_t blockno, befs_block_run * run); static int befs_find_brun_dblindirect(struct super_block *sb, - befs_data_stream * data, + const befs_data_stream *data, befs_blocknr_t blockno, befs_block_run * run); @@ -45,7 +45,7 @@ static int befs_find_brun_dblindirect(struct super_block *sb, * if you don't need to know offset just set @off = NULL. */ struct buffer_head * -befs_read_datastream(struct super_block *sb, befs_data_stream * ds, +befs_read_datastream(struct super_block *sb, const befs_data_stream *ds, befs_off_t pos, uint * off) { struct buffer_head *bh = NULL; @@ -87,7 +87,7 @@ befs_read_datastream(struct super_block *sb, befs_data_stream * ds, * 2001-11-15 Will Dyson */ int -befs_fblock2brun(struct super_block *sb, befs_data_stream * data, +befs_fblock2brun(struct super_block *sb, const befs_data_stream *data, befs_blocknr_t fblock, befs_block_run * run) { int err; @@ -122,8 +122,8 @@ befs_fblock2brun(struct super_block *sb, befs_data_stream * data, * Returns the number of bytes read */ size_t -befs_read_lsymlink(struct super_block * sb, befs_data_stream * ds, void *buff, - befs_off_t len) +befs_read_lsymlink(struct super_block *sb, const befs_data_stream *ds, + void *buff, befs_off_t len) { befs_off_t bytes_read = 0; /* bytes readed */ u16 plen; @@ -163,7 +163,7 @@ befs_read_lsymlink(struct super_block * sb, befs_data_stream * ds, void *buff, */ befs_blocknr_t -befs_count_blocks(struct super_block * sb, befs_data_stream * ds) +befs_count_blocks(struct super_block *sb, const befs_data_stream *ds) { befs_blocknr_t blocks; befs_blocknr_t datablocks; /* File data blocks */ @@ -243,11 +243,11 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds) 2001-11-15 Will Dyson */ static int -befs_find_brun_direct(struct super_block *sb, befs_data_stream * data, +befs_find_brun_direct(struct super_block *sb, const befs_data_stream *data, befs_blocknr_t blockno, befs_block_run * run) { int i; - befs_block_run *array = data->direct; + const befs_block_run *array = data->direct; befs_blocknr_t sum; befs_blocknr_t max_block = data->max_direct_range >> BEFS_SB(sb)->block_shift; @@ -304,7 +304,8 @@ befs_find_brun_direct(struct super_block *sb, befs_data_stream * data, */ static int befs_find_brun_indirect(struct super_block *sb, - befs_data_stream * data, befs_blocknr_t blockno, + const befs_data_stream *data, + befs_blocknr_t blockno, befs_block_run * run) { int i, j; @@ -412,7 +413,8 @@ befs_find_brun_indirect(struct super_block *sb, */ static int befs_find_brun_dblindirect(struct super_block *sb, - befs_data_stream * data, befs_blocknr_t blockno, + const befs_data_stream *data, + befs_blocknr_t blockno, befs_block_run * run) { int dblindir_indx; diff --git a/fs/befs/datastream.h b/fs/befs/datastream.h index 45e8a3c..91ba820 100644 --- a/fs/befs/datastream.h +++ b/fs/befs/datastream.h @@ -4,16 +4,17 @@ */ struct buffer_head *befs_read_datastream(struct super_block *sb, - befs_data_stream * ds, befs_off_t pos, - uint * off); + const befs_data_stream *ds, + befs_off_t pos, uint * off); -int befs_fblock2brun(struct super_block *sb, befs_data_stream * data, +int befs_fblock2brun(struct super_block *sb, const befs_data_stream *data, befs_blocknr_t fblock, befs_block_run * run); -size_t befs_read_lsymlink(struct super_block *sb, befs_data_stream * data, +size_t befs_read_lsymlink(struct super_block *sb, const befs_data_stream *data, void *buff, befs_off_t len); -befs_blocknr_t befs_count_blocks(struct super_block *sb, befs_data_stream * ds); +befs_blocknr_t befs_count_blocks(struct super_block *sb, + const befs_data_stream *ds); extern const befs_inode_addr BAD_IADDR; diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index cc0e082..f168bc8f 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -157,7 +157,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct inode *inode = NULL; struct super_block *sb = dir->i_sb; - befs_data_stream *ds = &BEFS_I(dir)->i_data.ds; + const befs_data_stream *ds = &BEFS_I(dir)->i_data.ds; befs_off_t offset; int ret; int utfnamelen; @@ -207,7 +207,7 @@ befs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; - befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; + const befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; befs_off_t value; int result; size_t keysize; -- cgit v0.10.2 From e23e9aa75283ac093421ca71339ec4eb1afbe8be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 May 2016 14:24:57 -0400 Subject: befs: switch to ->iterate_shared() Signed-off-by: Al Viro diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index f168bc8f..71112aa 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -66,7 +66,7 @@ static struct kmem_cache *befs_inode_cachep; static const struct file_operations befs_dir_operations = { .read = generic_read_dir, - .iterate = befs_readdir, + .iterate_shared = befs_readdir, .llseek = generic_file_llseek, }; -- cgit v0.10.2 From 29884eff1f0f7b03417eaaffa898f9d9d53b0203 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 May 2016 14:27:44 -0400 Subject: afs: switch to ->iterate_shared() Signed-off-by: Al Viro diff --git a/fs/afs/dir.c b/fs/afs/dir.c index cdf8fbb..eba5410 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -43,7 +43,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, const struct file_operations afs_dir_file_operations = { .open = afs_dir_open, .release = afs_release, - .iterate = afs_readdir, + .iterate_shared = afs_readdir, .lock = afs_lock, .llseek = generic_file_llseek, }; -- cgit v0.10.2 From e77d0c63f00137add5d7966d9bd9fd5115ce0841 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 10 May 2016 16:41:13 -0400 Subject: f2fs: switch to ->iterate_shared() Signed-off-by: Al Viro diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index af81957..9e46151 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -902,7 +902,7 @@ static int f2fs_dir_open(struct inode *inode, struct file *filp) const struct file_operations f2fs_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .iterate = f2fs_readdir, + .iterate_shared = f2fs_readdir, .fsync = f2fs_sync_file, .open = f2fs_dir_open, .unlocked_ioctl = f2fs_ioctl, -- cgit v0.10.2 From 1d1bb236bc2ffb3586d6cc73e58c0d8351758123 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 May 2016 17:00:20 -0400 Subject: gfs2: switch to ->iterate_shared() protected by glock and already used without locking the directory by gfs2_get_name() Signed-off-by: Al Viro diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 208efc7..a3e7358 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -1119,7 +1119,7 @@ const struct file_operations gfs2_file_fops = { }; const struct file_operations gfs2_dir_fops = { - .iterate = gfs2_readdir, + .iterate_shared = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, .release = gfs2_release, @@ -1147,7 +1147,7 @@ const struct file_operations gfs2_file_fops_nolock = { }; const struct file_operations gfs2_dir_fops_nolock = { - .iterate = gfs2_readdir, + .iterate_shared = gfs2_readdir, .unlocked_ioctl = gfs2_ioctl, .open = gfs2_open, .release = gfs2_release, -- cgit v0.10.2 From e82c3147554785414a10a2d424590758646972b2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 May 2016 19:35:57 -0400 Subject: hpfs: handle allocation failures in hpfs_add_pos() pr_err() is nice, but we'd better propagate the error to caller and not proceed to violate the invariants (namely, "every file with f_pos tied to directory block should have its address visible in per-inode array"). Signed-off-by: Al Viro diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index e57a53c..77b92ff 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -44,7 +44,11 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) else goto fail; if (pos == 12) goto fail; } - hpfs_add_pos(i, &filp->f_pos); + if (unlikely(hpfs_add_pos(i, &filp->f_pos) < 0)) { + hpfs_unlock(s); + inode_unlock(i); + return -ENOMEM; + } ok: filp->f_pos = new_off; hpfs_unlock(s); @@ -141,8 +145,10 @@ static int hpfs_readdir(struct file *file, struct dir_context *ctx) ctx->pos = 1; } if (ctx->pos == 1) { + ret = hpfs_add_pos(inode, &file->f_pos); + if (unlikely(ret < 0)) + goto out; ctx->pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1; - hpfs_add_pos(inode, &file->f_pos); file->f_version = inode->i_version; } next_pos = ctx->pos; diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 2923a7b..86ab7e7 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -21,7 +21,7 @@ static loff_t get_pos(struct dnode *d, struct hpfs_dirent *fde) return ((loff_t)le32_to_cpu(d->self) << 4) | (loff_t)1; } -void hpfs_add_pos(struct inode *inode, loff_t *pos) +int hpfs_add_pos(struct inode *inode, loff_t *pos) { struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); int i = 0; @@ -29,11 +29,12 @@ void hpfs_add_pos(struct inode *inode, loff_t *pos) if (hpfs_inode->i_rddir_off) for (; hpfs_inode->i_rddir_off[i]; i++) - if (hpfs_inode->i_rddir_off[i] == pos) return; + if (hpfs_inode->i_rddir_off[i] == pos) + return 0; if (!(i&0x0f)) { if (!(ppos = kmalloc((i+0x11) * sizeof(loff_t*), GFP_NOFS))) { pr_err("out of memory for position list\n"); - return; + return -ENOMEM; } if (hpfs_inode->i_rddir_off) { memcpy(ppos, hpfs_inode->i_rddir_off, i * sizeof(loff_t)); @@ -43,6 +44,7 @@ void hpfs_add_pos(struct inode *inode, loff_t *pos) } hpfs_inode->i_rddir_off[i] = pos; hpfs_inode->i_rddir_off[i + 1] = NULL; + return 0; } void hpfs_del_pos(struct inode *inode, loff_t *pos) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 975654a..aebb78f 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -242,7 +242,7 @@ extern const struct file_operations hpfs_dir_ops; /* dnode.c */ -void hpfs_add_pos(struct inode *, loff_t *); +int hpfs_add_pos(struct inode *, loff_t *); void hpfs_del_pos(struct inode *, loff_t *); struct hpfs_dirent *hpfs_add_de(struct super_block *, struct dnode *, const unsigned char *, unsigned, secno); -- cgit v0.10.2 From 7d674b3195b854eec26724b8e85c97da79cdcf9d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 May 2016 19:44:04 -0400 Subject: hpfs: switch to ->iterate_shared() NOTE: the only reason we can do that without ->i_rdir_offs races is that hpfs_lock() serializes everything in there anyway. It's not that hard to get rid of, but not as part of this series... Signed-off-by: Al Viro diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 77b92ff..7b9150c 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -330,7 +330,7 @@ const struct file_operations hpfs_dir_ops = { .llseek = hpfs_dir_lseek, .read = generic_read_dir, - .iterate = hpfs_readdir, + .iterate_shared = hpfs_readdir, .release = hpfs_dir_release, .fsync = hpfs_file_fsync, .unlocked_ioctl = hpfs_ioctl, -- cgit v0.10.2 From 552a9d489f1412beb5914f0c64a54d921a9c6624 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 May 2016 19:49:30 -0400 Subject: hostfs: switch to ->iterate_shared() Signed-off-by: Al Viro diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 7016653..5c57654 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -398,7 +398,7 @@ static const struct file_operations hostfs_file_fops = { static const struct file_operations hostfs_dir_fops = { .llseek = generic_file_llseek, - .iterate = hostfs_readdir, + .iterate_shared = hostfs_readdir, .read = generic_read_dir, .open = hostfs_open, .fsync = hostfs_fsync, -- cgit v0.10.2 From 323ee8fc544d407eb053471b9607f95f987f5f12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 May 2016 20:02:09 -0400 Subject: hfsplus: switch to ->iterate_shared() We need to protect the list of hfsplus_readdir_data against parallel insertions (in readdir) and removals (in release). Add a spinlock for that. Note that it has nothing to do with protection of hfsplus_readdir_data->key - we have an exclusion between hfsplus_readdir() and hfsplus_delete_cat() on directory lock and between several hfsplus_readdir() for the same struct file on ->f_pos_lock. The spinlock is strictly for list changes. Signed-off-by: Al Viro diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 022974a..fb707e8 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -374,12 +374,15 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); } + /* we only need to take spinlock for exclusion with ->release() */ + spin_lock(&HFSPLUS_I(dir)->open_dir_lock); list_for_each(pos, &HFSPLUS_I(dir)->open_dir_list) { struct hfsplus_readdir_data *rd = list_entry(pos, struct hfsplus_readdir_data, list); if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) rd->file->f_pos--; } + spin_unlock(&HFSPLUS_I(dir)->open_dir_lock); err = hfs_brec_remove(&fd); if (err) diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index a4e867e..42e1286 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -271,8 +271,14 @@ next: } file->private_data = rd; rd->file = file; + spin_lock(&HFSPLUS_I(inode)->open_dir_lock); list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); + spin_unlock(&HFSPLUS_I(inode)->open_dir_lock); } + /* + * Can be done after the list insertion; exclusion with + * hfsplus_delete_cat() is provided by directory lock. + */ memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); out: kfree(strbuf); @@ -284,9 +290,9 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file) { struct hfsplus_readdir_data *rd = file->private_data; if (rd) { - inode_lock(inode); + spin_lock(&HFSPLUS_I(inode)->open_dir_lock); list_del(&rd->list); - inode_unlock(inode); + spin_unlock(&HFSPLUS_I(inode)->open_dir_lock); kfree(rd); } return 0; @@ -569,7 +575,7 @@ const struct inode_operations hfsplus_dir_inode_operations = { const struct file_operations hfsplus_dir_operations = { .fsync = hfsplus_file_fsync, .read = generic_read_dir, - .iterate = hfsplus_readdir, + .iterate_shared = hfsplus_readdir, .unlocked_ioctl = hfsplus_ioctl, .llseek = generic_file_llseek, .release = hfsplus_dir_release, diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index f91a1fa..fdc3446 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -244,6 +244,7 @@ struct hfsplus_inode_info { u8 userflags; /* BSD user file flags */ u32 subfolders; /* Subfolder count (HFSX only) */ struct list_head open_dir_list; + spinlock_t open_dir_lock; loff_t phys_size; struct inode vfs_inode; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index b28f398..037f738 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -374,6 +374,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) hip = HFSPLUS_I(inode); INIT_LIST_HEAD(&hip->open_dir_list); + spin_lock_init(&hip->open_dir_lock); mutex_init(&hip->extents_lock); atomic_set(&hip->opencnt, 0); hip->extent_state = 0; diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index c359113..755bf30 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -67,6 +67,7 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) return inode; INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list); + spin_lock_init(&HFSPLUS_I(inode)->open_dir_lock); mutex_init(&HFSPLUS_I(inode)->extents_lock); HFSPLUS_I(inode)->flags = 0; HFSPLUS_I(inode)->extent_state = 0; -- cgit v0.10.2 From 9717a91b01feda644f45fd63624a641385ef8f2d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 May 2016 20:13:50 -0400 Subject: hfs: switch to ->iterate_shared() exact parallel of hfsplus analogue Signed-off-by: Al Viro diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index 1eb5d41..98cde8b 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c @@ -240,10 +240,13 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, struct qstr *str) } } + /* we only need to take spinlock for exclusion with ->release() */ + spin_lock(&HFS_I(dir)->open_dir_lock); list_for_each_entry(rd, &HFS_I(dir)->open_dir_list, list) { if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) rd->file->f_pos--; } + spin_unlock(&HFS_I(dir)->open_dir_lock); res = hfs_brec_remove(&fd); if (res) diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index e9f2b85..163190e 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -161,8 +161,14 @@ static int hfs_readdir(struct file *file, struct dir_context *ctx) } file->private_data = rd; rd->file = file; + spin_lock(&HFS_I(inode)->open_dir_lock); list_add(&rd->list, &HFS_I(inode)->open_dir_list); + spin_unlock(&HFS_I(inode)->open_dir_lock); } + /* + * Can be done after the list insertion; exclusion with + * hfs_delete_cat() is provided by directory lock. + */ memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key)); out: hfs_find_exit(&fd); @@ -173,9 +179,9 @@ static int hfs_dir_release(struct inode *inode, struct file *file) { struct hfs_readdir_data *rd = file->private_data; if (rd) { - inode_lock(inode); + spin_lock(&HFS_I(inode)->open_dir_lock); list_del(&rd->list); - inode_unlock(inode); + spin_unlock(&HFS_I(inode)->open_dir_lock); kfree(rd); } return 0; @@ -303,7 +309,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, const struct file_operations hfs_dir_operations = { .read = generic_read_dir, - .iterate = hfs_readdir, + .iterate_shared = hfs_readdir, .llseek = generic_file_llseek, .release = hfs_dir_release, }; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 79daa09..fa3eed8 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -69,6 +69,7 @@ struct hfs_inode_info { struct hfs_cat_key cat_key; struct list_head open_dir_list; + spinlock_t open_dir_lock; struct inode *rsrc_inode; struct mutex extents_lock; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index cb1e5fa..ba533c7 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -187,6 +187,7 @@ struct inode *hfs_new_inode(struct inode *dir, struct qstr *name, umode_t mode) mutex_init(&HFS_I(inode)->extents_lock); INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); + spin_lock_init(&HFS_I(inode)->open_dir_lock); hfs_cat_build_key(sb, (btree_key *)&HFS_I(inode)->cat_key, dir->i_ino, name); inode->i_ino = HFS_SB(sb)->next_id++; inode->i_mode = mode; @@ -318,6 +319,7 @@ static int hfs_read_inode(struct inode *inode, void *data) HFS_I(inode)->rsrc_inode = NULL; mutex_init(&HFS_I(inode)->extents_lock); INIT_LIST_HEAD(&HFS_I(inode)->open_dir_list); + spin_lock_init(&HFS_I(inode)->open_dir_lock); /* Initialize the inode */ inode->i_uid = hsb->s_uid; -- cgit v0.10.2 From ae05327a00fd47c34dfe25294b359a3f3fef96e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 May 2016 20:36:01 -0400 Subject: ext4: switch to ->iterate_shared() Note that we need relax_dir() equivalent for directories locked shared. Signed-off-by: Al Viro diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 561d730..5d00bf0 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -266,7 +266,7 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx) ctx->pos += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); } - if ((ctx->pos < inode->i_size) && !dir_relax(inode)) + if ((ctx->pos < inode->i_size) && !dir_relax_shared(inode)) goto done; brelse(bh); bh = NULL; @@ -644,7 +644,7 @@ int ext4_check_all_de(struct inode *dir, struct buffer_head *bh, void *buf, const struct file_operations ext4_dir_operations = { .llseek = ext4_dir_llseek, .read = generic_read_dir, - .iterate = ext4_readdir, + .iterate_shared = ext4_readdir, .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, diff --git a/include/linux/fs.h b/include/linux/fs.h index 3dc0258..e87245a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3134,6 +3134,13 @@ static inline bool dir_relax(struct inode *inode) return !IS_DEADDIR(inode); } +static inline bool dir_relax_shared(struct inode *inode) +{ + inode_unlock_shared(inode); + inode_lock_shared(inode); + return !IS_DEADDIR(inode); +} + extern bool path_noexec(const struct path *path); extern void inode_nohighmem(struct inode *inode); -- cgit v0.10.2