From c2bd6c11cd05fed1eeb83230e87351357d72bb48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Apr 2012 23:52:50 -0400 Subject: switch do_fsync() to fget_light() Signed-off-by: Al Viro diff --git a/fs/sync.c b/fs/sync.c index 0e8db93..11e3d1c 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -188,11 +188,12 @@ static int do_fsync(unsigned int fd, int datasync) { struct file *file; int ret = -EBADF; + int fput_needed; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (file) { ret = vfs_fsync(file, datasync); - fput(file); + fput_light(file, fput_needed); } return ret; } -- cgit v0.10.2 From 863ced7fe762f80e67bc9171e47c7d80032cce12 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:40:32 -0400 Subject: switch readdir/getdents to fget_light/fput_light Signed-off-by: Al Viro diff --git a/fs/compat.c b/fs/compat.c index 0781e61..9f77486 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -871,12 +871,12 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, { int error; struct file *file; + int fput_needed; struct compat_readdir_callback buf; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.result = 0; buf.dirent = dirent; @@ -885,8 +885,7 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, if (buf.result) error = buf.result; - fput(file); -out: + fput_light(file, fput_needed); return error; } @@ -953,16 +952,15 @@ asmlinkage long compat_sys_getdents(unsigned int fd, struct file * file; struct compat_linux_dirent __user * lastdirent; struct compat_getdents_callback buf; + int fput_needed; int error; - error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - goto out; + return -EFAULT; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; @@ -979,8 +977,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, else error = count - buf.count; } - fput(file); 
-out: + fput_light(file, fput_needed); return error; } @@ -1041,16 +1038,15 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, struct file * file; struct linux_dirent64 __user * lastdirent; struct compat_getdents_callback64 buf; + int fput_needed; int error; - error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - goto out; + return -EFAULT; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; @@ -1068,8 +1064,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, else error = count - buf.count; } - fput(file); -out: + fput_light(file, fput_needed); return error; } #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ diff --git a/fs/readdir.c b/fs/readdir.c index cc0a822..39e3370 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -108,11 +108,11 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, int error; struct file * file; struct readdir_callback buf; + int fput_needed; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.result = 0; buf.dirent = dirent; @@ -121,8 +121,7 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, if (buf.result) error = buf.result; - fput(file); -out: + fput_light(file, fput_needed); return error; } @@ -195,16 +194,15 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, struct file * file; struct linux_dirent __user * lastdirent; struct getdents_callback buf; + int fput_needed; int error; - error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - goto out; + return -EFAULT; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; @@ -221,8 +219,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, else error = count - buf.count; } - fput(file); -out: + fput_light(file, fput_needed); return error; } @@ -278,16 +275,15 @@ SYSCALL_DEFINE3(getdents64, 
unsigned int, fd, struct file * file; struct linux_dirent64 __user * lastdirent; struct getdents_callback64 buf; + int fput_needed; int error; - error = -EFAULT; if (!access_ok(VERIFY_WRITE, dirent, count)) - goto out; + return -EFAULT; - error = -EBADF; - file = fget(fd); + file = fget_light(fd, &fput_needed); if (!file) - goto out; + return -EBADF; buf.current_dir = dirent; buf.previous = NULL; @@ -305,7 +301,6 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, else error = count - buf.count; } - fput(file); -out: + fput_light(file, fput_needed); return error; } -- cgit v0.10.2 From 7449af1e8b795abf4ef829ac507861f34dca30b4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:41:25 -0400 Subject: switch xattr syscalls to fget_light/fput_light Signed-off-by: Al Viro diff --git a/fs/xattr.c b/fs/xattr.c index 3c8c1cc..1d7ac37 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -399,11 +399,12 @@ SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname, SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, const void __user *,value, size_t, size, int, flags) { + int fput_needed; struct file *f; struct dentry *dentry; int error = -EBADF; - f = fget(fd); + f = fget_light(fd, &fput_needed); if (!f) return error; dentry = f->f_path.dentry; @@ -413,7 +414,7 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name, error = setxattr(dentry, name, value, size, flags); mnt_drop_write_file(f); } - fput(f); + fput_light(f, fput_needed); return error; } @@ -486,15 +487,16 @@ SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname, SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name, void __user *, value, size_t, size) { + int fput_needed; struct file *f; ssize_t error = -EBADF; - f = fget(fd); + f = fget_light(fd, &fput_needed); if (!f) return error; audit_inode(NULL, f->f_path.dentry); error = getxattr(f->f_path.dentry, name, value, size); - fput(f); + fput_light(f, fput_needed); return error; } @@ -566,15 +568,16 @@ SYSCALL_DEFINE3(llistxattr, 
const char __user *, pathname, char __user *, list, SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size) { + int fput_needed; struct file *f; ssize_t error = -EBADF; - f = fget(fd); + f = fget_light(fd, &fput_needed); if (!f) return error; audit_inode(NULL, f->f_path.dentry); error = listxattr(f->f_path.dentry, list, size); - fput(f); + fput_light(f, fput_needed); return error; } @@ -634,11 +637,12 @@ SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname, SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) { + int fput_needed; struct file *f; struct dentry *dentry; int error = -EBADF; - f = fget(fd); + f = fget_light(fd, &fput_needed); if (!f) return error; dentry = f->f_path.dentry; @@ -648,7 +652,7 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name) error = removexattr(dentry, name); mnt_drop_write_file(f); } - fput(f); + fput_light(f, fput_needed); return error; } -- cgit v0.10.2 From 545ec2c7945bf7d22d0779e7dc9bf16f7dd9ae34 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:42:19 -0400 Subject: switch fcntl to fget_raw_light/fput_light Signed-off-by: Al Viro diff --git a/fs/fcntl.c b/fs/fcntl.c index d078b75..81b70e6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -442,28 +442,24 @@ static int check_fcntl_cmd(unsigned cmd) SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg) { struct file *filp; + int fput_needed; long err = -EBADF; - filp = fget_raw(fd); + filp = fget_raw_light(fd, &fput_needed); if (!filp) goto out; if (unlikely(filp->f_mode & FMODE_PATH)) { - if (!check_fcntl_cmd(cmd)) { - fput(filp); - goto out; - } + if (!check_fcntl_cmd(cmd)) + goto out1; } err = security_file_fcntl(filp, cmd, arg); - if (err) { - fput(filp); - return err; - } + if (!err) + err = do_fcntl(fd, cmd, arg, filp); - err = do_fcntl(fd, cmd, arg, filp); - - fput(filp); +out1: + fput_light(filp, fput_needed); out: return err; } @@ -473,26 +469,21 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, 
fd, unsigned int, cmd, unsigned long, arg) { struct file * filp; - long err; + long err = -EBADF; + int fput_needed; - err = -EBADF; - filp = fget_raw(fd); + filp = fget_raw_light(fd, &fput_needed); if (!filp) goto out; if (unlikely(filp->f_mode & FMODE_PATH)) { - if (!check_fcntl_cmd(cmd)) { - fput(filp); - goto out; - } + if (!check_fcntl_cmd(cmd)) + goto out1; } err = security_file_fcntl(filp, cmd, arg); - if (err) { - fput(filp); - return err; - } - err = -EBADF; + if (err) + goto out1; switch (cmd) { case F_GETLK64: @@ -507,7 +498,8 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, err = do_fcntl(fd, cmd, arg, filp); break; } - fput(filp); +out1: + fput_light(filp, fput_needed); out: return err; } -- cgit v0.10.2 From 20ba5d736f5a42abbee3e14384ff2d0fdaef2e6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:44:12 -0400 Subject: switch signalfd4() to fget_light/fput_light Signed-off-by: Al Viro diff --git a/fs/signalfd.c b/fs/signalfd.c index 7ae2a57..9f35a37 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -269,12 +269,13 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, if (ufd < 0) kfree(ctx); } else { - struct file *file = fget(ufd); + int fput_needed; + struct file *file = fget_light(ufd, &fput_needed); if (!file) return -EBADF; ctx = file->private_data; if (file->f_op != &signalfd_fops) { - fput(file); + fput_light(file, fput_needed); return -EINVAL; } spin_lock_irq(&current->sighand->siglock); @@ -282,7 +283,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, spin_unlock_irq(&current->sighand->siglock); wake_up(&current->sighand->signalfd_wqh); - fput(file); + fput_light(file, fput_needed); } return ufd; -- cgit v0.10.2 From bdc689594bf3ce967bc3a17ba5db3f23222dede0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:46:53 -0400 Subject: switch flock to fget_light/fput_light Signed-off-by: Al Viro diff --git a/fs/locks.c b/fs/locks.c index 4f441e4..814c51d 100644 --- a/fs/locks.c +++ 
b/fs/locks.c @@ -1636,12 +1636,13 @@ EXPORT_SYMBOL(flock_lock_file_wait); SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) { struct file *filp; + int fput_needed; struct file_lock *lock; int can_sleep, unlock; int error; error = -EBADF; - filp = fget(fd); + filp = fget_light(fd, &fput_needed); if (!filp) goto out; @@ -1674,7 +1675,7 @@ SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) locks_free_lock(lock); out_putf: - fput(filp); + fput_light(filp, fput_needed); out: return error; } -- cgit v0.10.2 From 0aa2ee5f0a341a7fc081a499b221d29784ed711d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:47:27 -0400 Subject: switch statfs to fget_light/fput_light Signed-off-by: Al Viro diff --git a/fs/statfs.c b/fs/statfs.c index 43e6b6f..95ad5c0 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -87,11 +87,12 @@ int user_statfs(const char __user *pathname, struct kstatfs *st) int fd_statfs(int fd, struct kstatfs *st) { - struct file *file = fget(fd); + int fput_needed; + struct file *file = fget_light(fd, &fput_needed); int error = -EBADF; if (file) { error = vfs_statfs(&file->f_path, st); - fput(file); + fput_light(file, fput_needed); } return error; } -- cgit v0.10.2 From c217a2a004d98d09dfceec3a023c563ed800e833 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 21 Apr 2012 18:47:57 -0400 Subject: switch utimes() to fget_light/fput_light Signed-off-by: Al Viro diff --git a/fs/utimes.c b/fs/utimes.c index ba653f3..fa4dbe4 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -140,18 +140,19 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, goto out; if (filename == NULL && dfd != AT_FDCWD) { + int fput_needed; struct file *file; if (flags & AT_SYMLINK_NOFOLLOW) goto out; - file = fget(dfd); + file = fget_light(dfd, &fput_needed); error = -EBADF; if (!file) goto out; error = utimes_common(&file->f_path, times); - fput(file); + fput_light(file, fput_needed); } else { struct path path; int lookup_flags = 0; -- cgit v0.10.2 
From 77ba78776e90e8de541f13b326e284c74286252f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 06:24:04 -0400 Subject: xfs: switch to proper __bitwise type for KM_... flags Signed-off-by: Al Viro diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index a907de5..4a7286c 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -46,7 +46,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) } void * -kmem_alloc(size_t size, unsigned int __nocast flags) +kmem_alloc(size_t size, xfs_km_flags_t flags) { int retries = 0; gfp_t lflags = kmem_flags_convert(flags); @@ -65,7 +65,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) } void * -kmem_zalloc(size_t size, unsigned int __nocast flags) +kmem_zalloc(size_t size, xfs_km_flags_t flags) { void *ptr; @@ -87,7 +87,7 @@ kmem_free(const void *ptr) void * kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, - unsigned int __nocast flags) + xfs_km_flags_t flags) { void *new; @@ -102,7 +102,7 @@ kmem_realloc(const void *ptr, size_t newsize, size_t oldsize, } void * -kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags) { int retries = 0; gfp_t lflags = kmem_flags_convert(flags); @@ -121,7 +121,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) } void * -kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags) +kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) { void *ptr; diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index ab7c53f..b2f2620 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -27,10 +27,11 @@ * General memory allocation interfaces */ -#define KM_SLEEP 0x0001u -#define KM_NOSLEEP 0x0002u -#define KM_NOFS 0x0004u -#define KM_MAYFAIL 0x0008u +typedef unsigned __bitwise xfs_km_flags_t; +#define KM_SLEEP ((__force xfs_km_flags_t)0x0001u) +#define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u) +#define KM_NOFS ((__force xfs_km_flags_t)0x0004u) +#define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u) /* 
* We use a special process flag to avoid recursive callbacks into @@ -38,7 +39,7 @@ * warnings, so we explicitly skip any generic ones (silly of us). */ static inline gfp_t -kmem_flags_convert(unsigned int __nocast flags) +kmem_flags_convert(xfs_km_flags_t flags) { gfp_t lflags; @@ -54,9 +55,9 @@ kmem_flags_convert(unsigned int __nocast flags) return lflags; } -extern void *kmem_alloc(size_t, unsigned int __nocast); -extern void *kmem_zalloc(size_t, unsigned int __nocast); -extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast); +extern void *kmem_alloc(size_t, xfs_km_flags_t); +extern void *kmem_zalloc(size_t, xfs_km_flags_t); +extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t); extern void kmem_free(const void *); static inline void *kmem_zalloc_large(size_t size) @@ -107,7 +108,7 @@ kmem_zone_destroy(kmem_zone_t *zone) kmem_cache_destroy(zone); } -extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast); -extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); +extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t); +extern void *kmem_zone_zalloc(kmem_zone_t *, xfs_km_flags_t); #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 6b965bf..f30d980 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -3152,7 +3152,7 @@ xlog_ticket_alloc( int cnt, char client, bool permanent, - int alloc_flags) + xfs_km_flags_t alloc_flags) { struct xlog_ticket *tic; uint num_headers; diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 735ff1e..5bc3326 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -555,7 +555,7 @@ extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int); extern kmem_zone_t *xfs_log_ticket_zone; struct xlog_ticket *xlog_ticket_alloc(struct log *log, int unit_bytes, int count, char client, bool permanent, - int alloc_flags); + xfs_km_flags_t alloc_flags); static inline void diff --git a/fs/xfs/xfs_trans.c 
b/fs/xfs/xfs_trans.c index cdf896f..fdf3245 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -584,7 +584,7 @@ xfs_trans_t * _xfs_trans_alloc( xfs_mount_t *mp, uint type, - uint memflags) + xfs_km_flags_t memflags) { xfs_trans_t *tp; diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 7ab99e1..7c37b53 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -443,7 +443,7 @@ typedef struct xfs_trans { * XFS transaction mechanism exported interfaces. */ xfs_trans_t *xfs_trans_alloc(struct xfs_mount *, uint); -xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, uint); +xfs_trans_t *_xfs_trans_alloc(struct xfs_mount *, uint, xfs_km_flags_t); xfs_trans_t *xfs_trans_dup(xfs_trans_t *); int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint, uint, uint); -- cgit v0.10.2 From 6d42e7e9f6d86ed4dfacde75a6cf515068f9749c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 14:25:07 -0400 Subject: ubifs: use generic_fillattr() don't open-code it... Signed-off-by: Al Viro diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 62a2727..a6d42ef 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1127,16 +1127,7 @@ int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); - stat->dev = inode->i_sb->s_dev; - stat->ino = inode->i_ino; - stat->mode = inode->i_mode; - stat->nlink = inode->i_nlink; - stat->uid = inode->i_uid; - stat->gid = inode->i_gid; - stat->rdev = inode->i_rdev; - stat->atime = inode->i_atime; - stat->mtime = inode->i_mtime; - stat->ctime = inode->i_ctime; + generic_fillattr(inode, stat); stat->blksize = UBIFS_BLOCK_SIZE; stat->size = ui->ui_size; -- cgit v0.10.2 From b0b0382bb4904965a9e9fca77ad87514dfda0d1c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 14:34:06 -0400 Subject: ->encode_fh() API change pass inode + parent's inode or NULL instead of dentry + bool saying whether we want the parent or not. NOTE: that needs ceph fix folded in. 
Signed-off-by: Al Viro diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index e887ee6..614f34a 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -13,15 +13,14 @@ parent_root_objectid) / 4) #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4) -static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, - int connectable) +static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len, + struct inode *parent) { struct btrfs_fid *fid = (struct btrfs_fid *)fh; - struct inode *inode = dentry->d_inode; int len = *max_len; int type; - if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) { + if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) { *max_len = BTRFS_FID_SIZE_CONNECTABLE; return 255; } else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) { @@ -36,19 +35,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, fid->root_objectid = BTRFS_I(inode)->root->objectid; fid->gen = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; + if (parent) { u64 parent_root_id; - spin_lock(&dentry->d_lock); - - parent = dentry->d_parent->d_inode; fid->parent_objectid = BTRFS_I(parent)->location.objectid; fid->parent_gen = parent->i_generation; parent_root_id = BTRFS_I(parent)->root->objectid; - spin_unlock(&dentry->d_lock); - if (parent_root_id != fid->root_objectid) { fid->parent_root_objectid = parent_root_id; len = BTRFS_FID_SIZE_CONNECTABLE_ROOT; diff --git a/fs/ceph/export.c b/fs/ceph/export.c index fbb2a64..4f9234c 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -247,7 +247,9 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, } const struct export_operations ceph_export_ops = { +#ifdef CEPH_BREAKAGE_FIXED .encode_fh = ceph_encode_fh, +#endif .fh_to_dentry = ceph_fh_to_dentry, .fh_to_parent = ceph_fh_to_parent, }; diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index b05acb7..b0201ca 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c 
@@ -304,24 +304,23 @@ out: /** * export_encode_fh - default export_operations->encode_fh function - * @dentry: the dentry to encode + * @inode: the object to encode * @fh: where to store the file handle fragment * @max_len: maximum length to store there - * @connectable: whether to store parent information + * @parent: parent directory inode, if wanted * * This default encode_fh function assumes that the 32 inode number * is suitable for locating an inode, and that the generation number * can be used to check that it is still valid. It places them in the * filehandle fragment where export_decode_fh expects to find them. */ -static int export_encode_fh(struct dentry *dentry, struct fid *fid, - int *max_len, int connectable) +static int export_encode_fh(struct inode *inode, struct fid *fid, + int *max_len, struct inode *parent) { - struct inode * inode = dentry->d_inode; int len = *max_len; int type = FILEID_INO32_GEN; - if (connectable && (len < 4)) { + if (parent && (len < 4)) { *max_len = 4; return 255; } else if (len < 2) { @@ -332,14 +331,9 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid, len = 2; fid->i32.ino = inode->i_ino; fid->i32.gen = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; + if (parent) { fid->i32.parent_ino = parent->i_ino; fid->i32.parent_gen = parent->i_generation; - spin_unlock(&dentry->d_lock); len = 4; type = FILEID_INO32_GEN_PARENT; } @@ -352,11 +346,22 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len, { const struct export_operations *nop = dentry->d_sb->s_export_op; int error; + struct dentry *p = NULL; + struct inode *inode = dentry->d_inode, *parent = NULL; + if (connectable && !S_ISDIR(inode->i_mode)) { + p = dget_parent(dentry); + /* + * note that while p might've ceased to be our parent already, + * it's still pinned by us and still positive. 
+ */ + parent = p->d_inode; + } if (nop->encode_fh) - error = nop->encode_fh(dentry, fid->raw, max_len, connectable); + error = nop->encode_fh(inode, fid->raw, max_len, parent); else - error = export_encode_fh(dentry, fid, max_len, connectable); + error = export_encode_fh(inode, fid, max_len, parent); + dput(p); return error; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index b3d290c..7edfaad 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -752,10 +752,9 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, } static int -fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) +fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) { int len = *lenp; - struct inode *inode = de->d_inode; u32 ipos_h, ipos_m, ipos_l; if (len < 5) { @@ -771,9 +770,9 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable) fh[1] = inode->i_generation; fh[2] = ipos_h; fh[3] = ipos_m | MSDOS_I(inode)->i_logstart; - spin_lock(&de->d_lock); - fh[4] = ipos_l | MSDOS_I(de->d_parent->d_inode)->i_logstart; - spin_unlock(&de->d_lock); + fh[4] = ipos_l; + if (parent) + fh[4] |= MSDOS_I(parent)->i_logstart; return 3; } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 56f6dcf..42678a3 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -627,12 +627,10 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, return ERR_PTR(err); } -static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, - int connectable) +static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len, + struct inode *parent) { - struct inode *inode = dentry->d_inode; - bool encode_parent = connectable && !S_ISDIR(inode->i_mode); - int len = encode_parent ? 6 : 3; + int len = parent ? 
6 : 3; u64 nodeid; u32 generation; @@ -648,14 +646,9 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, fh[1] = (u32)(nodeid & 0xffffffff); fh[2] = generation; - if (encode_parent) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; + if (parent) { nodeid = get_fuse_inode(parent)->nodeid; generation = parent->i_generation; - spin_unlock(&dentry->d_lock); fh[3] = (u32)(nodeid >> 32); fh[4] = (u32)(nodeid & 0xffffffff); @@ -663,7 +656,7 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len, } *max_len = len; - return encode_parent ? 0x82 : 0x81; + return parent ? 0x82 : 0x81; } static struct dentry *fuse_fh_to_dentry(struct super_block *sb, diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 70ba891..e8ed6d4 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c @@ -28,15 +28,14 @@ #define GFS2_LARGE_FH_SIZE 8 #define GFS2_OLD_FH_SIZE 10 -static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, - int connectable) +static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, + struct inode *parent) { __be32 *fh = (__force __be32 *)p; - struct inode *inode = dentry->d_inode; struct super_block *sb = inode->i_sb; struct gfs2_inode *ip = GFS2_I(inode); - if (connectable && (*len < GFS2_LARGE_FH_SIZE)) { + if (parent && (*len < GFS2_LARGE_FH_SIZE)) { *len = GFS2_LARGE_FH_SIZE; return 255; } else if (*len < GFS2_SMALL_FH_SIZE) { @@ -50,14 +49,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_SMALL_FH_SIZE; - if (!connectable || inode == sb->s_root->d_inode) + if (!parent || inode == sb->s_root->d_inode) return *len; - spin_lock(&dentry->d_lock); - inode = dentry->d_parent->d_inode; - ip = GFS2_I(inode); - igrab(inode); - spin_unlock(&dentry->d_lock); + ip = GFS2_I(parent); fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32); fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF); @@ -65,8 
+60,6 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len, fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF); *len = GFS2_LARGE_FH_SIZE; - iput(inode); - return *len; } diff --git a/fs/isofs/export.c b/fs/isofs/export.c index dd4687f..aa4356d 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -107,12 +107,11 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) } static int -isofs_export_encode_fh(struct dentry *dentry, +isofs_export_encode_fh(struct inode *inode, __u32 *fh32, int *max_len, - int connectable) + struct inode *parent) { - struct inode * inode = dentry->d_inode; struct iso_inode_info * ei = ISOFS_I(inode); int len = *max_len; int type = 1; @@ -124,7 +123,7 @@ isofs_export_encode_fh(struct dentry *dentry, * offset of the inode and the upper 16 bits of fh32[1] to * hold the offset of the parent. */ - if (connectable && (len < 5)) { + if (parent && (len < 5)) { *max_len = 5; return 255; } else if (len < 3) { @@ -136,16 +135,12 @@ isofs_export_encode_fh(struct dentry *dentry, fh32[0] = ei->i_iget5_block; fh16[2] = (__u16)ei->i_iget5_offset; /* fh16 [sic] */ fh32[2] = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; + if (parent) { struct iso_inode_info *eparent; - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; eparent = ISOFS_I(parent); fh32[3] = eparent->i_iget5_block; fh16[3] = (__u16)eparent->i_iget5_offset; /* fh16 [sic] */ fh32[4] = parent->i_generation; - spin_unlock(&dentry->d_lock); len = 5; type = 2; } diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 0bb2c20..b728479 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -508,31 +508,29 @@ static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); } -static int nilfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp, - int connectable) +static int nilfs_encode_fh(struct inode *inode, 
__u32 *fh, int *lenp, + struct inode *parent) { struct nilfs_fid *fid = (struct nilfs_fid *)fh; - struct inode *inode = dentry->d_inode; struct nilfs_root *root = NILFS_I(inode)->i_root; int type; - if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE || - (connectable && *lenp < NILFS_FID_SIZE_CONNECTABLE)) + if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) { + *lenp = NILFS_FID_SIZE_CONNECTABLE; + return 255; + } + if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) { + *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; return 255; + } fid->cno = root->cno; fid->ino = inode->i_ino; fid->gen = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - parent = dentry->d_parent->d_inode; + if (parent) { fid->parent_ino = parent->i_ino; fid->parent_gen = parent->i_generation; - spin_unlock(&dentry->d_lock); - type = FILEID_NILFS_WITH_PARENT; *lenp = NILFS_FID_SIZE_CONNECTABLE; } else { diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 745db42..322216a 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -177,21 +177,23 @@ bail: return parent; } -static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, - int connectable) +static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len, + struct inode *parent) { - struct inode *inode = dentry->d_inode; int len = *max_len; int type = 1; u64 blkno; u32 generation; __le32 *fh = (__force __le32 *) fh_in; +#ifdef TRACE_HOOKS_ARE_NOT_BRAINDEAD_IN_YOUR_OPINION +#error "You go ahead and fix that mess, then. 
Somehow" trace_ocfs2_encode_fh_begin(dentry, dentry->d_name.len, dentry->d_name.name, fh, len, connectable); +#endif - if (connectable && (len < 6)) { + if (parent && (len < 6)) { *max_len = 6; type = 255; goto bail; @@ -211,12 +213,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, fh[1] = cpu_to_le32((u32)(blkno & 0xffffffff)); fh[2] = cpu_to_le32(generation); - if (connectable && !S_ISDIR(inode->i_mode)) { - struct inode *parent; - - spin_lock(&dentry->d_lock); - - parent = dentry->d_parent->d_inode; + if (parent) { blkno = OCFS2_I(parent)->ip_blkno; generation = parent->i_generation; @@ -224,8 +221,6 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len, fh[4] = cpu_to_le32((u32)(blkno & 0xffffffff)); fh[5] = cpu_to_le32(generation); - spin_unlock(&dentry->d_lock); - len = 6; type = 2; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 59d0687..a6d4268 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1592,13 +1592,12 @@ struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, (fh_type == 6) ? fid->raw[5] : 0); } -int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, - int need_parent) +int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, + struct inode *parent) { - struct inode *inode = dentry->d_inode; int maxlen = *lenp; - if (need_parent && (maxlen < 5)) { + if (parent && (maxlen < 5)) { *lenp = 5; return 255; } else if (maxlen < 3) { @@ -1610,20 +1609,15 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); data[2] = inode->i_generation; *lenp = 3; - /* no room for directory info? 
return what we've stored so far */ - if (maxlen < 5 || !need_parent) - return 3; - - spin_lock(&dentry->d_lock); - inode = dentry->d_parent->d_inode; - data[3] = inode->i_ino; - data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id); - *lenp = 5; - if (maxlen >= 6) { - data[5] = inode->i_generation; - *lenp = 6; - } - spin_unlock(&dentry->d_lock); + if (parent) { + data[3] = parent->i_ino; + data[4] = le32_to_cpu(INODE_PKEY(parent)->k_dir_id); + *lenp = 5; + if (maxlen >= 6) { + data[5] = parent->i_generation; + *lenp = 6; + } + } return *lenp; } diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index a59d271..14a4f9d 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -2611,8 +2611,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); -int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp, - int connectable); +int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp, + struct inode *parent); int reiserfs_truncate_file(struct inode *, int update_timestamps); void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset, diff --git a/fs/udf/namei.c b/fs/udf/namei.c index a165c66..1802417 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1260,16 +1260,15 @@ static struct dentry *udf_fh_to_parent(struct super_block *sb, fid->udf.parent_partref, fid->udf.parent_generation); } -static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, - int connectable) +static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp, + struct inode *parent) { int len = *lenp; - struct inode *inode = de->d_inode; struct kernel_lb_addr location = UDF_I(inode)->i_location; struct fid *fid = (struct fid *)fh; int type = FILEID_UDF_WITHOUT_PARENT; - if (connectable && (len < 5)) { + if (parent && (len < 5)) { *lenp = 5; return 255; } else if (len < 3) { @@ 
-1282,14 +1281,11 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp, fid->udf.partref = location.partitionReferenceNum; fid->udf.generation = inode->i_generation; - if (connectable && !S_ISDIR(inode->i_mode)) { - spin_lock(&de->d_lock); - inode = de->d_parent->d_inode; - location = UDF_I(inode)->i_location; + if (parent) { + location = UDF_I(parent)->i_location; fid->udf.parent_block = location.logicalBlockNum; fid->udf.parent_partref = location.partitionReferenceNum; fid->udf.parent_generation = inode->i_generation; - spin_unlock(&de->d_lock); *lenp = 5; type = FILEID_UDF_WITH_PARENT; } diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 2d25d19..4267922 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -52,19 +52,18 @@ static int xfs_fileid_length(int fileid_type) STATIC int xfs_fs_encode_fh( - struct dentry *dentry, - __u32 *fh, - int *max_len, - int connectable) + struct inode *inode, + __u32 *fh, + int *max_len, + struct inode *parent) { struct fid *fid = (struct fid *)fh; struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh; - struct inode *inode = dentry->d_inode; int fileid_type; int len; /* Directories don't need their parent encoded, they have ".." 
*/ - if (S_ISDIR(inode->i_mode) || !connectable) + if (!parent) fileid_type = FILEID_INO32_GEN; else fileid_type = FILEID_INO32_GEN_PARENT; @@ -96,20 +95,16 @@ xfs_fs_encode_fh( switch (fileid_type) { case FILEID_INO32_GEN_PARENT: - spin_lock(&dentry->d_lock); - fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; - fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation; - spin_unlock(&dentry->d_lock); + fid->i32.parent_ino = XFS_I(parent)->i_ino; + fid->i32.parent_gen = parent->i_generation; /*FALLTHRU*/ case FILEID_INO32_GEN: fid->i32.ino = XFS_I(inode)->i_ino; fid->i32.gen = inode->i_generation; break; case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG: - spin_lock(&dentry->d_lock); - fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino; - fid64->parent_gen = dentry->d_parent->d_inode->i_generation; - spin_unlock(&dentry->d_lock); + fid64->parent_ino = XFS_I(parent)->i_ino; + fid64->parent_gen = parent->i_generation; /*FALLTHRU*/ case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG: fid64->ino = XFS_I(inode)->i_ino; diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 3a4cef5..12291a7 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -165,8 +165,8 @@ struct fid { */ struct export_operations { - int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, - int connectable); + int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len, + struct inode *parent); struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid, int fh_len, int fh_type); struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid, diff --git a/mm/cleancache.c b/mm/cleancache.c index 5646c74..32e6f41 100644 --- a/mm/cleancache.c +++ b/mm/cleancache.c @@ -80,7 +80,7 @@ EXPORT_SYMBOL(__cleancache_init_shared_fs); static int cleancache_get_key(struct inode *inode, struct cleancache_filekey *key) { - int (*fhfn)(struct dentry *, __u32 *fh, int *, int); + int (*fhfn)(struct inode *, __u32 *fh, int *, struct 
inode *); int len = 0, maxlen = CLEANCACHE_KEY_MAX; struct super_block *sb = inode->i_sb; @@ -88,9 +88,7 @@ static int cleancache_get_key(struct inode *inode, if (sb->s_export_op != NULL) { fhfn = sb->s_export_op->encode_fh; if (fhfn) { - struct dentry d; - d.d_inode = inode; - len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0); + len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL); if (len <= 0 || len == 255) return -1; if (maxlen > CLEANCACHE_KEY_MAX) diff --git a/mm/shmem.c b/mm/shmem.c index be5af34..3711422 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2033,11 +2033,9 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb, return dentry; } -static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, - int connectable) +static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len, + struct inode *parent) { - struct inode *inode = dentry->d_inode; - if (*len < 3) { *len = 3; return 255; -- cgit v0.10.2 From c862868bb455694704c255481369c40d7185eb25 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 5 Apr 2012 12:07:36 -0700 Subject: ceph: move encode_fh to new API Use parent_inode as a flag for whether nfsd wants a connectable fh, but generate one opportunistically so that we can take advantage of the additional info in there. Signed-off-by: Sage Weil Signed-off-by: Al Viro diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 4f9234c..8e1b60e 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -40,38 +40,49 @@ struct ceph_nfs_confh { u32 parent_name_hash; } __attribute__ ((packed)); -static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, - int connectable) +/* + * The presence of @parent_inode here tells us whether NFS wants a + * connectable file handle. However, we want to make a connectable + * file handle unconditionally so that the MDS gets as much of a hint + * as possible.
That means we only use @parent_inode to indicate + * whether nfsd wants a connectable fh, and whether we should indicate + * failure from a too-small @max_len. + */ +static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, + struct inode *parent_inode) { int type; struct ceph_nfs_fh *fh = (void *)rawfh; struct ceph_nfs_confh *cfh = (void *)rawfh; - struct dentry *parent; - struct inode *inode = dentry->d_inode; int connected_handle_length = sizeof(*cfh)/4; int handle_length = sizeof(*fh)/4; + struct dentry *dentry = d_find_alias(inode); + struct dentry *parent; /* don't re-export snaps */ if (ceph_snap(inode) != CEPH_NOSNAP) return -EINVAL; - spin_lock(&dentry->d_lock); - parent = dentry->d_parent; - if (*max_len >= connected_handle_length) { + /* if we found an alias, generate a connectable fh */ + if (*max_len >= connected_handle_length && dentry) { dout("encode_fh %p connectable\n", dentry); - cfh->ino = ceph_ino(dentry->d_inode); + spin_lock(&dentry->d_lock); + parent = dentry->d_parent; + cfh->ino = ceph_ino(inode); cfh->parent_ino = ceph_ino(parent->d_inode); cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, dentry); *max_len = connected_handle_length; type = 2; + spin_unlock(&dentry->d_lock); } else if (*max_len >= handle_length) { - if (connectable) { + if (parent_inode) { + /* nfsd wants connectable */ *max_len = connected_handle_length; type = 255; } else { dout("encode_fh %p\n", dentry); - fh->ino = ceph_ino(dentry->d_inode); + fh->ino = ceph_ino(inode); *max_len = handle_length; type = 1; } @@ -79,7 +90,6 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, *max_len = handle_length; type = 255; } - spin_unlock(&dentry->d_lock); return type; } @@ -247,9 +257,7 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb, } const struct export_operations ceph_export_ops = { -#ifdef CEPH_BREAKAGE_FIXED .encode_fh = ceph_encode_fh, -#endif .fh_to_dentry = ceph_fh_to_dentry, .fh_to_parent =
ceph_fh_to_parent, }; -- cgit v0.10.2 From cc1dad7183e4cb7f5d313b6942f2059fc0eabab6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 19:40:47 -0400 Subject: selinuxfs snprintf() misuses a) %d does _not_ produce a page worth of output b) snprintf() doesn't return negatives - it used to in old glibc, but that's the kernel... Signed-off-by: Al Viro diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 4e93f9e..3ad2902 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1259,12 +1259,8 @@ static int sel_make_bools(void) if (!inode) goto out; - ret = -EINVAL; - len = snprintf(page, PAGE_SIZE, "/%s/%s", BOOL_DIR_NAME, names[i]); - if (len < 0) - goto out; - ret = -ENAMETOOLONG; + len = snprintf(page, PAGE_SIZE, "/%s/%s", BOOL_DIR_NAME, names[i]); if (len >= PAGE_SIZE) goto out; @@ -1557,19 +1553,10 @@ static inline u32 sel_ino_to_perm(unsigned long ino) static ssize_t sel_read_class(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - ssize_t rc, len; - char *page; unsigned long ino = file->f_path.dentry->d_inode->i_ino; - - page = (char *)__get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_class(ino)); - rc = simple_read_from_buffer(buf, count, ppos, page, len); - free_page((unsigned long)page); - - return rc; + char res[TMPBUFLEN]; + ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_class(ino)); + return simple_read_from_buffer(buf, count, ppos, res, len); } static const struct file_operations sel_class_ops = { @@ -1580,19 +1567,10 @@ static const struct file_operations sel_class_ops = { static ssize_t sel_read_perm(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - ssize_t rc, len; - char *page; unsigned long ino = file->f_path.dentry->d_inode->i_ino; - - page = (char *)__get_free_page(GFP_KERNEL); - if (!page) - return -ENOMEM; - - len = snprintf(page, PAGE_SIZE, "%d", sel_ino_to_perm(ino)); - rc = 
simple_read_from_buffer(buf, count, ppos, page, len); - free_page((unsigned long)page); - - return rc; + char res[TMPBUFLEN]; + ssize_t len = snprintf(res, sizeof(res), "%d", sel_ino_to_perm(ino)); + return simple_read_from_buffer(buf, count, ppos, res, len); } static const struct file_operations sel_perm_ops = { -- cgit v0.10.2 From af569596a9b85626564149c5c4c0c17d05baa2da Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 2 Apr 2012 20:02:53 -0400 Subject: kill v9fs_dentry_from_dir_inode() In *all* callers we have a dentry of child of that directory. Just use ->d_parent of that one, for fsck sake... Signed-off-by: Al Viro diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index a1e6c99..e3dd2a1 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -68,24 +68,6 @@ static gid_t v9fs_get_fsgid_for_create(struct inode *dir_inode) return current_fsgid(); } -/** - * v9fs_dentry_from_dir_inode - helper function to get the dentry from - * dir inode. - * - */ - -static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode) -{ - struct dentry *dentry; - - spin_lock(&inode->i_lock); - /* Directory should have only one entry. 
*/ - BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry)); - dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias); - spin_unlock(&inode->i_lock); - return dentry; -} - static int v9fs_test_inode_dotl(struct inode *inode, void *data) { struct v9fs_inode *v9inode = V9FS_I(inode); @@ -415,7 +397,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, if (dir->i_mode & S_ISGID) omode |= S_ISGID; - dir_dentry = v9fs_dentry_from_dir_inode(dir); + dir_dentry = dentry->d_parent; dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); @@ -793,7 +775,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, dir->i_ino, old_dentry->d_name.name, dentry->d_name.name); v9ses = v9fs_inode2v9ses(dir); - dir_dentry = v9fs_dentry_from_dir_inode(dir); + dir_dentry = dentry->d_parent; dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) return PTR_ERR(dfid); @@ -858,7 +840,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode, return -EINVAL; v9ses = v9fs_inode2v9ses(dir); - dir_dentry = v9fs_dentry_from_dir_inode(dir); + dir_dentry = dentry->d_parent; dfid = v9fs_fid_lookup(dir_dentry); if (IS_ERR(dfid)) { err = PTR_ERR(dfid); -- cgit v0.10.2 From 66f8f50920472f9b6d0a797a29dc8a8ada0b24c3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 6 Apr 2012 01:40:50 -0400 Subject: affs: bury unused macros ... unused since 2.4.4. Signed-off-by: Al Viro diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 45a0ce4..1fceb32 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -18,14 +18,6 @@ #define AFFS_GET_HASHENTRY(data,hashkey) be32_to_cpu(((struct dir_front *)data)->hashtable[hashkey]) #define AFFS_BLOCK(sb, bh, blk) (AFFS_HEAD(bh)->table[AFFS_SB(sb)->s_hashsize-1-(blk)]) -#ifdef __LITTLE_ENDIAN -#define BO_EXBITS 0x18UL -#elif defined(__BIG_ENDIAN) -#define BO_EXBITS 0x00UL -#else -#error Endianness must be known for affs to work. 
-#endif - #define AFFS_HEAD(bh) ((struct affs_head *)(bh)->b_data) #define AFFS_TAIL(sb, bh) ((struct affs_tail *)((bh)->b_data+(sb)->s_blocksize-sizeof(struct affs_tail))) #define AFFS_ROOT_HEAD(bh) ((struct affs_root_head *)(bh)->b_data) -- cgit v0.10.2 From 8515841086d14594b24cdc8febdcc7fd1bbc313e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Apr 2012 18:47:13 -0400 Subject: ocfs2: trivial endianness misannotations Signed-off-by: Al Viro diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c index 3a3ed4b..fbec0be 100644 --- a/fs/ocfs2/dlm/dlmast.c +++ b/fs/ocfs2/dlm/dlmast.c @@ -293,7 +293,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data, struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf; char *name; struct list_head *iter, *head=NULL; - u64 cookie; + __be64 cookie; u32 flags; u8 node; diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index a5952ce..de854cc 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -679,7 +679,7 @@ struct dlm_query_join_packet { }; union dlm_query_join_response { - u32 intval; + __be32 intval; struct dlm_query_join_packet packet; }; @@ -755,8 +755,8 @@ struct dlm_query_region { struct dlm_node_info { u8 ni_nodenum; u8 pad1; - u16 ni_ipv4_port; - u32 ni_ipv4_address; + __be16 ni_ipv4_port; + __be32 ni_ipv4_address; }; struct dlm_query_nodeinfo { diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 92f2ead..9e89d70 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -818,7 +818,7 @@ static void dlm_query_join_packet_to_wire(struct dlm_query_join_packet *packet, union dlm_query_join_response response; response.packet = *packet; - *wire = cpu_to_be32(response.intval); + *wire = be32_to_cpu(response.intval); } static void dlm_query_join_wire_to_packet(u32 wire, -- cgit v0.10.2 From f6a5690324d5ab9c33bbc0a6b4cc59c7fa34eeec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Apr 2012 19:52:19 -0400 Subject: ocfs2: deal 
with __user misannotations Signed-off-by: Al Viro diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index a1a1bfd..d96f7f8 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -864,7 +864,7 @@ int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, if (status) break; - reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; + reqp = (struct ocfs2_info_request __user *)(unsigned long)req_addr; if (!reqp) { status = -EINVAL; goto bail; @@ -888,9 +888,11 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) struct ocfs2_space_resv sr; struct ocfs2_new_group_input input; struct reflink_arguments args; - const char *old_path, *new_path; + const char __user *old_path; + const char __user *new_path; bool preserve; struct ocfs2_info info; + void __user *argp = (void __user *)arg; switch (cmd) { case OCFS2_IOC_GETFLAGS: @@ -937,17 +939,15 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return ocfs2_group_add(inode, &input); case OCFS2_IOC_REFLINK: - if (copy_from_user(&args, (struct reflink_arguments *)arg, - sizeof(args))) + if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; - old_path = (const char *)(unsigned long)args.old_path; - new_path = (const char *)(unsigned long)args.new_path; + old_path = (const char __user *)(unsigned long)args.old_path; + new_path = (const char __user *)(unsigned long)args.new_path; preserve = (args.preserve != 0); return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); case OCFS2_IOC_INFO: - if (copy_from_user(&info, (struct ocfs2_info __user *)arg, - sizeof(struct ocfs2_info))) + if (copy_from_user(&info, argp, sizeof(struct ocfs2_info))) return -EFAULT; return ocfs2_info_handle(inode, &info, 0); @@ -960,22 +960,20 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&range, (struct fstrim_range *)arg, - sizeof(range))) + if (copy_from_user(&range, argp, 
sizeof(range))) return -EFAULT; ret = ocfs2_trim_fs(sb, &range); if (ret < 0) return ret; - if (copy_to_user((struct fstrim_range *)arg, &range, - sizeof(range))) + if (copy_to_user(argp, &range, sizeof(range))) return -EFAULT; return 0; } case OCFS2_IOC_MOVE_EXT: - return ocfs2_ioctl_move_extents(filp, (void __user *)arg); + return ocfs2_ioctl_move_extents(filp, argp); default: return -ENOTTY; } @@ -988,6 +986,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct reflink_arguments args; struct inode *inode = file->f_path.dentry->d_inode; struct ocfs2_info info; + void __user *argp = (void __user *)arg; switch (cmd) { case OCFS2_IOC32_GETFLAGS: @@ -1006,16 +1005,14 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case FITRIM: break; case OCFS2_IOC_REFLINK: - if (copy_from_user(&args, (struct reflink_arguments *)arg, - sizeof(args))) + if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; preserve = (args.preserve != 0); return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), compat_ptr(args.new_path), preserve); case OCFS2_IOC_INFO: - if (copy_from_user(&info, (struct ocfs2_info __user *)arg, - sizeof(struct ocfs2_info))) + if (copy_from_user(&info, argp, sizeof(struct ocfs2_info))) return -EFAULT; return ocfs2_info_handle(inode, &info, 1); diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index b1e3fce..6083432 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -1082,8 +1082,7 @@ int ocfs2_ioctl_move_extents(struct file *filp, void __user *argp) context->file = filp; if (argp) { - if (copy_from_user(&range, (struct ocfs2_move_extents *)argp, - sizeof(range))) { + if (copy_from_user(&range, argp, sizeof(range))) { status = -EFAULT; goto out; } @@ -1138,8 +1137,7 @@ out: * length and new_offset even if failure happens somewhere. 
*/ if (argp) { - if (copy_to_user((struct ocfs2_move_extents *)argp, &range, - sizeof(range))) + if (copy_to_user(argp, &range, sizeof(range))) status = -EFAULT; } -- cgit v0.10.2 From 1db5df98faaf7aa6c25bc7d9703342d13678452a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 12 Apr 2012 19:58:53 -0400 Subject: ocfs2: kill endianness abuses in blockcheck.c ocfs2_block_check is for little-endian contents; if we just want its fields converted to host-endian in a couple of functions, just put those values into local u32 and u16... Signed-off-by: Al Viro diff --git a/fs/ocfs2/blockcheck.c b/fs/ocfs2/blockcheck.c index c7ee03c..0725e60 100644 --- a/fs/ocfs2/blockcheck.c +++ b/fs/ocfs2/blockcheck.c @@ -422,45 +422,46 @@ int ocfs2_block_check_validate(void *data, size_t blocksize, struct ocfs2_blockcheck_stats *stats) { int rc = 0; - struct ocfs2_block_check check; + u32 bc_crc32e; + u16 bc_ecc; u32 crc, ecc; ocfs2_blockcheck_inc_check(stats); - check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); - check.bc_ecc = le16_to_cpu(bc->bc_ecc); + bc_crc32e = le32_to_cpu(bc->bc_crc32e); + bc_ecc = le16_to_cpu(bc->bc_ecc); memset(bc, 0, sizeof(struct ocfs2_block_check)); /* Fast path - if the crc32 validates, we're good to go */ crc = crc32_le(~0, data, blocksize); - if (crc == check.bc_crc32e) + if (crc == bc_crc32e) goto out; ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: 0x%x, computed 0x%x.
Applying ECC.\n", - (unsigned int)check.bc_crc32e, (unsigned int)crc); + (unsigned int)bc_crc32e, (unsigned int)crc); /* Ok, try ECC fixups */ ecc = ocfs2_hamming_encode_block(data, blocksize); - ocfs2_hamming_fix_block(data, blocksize, ecc ^ check.bc_ecc); + ocfs2_hamming_fix_block(data, blocksize, ecc ^ bc_ecc); /* And check the crc32 again */ crc = crc32_le(~0, data, blocksize); - if (crc == check.bc_crc32e) { + if (crc == bc_crc32e) { ocfs2_blockcheck_inc_recover(stats); goto out; } mlog(ML_ERROR, "Fixed CRC32 failed: stored: 0x%x, computed 0x%x\n", - (unsigned int)check.bc_crc32e, (unsigned int)crc); + (unsigned int)bc_crc32e, (unsigned int)crc); rc = -EIO; out: - bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); - bc->bc_ecc = cpu_to_le16(check.bc_ecc); + bc->bc_crc32e = cpu_to_le32(bc_crc32e); + bc->bc_ecc = cpu_to_le16(bc_ecc); return rc; } @@ -528,7 +529,8 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, struct ocfs2_blockcheck_stats *stats) { int i, rc = 0; - struct ocfs2_block_check check; + u32 bc_crc32e; + u16 bc_ecc; u32 crc, ecc, fix; BUG_ON(nr < 0); @@ -538,21 +540,21 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, ocfs2_blockcheck_inc_check(stats); - check.bc_crc32e = le32_to_cpu(bc->bc_crc32e); - check.bc_ecc = le16_to_cpu(bc->bc_ecc); + bc_crc32e = le32_to_cpu(bc->bc_crc32e); + bc_ecc = le16_to_cpu(bc->bc_ecc); memset(bc, 0, sizeof(struct ocfs2_block_check)); /* Fast path - if the crc32 validates, we're good to go */ for (i = 0, crc = ~0; i < nr; i++) crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); - if (crc == check.bc_crc32e) + if (crc == bc_crc32e) goto out; ocfs2_blockcheck_inc_failure(stats); mlog(ML_ERROR, "CRC32 failed: stored: %u, computed %u. 
Applying ECC.\n", - (unsigned int)check.bc_crc32e, (unsigned int)crc); + (unsigned int)bc_crc32e, (unsigned int)crc); /* Ok, try ECC fixups */ for (i = 0, ecc = 0; i < nr; i++) { @@ -565,7 +567,7 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, bhs[i]->b_size * 8, bhs[i]->b_size * 8 * i); } - fix = ecc ^ check.bc_ecc; + fix = ecc ^ bc_ecc; for (i = 0; i < nr; i++) { /* * Try the fix against each buffer. It will only affect @@ -578,19 +580,19 @@ int ocfs2_block_check_validate_bhs(struct buffer_head **bhs, int nr, /* And check the crc32 again */ for (i = 0, crc = ~0; i < nr; i++) crc = crc32_le(crc, bhs[i]->b_data, bhs[i]->b_size); - if (crc == check.bc_crc32e) { + if (crc == bc_crc32e) { ocfs2_blockcheck_inc_recover(stats); goto out; } mlog(ML_ERROR, "Fixed CRC32 failed: stored: %u, computed %u\n", - (unsigned int)check.bc_crc32e, (unsigned int)crc); + (unsigned int)bc_crc32e, (unsigned int)crc); rc = -EIO; out: - bc->bc_crc32e = cpu_to_le32(check.bc_crc32e); - bc->bc_ecc = cpu_to_le16(check.bc_ecc); + bc->bc_crc32e = cpu_to_le32(bc_crc32e); + bc->bc_ecc = cpu_to_le16(bc_ecc); return rc; } -- cgit v0.10.2 From 528c032764f4d3c6cb5f5ece090d9d5882655982 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Apr 2012 11:03:55 -0400 Subject: btrfs: trivial endianness annotations Signed-off-by: Al Viro diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 202008e..eb45350 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -75,7 +75,8 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, return ERR_PTR(-ENOENT); } - inode->i_mapping->flags &= ~__GFP_FS; + mapping_set_gfp_mask(inode->i_mapping, + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); return inode; } @@ -365,7 +366,7 @@ static int io_ctl_prepare_pages(struct io_ctl *io_ctl, struct inode *inode, static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) { - u64 *val; + __le64 *val; io_ctl_map_page(io_ctl, 1); 
@@ -388,7 +389,7 @@ static void io_ctl_set_generation(struct io_ctl *io_ctl, u64 generation) static int io_ctl_check_generation(struct io_ctl *io_ctl, u64 generation) { - u64 *gen; + __le64 *gen; /* * Skip the crc area. If we don't check crcs then we just have a 64bit diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 12f5147..ad993bc 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c @@ -95,7 +95,7 @@ EXPORT_SYMBOL(ulist_reinit); * * The allocated ulist will be returned in an initialized state. */ -struct ulist *ulist_alloc(unsigned long gfp_mask) +struct ulist *ulist_alloc(gfp_t gfp_mask) { struct ulist *ulist = kmalloc(sizeof(*ulist), gfp_mask); @@ -144,7 +144,7 @@ EXPORT_SYMBOL(ulist_free); * unaltered. */ int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, - unsigned long gfp_mask) + gfp_t gfp_mask) { int i; diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h index 2e25dec..ad85b0e 100644 --- a/fs/btrfs/ulist.h +++ b/fs/btrfs/ulist.h @@ -59,10 +59,10 @@ struct ulist { void ulist_init(struct ulist *ulist); void ulist_fini(struct ulist *ulist); void ulist_reinit(struct ulist *ulist); -struct ulist *ulist_alloc(unsigned long gfp_mask); +struct ulist *ulist_alloc(gfp_t gfp_mask); void ulist_free(struct ulist *ulist); int ulist_add(struct ulist *ulist, u64 val, unsigned long aux, - unsigned long gfp_mask); + gfp_t gfp_mask); struct ulist_node *ulist_next(struct ulist *ulist, struct ulist_node *prev); #endif -- cgit v0.10.2 From de5e2b36289e6c81c3f7dcb9eef38d78de1f8b5c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 13 Apr 2012 01:24:37 -0400 Subject: hpfs: endianness bugs a couple of le32 and le16 used with wrong le..._to_cpu(), plus idiotic use of le32_to_cpu() on 1-bit bitfield Signed-off-by: Al Viro diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index d8b84d1..cd098e3 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -246,7 +246,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, if (le16_to_cpu(fnode->ea_offs) < 0xc4 || 
le16_to_cpu(fnode->ea_offs) + le16_to_cpu(fnode->acl_size_s) + le16_to_cpu(fnode->ea_size_s) > 0x200) { hpfs_error(s, "fnode %08lx: ea_offs == %03x, ea_size_s == %03x", (unsigned long)inode->i_ino, - le32_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); + le16_to_cpu(fnode->ea_offs), le16_to_cpu(fnode->ea_size_s)); return; } if ((le16_to_cpu(fnode->ea_size_s) || !le32_to_cpu(fnode->ea_size_l)) && diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 54f6ecc..08e85b0 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -572,7 +572,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) mark_buffer_dirty(bh2); } - if (le32_to_cpu(spareblock->hotfixes_used) || le32_to_cpu(spareblock->n_spares_used)) { + if (spareblock->hotfixes_used || le32_to_cpu(spareblock->n_spares_used)) { if (errs >= 2) { printk("HPFS: Hotfixes not supported here, try chkdsk\n"); mark_dirty(s, 0); @@ -645,7 +645,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) root->i_mtime.tv_nsec = 0; root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date)); root->i_ctime.tv_nsec = 0; - hpfs_i(root)->i_ea_size = le16_to_cpu(de->ea_size); + hpfs_i(root)->i_ea_size = le32_to_cpu(de->ea_size); hpfs_i(root)->i_parent_dir = root->i_ino; if (root->i_size == -1) root->i_size = 2048; -- cgit v0.10.2 From 185553b22436fe754f4ae8ec11344e822bb83717 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 17 Apr 2012 17:03:25 -0700 Subject: fs: fix inode.c kernel-doc warnings Fix kernel-doc warnings in fs/inode.c: Warning(fs/inode.c:1493): No description found for parameter 'path' Warning(fs/inode.c:1493): Excess function parameter 'mnt' description in 'touch_atime' Warning(fs/inode.c:1493): Excess function parameter 'dentry' description in 'touch_atime' Signed-off-by: Randy Dunlap Signed-off-by: Al Viro diff --git a/fs/inode.c b/fs/inode.c index 6bc8761..183ddd6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1489,8 +1489,7 @@ static int 
relatime_need_update(struct vfsmount *mnt, struct inode *inode, /** * touch_atime - update the access time - * @mnt: mount the inode is accessed on - * @dentry: dentry accessed + * @path: the &struct path to update * * Update the accessed time on an inode and mark it for writeback. * This function automatically handles read only file systems and media, -- cgit v0.10.2 From 4085e155b14a89ee36f7bfc5bd07294b0c34b0e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 6 Apr 2012 13:21:09 -0400 Subject: hpfs: get rid of bitfields endianness wanking in extended_attribute Signed-off-by: Al Viro diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index 08b503e..3cb4d9c 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -483,8 +483,8 @@ void hpfs_remove_fnode(struct super_block *s, fnode_secno fno) else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno)); ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) - if (ea->indirect) - hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); + if (ea_indirect(ea)) + hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l)); brelse(bh); hpfs_free_sectors(s, fno, 1); diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index cd098e3..7cd0092 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -23,15 +23,15 @@ void hpfs_ea_ext_remove(struct super_block *s, secno a, int ano, unsigned len) return; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; - if (ea->indirect) { + if (ea_indirect(ea)) { if (ea_valuelen(ea) != 8) { - hpfs_error(s, "ea->indirect set while ea->valuelen!=8, %s %08x, pos %08x", + hpfs_error(s, "ea_indirect(ea) set while ea->valuelen!=8, %s %08x, pos %08x", ano ? 
"anode" : "sectors", a, pos); return; } if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 9, ex+4)) return; - hpfs_ea_remove(s, ea_sec(ea), ea->anode, ea_len(ea)); + hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); } pos += ea->namelen + ea_valuelen(ea) + 5; } @@ -81,7 +81,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { - if (ea->indirect) + if (ea_indirect(ea)) goto indirect; if (ea_valuelen(ea) >= size) return -EINVAL; @@ -101,10 +101,10 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, return -EIO; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return -EIO; - if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 8 : 0), ex + 4)) + if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) return -EIO; if (!strcmp(ea->name, key)) { - if (ea->indirect) + if (ea_indirect(ea)) goto indirect; if (ea_valuelen(ea) >= size) return -EINVAL; @@ -119,7 +119,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, indirect: if (ea_len(ea) >= size) return -EINVAL; - if (hpfs_ea_read(s, ea_sec(ea), ea->anode, 0, ea_len(ea), buf)) + if (hpfs_ea_read(s, ea_sec(ea), ea_in_anode(ea), 0, ea_len(ea), buf)) return -EIO; buf[ea_len(ea)] = 0; return 0; @@ -136,8 +136,8 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { - if (ea->indirect) - return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); + if (ea_indirect(ea)) + return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { printk("HPFS: out of memory for EA\n"); return NULL; @@ -159,11 
+159,11 @@ char *hpfs_get_ea(struct super_block *s, struct fnode *fnode, char *key, int *si return NULL; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return NULL; - if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 8 : 0), ex + 4)) + if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) return NULL; if (!strcmp(ea->name, key)) { - if (ea->indirect) - return get_indirect_ea(s, ea->anode, ea_sec(ea), *size = ea_len(ea)); + if (ea_indirect(ea)) + return get_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), *size = ea_len(ea)); if (!(ret = kmalloc((*size = ea_valuelen(ea)) + 1, GFP_NOFS))) { printk("HPFS: out of memory for EA\n"); return NULL; @@ -199,9 +199,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, struct extended_attribute *ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (!strcmp(ea->name, key)) { - if (ea->indirect) { + if (ea_indirect(ea)) { if (ea_len(ea) == size) - set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); + set_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), data, size); } else if (ea_valuelen(ea) == size) { memcpy(ea_data(ea), data, size); } @@ -220,12 +220,12 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, return; } if (hpfs_ea_read(s, a, ano, pos, 4, ex)) return; - if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea->indirect ? 8 : 0), ex + 4)) + if (hpfs_ea_read(s, a, ano, pos + 4, ea->namelen + 1 + (ea_indirect(ea) ? 8 : 0), ex + 4)) return; if (!strcmp(ea->name, key)) { - if (ea->indirect) { + if (ea_indirect(ea)) { if (ea_len(ea) == size) - set_indirect_ea(s, ea->anode, ea_sec(ea), data, size); + set_indirect_ea(s, ea_in_anode(ea), ea_sec(ea), data, size); } else { if (ea_valuelen(ea) == size) diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 8b0650a..ca90bde 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -528,32 +528,23 @@ struct anode run, or in multiple runs. 
Flags in the fnode tell whether the EA list is immediate, in a single run, or in multiple runs. */ +enum {EA_indirect = 1, EA_anode = 2, EA_needea = 128 }; struct extended_attribute { -#ifdef __LITTLE_ENDIAN - u8 indirect: 1; /* 1 -> value gives sector number + u8 flags; /* bit 0 set -> value gives sector number where real value starts */ - u8 anode: 1; /* 1 -> sector is an anode - that points to fragmented value */ - u8 flag23456: 5; - u8 needea: 1; /* required ea */ -#else - u8 needea: 1; /* required ea */ - u8 flag23456: 5; - u8 anode: 1; /* 1 -> sector is an anode + /* bit 1 set -> sector is an anode that points to fragmented value */ - u8 indirect: 1; /* 1 -> value gives sector number - where real value starts */ -#endif + /* bit 7 set -> required ea */ u8 namelen; /* length of name, bytes */ u8 valuelen_lo; /* length of value, bytes */ u8 valuelen_hi; /* length of value, bytes */ - u8 name[0]; + u8 name[]; /* u8 name[namelen]; ascii attrib name u8 nul; terminating '\0', not counted u8 value[valuelen]; value, arbitrary - if this.indirect, valuelen is 8 and the value is + if this.flags & 1, valuelen is 8 and the value is u32 length; real length of value, bytes secno secno; sector address where it starts if this.anode, the above sector number is the root of an anode tree @@ -561,6 +552,16 @@ struct extended_attribute */ }; +static inline bool ea_indirect(struct extended_attribute *ea) +{ + return ea->flags & EA_indirect; +} + +static inline bool ea_in_anode(struct extended_attribute *ea) +{ + return ea->flags & EA_anode; +} + /* Local Variables: comment-column: 40 -- cgit v0.10.2 From c4c995430a94e7d94526fcb347c4ba4b2ae82500 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 6 Apr 2012 14:30:07 -0400 Subject: hpfs: get rid of bitfields in struct fnode Signed-off-by: Al Viro diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index 3cb4d9c..ec5f8b9 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -479,13 +479,13 @@ void hpfs_remove_fnode(struct super_block *s, 
fnode_secno fno) struct extended_attribute *ea; struct extended_attribute *ea_end; if (!(fnode = hpfs_map_fnode(s, fno, &bh))) return; - if (!fnode->dirflag) hpfs_remove_btree(s, &fnode->btree); + if (!fnode_is_dir(fnode)) hpfs_remove_btree(s, &fnode->btree); else hpfs_remove_dtree(s, le32_to_cpu(fnode->u.external[0].disk_secno)); ea_end = fnode_end_ea(fnode); for (ea = fnode_ea(fnode); ea < ea_end; ea = next_ea(ea)) if (ea_indirect(ea)) hpfs_ea_remove(s, ea_sec(ea), ea_in_anode(ea), ea_len(ea)); - hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l)); + hpfs_ea_ext_remove(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l)); brelse(bh); hpfs_free_sectors(s, fno, 1); } diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 2fa0089..b8472f8 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -87,7 +87,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) ret = -EIOERROR; goto out; } - if (!fno->dirflag) { + if (!fnode_is_dir(fno)) { e = 1; hpfs_error(inode->i_sb, "not a directory, fnode %08lx", (unsigned long)inode->i_ino); diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 1e0e2ac..6bf9fde 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -1015,7 +1015,7 @@ struct hpfs_dirent *map_fnode_dirent(struct super_block *s, fnode_secno fno, kfree(name2); return NULL; } - if (!upf->dirflag) { + if (!fnode_is_dir(upf)) { brelse(bh); hpfs_error(s, "fnode %08x has non-directory parent %08x", fno, le32_to_cpu(f->up)); kfree(name2); diff --git a/fs/hpfs/ea.c b/fs/hpfs/ea.c index 7cd0092..bcaafcd 100644 --- a/fs/hpfs/ea.c +++ b/fs/hpfs/ea.c @@ -91,7 +91,7 @@ int hpfs_read_ea(struct super_block *s, struct fnode *fnode, char *key, } a = le32_to_cpu(fnode->ea_secno); len = le32_to_cpu(fnode->ea_size_l); - ano = fnode->ea_anode; + ano = fnode_in_anode(fnode); pos = 0; while (pos < len) { ea = (struct extended_attribute *)ex; @@ -148,7 +148,7 @@ char *hpfs_get_ea(struct super_block 
*s, struct fnode *fnode, char *key, int *si } a = le32_to_cpu(fnode->ea_secno); len = le32_to_cpu(fnode->ea_size_l); - ano = fnode->ea_anode; + ano = fnode_in_anode(fnode); pos = 0; while (pos < len) { char ex[4 + 255 + 1 + 8]; @@ -209,7 +209,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, } a = le32_to_cpu(fnode->ea_secno); len = le32_to_cpu(fnode->ea_size_l); - ano = fnode->ea_anode; + ano = fnode_in_anode(fnode); pos = 0; while (pos < len) { char ex[4 + 255 + 1 + 8]; @@ -276,7 +276,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, fnode->ea_size_l = cpu_to_le32(le16_to_cpu(fnode->ea_size_s)); fnode->ea_size_s = cpu_to_le16(0); fnode->ea_secno = cpu_to_le32(n); - fnode->ea_anode = cpu_to_le32(0); + fnode->flags &= ~FNODE_anode; mark_buffer_dirty(bh); brelse(bh); } @@ -288,9 +288,9 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, secno q = hpfs_alloc_sector(s, fno, 1, 0); if (!q) goto bail; fnode->ea_secno = cpu_to_le32(q); - fnode->ea_anode = 0; + fnode->flags &= ~FNODE_anode; len++; - } else if (!fnode->ea_anode) { + } else if (!fnode_in_anode(fnode)) { if (hpfs_alloc_if_possible(s, le32_to_cpu(fnode->ea_secno) + len)) { len++; } else { @@ -310,7 +310,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, anode->u.external[0].length = cpu_to_le32(len); mark_buffer_dirty(bh); brelse(bh); - fnode->ea_anode = 1; + fnode->flags |= FNODE_anode; fnode->ea_secno = cpu_to_le32(a_s);*/ secno new_sec; int i; @@ -338,7 +338,7 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, len = (pos + 511) >> 9; } } - if (fnode->ea_anode) { + if (fnode_in_anode(fnode)) { if (hpfs_add_sector_to_btree(s, le32_to_cpu(fnode->ea_secno), 0, len) != -1) { len++; @@ -351,16 +351,16 @@ void hpfs_set_ea(struct inode *inode, struct fnode *fnode, const char *key, h[1] = strlen(key); h[2] = size & 0xff; h[3] = size >> 8; - if (hpfs_ea_write(s, 
le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail; - if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail; - if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode->ea_anode, le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l), 4, h)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l) + 4, h[1] + 1, key)) goto bail; + if (hpfs_ea_write(s, le32_to_cpu(fnode->ea_secno), fnode_in_anode(fnode), le32_to_cpu(fnode->ea_size_l) + 5 + h[1], size, data)) goto bail; fnode->ea_size_l = cpu_to_le32(pos); ret: hpfs_i(inode)->i_ea_size += 5 + strlen(key) + size; return; bail: if (le32_to_cpu(fnode->ea_secno)) - if (fnode->ea_anode) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9); + if (fnode_in_anode(fnode)) hpfs_truncate_btree(s, le32_to_cpu(fnode->ea_secno), 1, (le32_to_cpu(fnode->ea_size_l) + 511) >> 9); else hpfs_free_sectors(s, le32_to_cpu(fnode->ea_secno) + ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9), len - ((le32_to_cpu(fnode->ea_size_l) + 511) >> 9)); else fnode->ea_secno = fnode->ea_size_l = cpu_to_le32(0); } diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index ca90bde..37cc448 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -436,6 +436,7 @@ struct bplus_header #define FNODE_MAGIC 0xf7e40aae +enum {FNODE_anode = cpu_to_le16(2), FNODE_dir = cpu_to_le16(256)}; struct fnode { u32 magic; /* f7e4 0aae */ @@ -451,26 +452,9 @@ struct fnode secno ea_secno; /* first sector of disk-resident ea's*/ u16 ea_size_s; /* length of fnode-resident ea's */ -#ifdef __LITTLE_ENDIAN - u8 flag0: 1; - u8 ea_anode: 1; /* 1 -> ea_secno is an anode */ - u8 flag234567: 6; -#else - u8 flag234567: 6; - u8 ea_anode: 1; /* 1 -> ea_secno is an 
anode */ - u8 flag0: 1; -#endif - -#ifdef __LITTLE_ENDIAN - u8 dirflag: 1; /* 1 -> directory. first & only extent - points to dnode. */ - u8 flag9012345: 7; -#else - u8 flag9012345: 7; - u8 dirflag: 1; /* 1 -> directory. first & only extent + __le16 flags; /* bit 1 set -> ea_secno is an anode */ + /* bit 8 set -> directory. first & only extent points to dnode. */ -#endif - struct bplus_header btree; /* b+ tree, 8 extents or 12 subtrees */ union { struct bplus_leaf_node external[8]; @@ -492,6 +476,16 @@ struct fnode via fnode + ea_offs. I think.) */ }; +static inline bool fnode_in_anode(struct fnode *p) +{ + return (p->flags & FNODE_anode) != 0; +} + +static inline bool fnode_is_dir(struct fnode *p) +{ + return (p->flags & FNODE_dir) != 0; +} + /* anode: 99.44% pure allocation tree */ diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index b43066c..ed671e0 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -110,7 +110,7 @@ void hpfs_read_inode(struct inode *i) } } } - if (fnode->dirflag) { + if (fnode_is_dir(fnode)) { int n_dnodes, n_subdirs; i->i_mode |= S_IFDIR; i->i_op = &hpfs_dir_iops; diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index a790821..fffcb33 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -130,7 +130,7 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea (unsigned long)ino); goto bail; } - if (!fnode->dirflag) { + if (!fnode_is_dir(fnode)) { if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != (fnode->btree.internal ? 12 : 8)) { hpfs_error(s, diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index 30dd7b1..9083ef8 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -70,7 +70,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) fnode->len = len; memcpy(fnode->name, name, len > 15 ? 
15 : len); fnode->up = cpu_to_le32(dir->i_ino); - fnode->dirflag = 1; + fnode->flags |= FNODE_dir; fnode->btree.n_free_nodes = 7; fnode->btree.n_used_nodes = 1; fnode->btree.first_free = cpu_to_le16(0x14); -- cgit v0.10.2 From 52576da3545e78c534d901a39f6f2391665c641b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 15:28:51 -0400 Subject: hpfs: bitmaps are little-endian annotate properly... Signed-off-by: Al Viro diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index 7a5eb2c..cdb84a8 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -16,9 +16,9 @@ static int chk_if_allocated(struct super_block *s, secno sec, char *msg) { struct quad_buffer_head qbh; - u32 *bmp; + __le32 *bmp; if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "chk"))) goto fail; - if ((cpu_to_le32(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { + if ((le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) >> (sec & 0x1f)) & 1) { hpfs_error(s, "sector '%s' - %08x not allocated in bitmap", msg, sec); goto fail1; } @@ -62,7 +62,7 @@ int hpfs_chk_sectors(struct super_block *s, secno start, int len, char *msg) static secno alloc_in_bmp(struct super_block *s, secno near, unsigned n, unsigned forward) { struct quad_buffer_head qbh; - unsigned *bmp; + __le32 *bmp; unsigned bs = near & ~0x3fff; unsigned nr = (near & 0x3fff) & ~(n - 1); /*unsigned mnr;*/ @@ -236,7 +236,7 @@ static secno alloc_in_dirband(struct super_block *s, secno near) int hpfs_alloc_if_possible(struct super_block *s, secno sec) { struct quad_buffer_head qbh; - u32 *bmp; + __le32 *bmp; if (!(bmp = hpfs_map_bitmap(s, sec >> 14, &qbh, "aip"))) goto end; if (le32_to_cpu(bmp[(sec & 0x3fff) >> 5]) & (1 << (sec & 0x1f))) { bmp[(sec & 0x3fff) >> 5] &= cpu_to_le32(~(1 << (sec & 0x1f))); @@ -254,7 +254,7 @@ int hpfs_alloc_if_possible(struct super_block *s, secno sec) void hpfs_free_sectors(struct super_block *s, secno sec, unsigned n) { struct quad_buffer_head qbh; - u32 *bmp; + __le32 *bmp; struct hpfs_sb_info *sbi = hpfs_sb(s); /*printk("2 - ");*/ 
if (!n) return; @@ -299,7 +299,7 @@ int hpfs_check_free_dnodes(struct super_block *s, int n) int n_bmps = (hpfs_sb(s)->sb_fs_size + 0x4000 - 1) >> 14; int b = hpfs_sb(s)->sb_c_bitmap & 0x0fffffff; int i, j; - u32 *bmp; + __le32 *bmp; struct quad_buffer_head qbh; if ((bmp = hpfs_map_dnode_bitmap(s, &qbh))) { for (j = 0; j < 512; j++) { @@ -351,7 +351,7 @@ void hpfs_free_dnode(struct super_block *s, dnode_secno dno) hpfs_free_sectors(s, dno, 4); } else { struct quad_buffer_head qbh; - u32 *bmp; + __le32 *bmp; unsigned ssec = (dno - hpfs_sb(s)->sb_dirband_start) / 4; if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { return; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index de94617..88f096d 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -178,7 +178,7 @@ static inline void copy_de(struct hpfs_dirent *dst, struct hpfs_dirent *src) dst->not_8x3 = n; } -static inline unsigned tstbits(u32 *bmp, unsigned b, unsigned n) +static inline unsigned tstbits(__le32 *bmp, unsigned b, unsigned n) { int i; if ((b >= 0x4000) || (b + n - 1 >= 0x4000)) return n; @@ -275,8 +275,8 @@ void hpfs_evict_inode(struct inode *); /* map.c */ -unsigned *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); -unsigned *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); +__le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); +__le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); unsigned char *hpfs_load_code_page(struct super_block *, secno); secno *hpfs_load_bitmap_directory(struct super_block *, secno bmp); struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index fffcb33..bbb174d 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -8,12 +8,12 @@ #include "hpfs_fn.h" -unsigned *hpfs_map_dnode_bitmap(struct super_block *s, struct quad_buffer_head *qbh) +__le32 *hpfs_map_dnode_bitmap(struct super_block 
*s, struct quad_buffer_head *qbh) { return hpfs_map_4sectors(s, hpfs_sb(s)->sb_dmap, qbh, 0); } -unsigned int *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, +__le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, struct quad_buffer_head *qbh, char *id) { secno sec; -- cgit v0.10.2 From 39413c6046de282a92739110cfafb8f1e862680d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 15:32:22 -0400 Subject: hpfs: annotate struct dnode little-endians... Signed-off-by: Al Viro diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 37cc448..b4e035c 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -278,8 +278,8 @@ struct code_page_data #define DNODE_MAGIC 0x77e40aae struct dnode { - u32 magic; /* 77e4 0aae */ - u32 first_free; /* offset from start of dnode to + __le32 magic; /* 77e4 0aae */ + __le32 first_free; /* offset from start of dnode to first free dir entry */ #ifdef __LITTLE_ENDIAN u8 root_dnode: 1; /* Is it root dnode? */ @@ -293,9 +293,9 @@ struct dnode { u8 root_dnode: 1; /* Is it root dnode? 
*/ #endif u8 increment_me2[3]; - secno up; /* (root dnode) directory's fnode + __le32 up; /* (root dnode) directory's fnode (nonroot) parent dnode */ - dnode_secno self; /* pointer to this dnode */ + __le32 self; /* pointer to this dnode */ u8 dirent[2028]; /* one or more dirents */ }; -- cgit v0.10.2 From ddc19e6e04c1131a48f5b9a25aa433bbd8430cdd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 15:59:35 -0400 Subject: hpfs: annotate btree nodes, get rid of bitfields mess Signed-off-by: Al Viro diff --git a/fs/hpfs/anode.c b/fs/hpfs/anode.c index ec5f8b9..4bae4a4 100644 --- a/fs/hpfs/anode.c +++ b/fs/hpfs/anode.c @@ -20,7 +20,7 @@ secno hpfs_bplus_lookup(struct super_block *s, struct inode *inode, int c1, c2 = 0; go_down: if (hpfs_sb(s)->sb_chk) if (hpfs_stop_cycles(s, a, &c1, &c2, "hpfs_bplus_lookup")) return -1; - if (btree->internal) { + if (bp_internal(btree)) { for (i = 0; i < btree->n_used_nodes; i++) if (le32_to_cpu(btree->u.internal[i].file_secno) > sec) { a = le32_to_cpu(btree->u.internal[i].down); @@ -82,7 +82,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi brelse(bh); return -1; } - if (btree->internal) { + if (bp_internal(btree)) { a = le32_to_cpu(btree->u.internal[n].down); btree->u.internal[n].file_secno = cpu_to_le32(-1); mark_buffer_dirty(bh); @@ -129,12 +129,12 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } if (a == node && fnod) { anode->up = cpu_to_le32(node); - anode->btree.fnode_parent = 1; + anode->btree.flags |= BP_fnode_parent; anode->btree.n_used_nodes = btree->n_used_nodes; anode->btree.first_free = btree->first_free; anode->btree.n_free_nodes = 40 - anode->btree.n_used_nodes; memcpy(&anode->u, &btree->u, btree->n_used_nodes * 12); - btree->internal = 1; + btree->flags |= BP_internal; btree->n_free_nodes = 11; btree->n_used_nodes = 1; btree->first_free = cpu_to_le16((char *)&(btree->u.internal[1]) - (char *)btree); @@ -184,7 +184,10 @@ secno 
hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi hpfs_free_sectors(s, ra, 1); if ((anode = hpfs_map_anode(s, na, &bh))) { anode->up = cpu_to_le32(up); - anode->btree.fnode_parent = up == node && fnod; + if (up == node && fnod) + anode->btree.flags |= BP_fnode_parent; + else + anode->btree.flags &= ~BP_fnode_parent; mark_buffer_dirty(bh); brelse(bh); } @@ -198,7 +201,7 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi if ((new_anode = hpfs_alloc_anode(s, a, &na, &bh))) { anode = new_anode; /*anode->up = cpu_to_le32(up != -1 ? up : ra);*/ - anode->btree.internal = 1; + anode->btree.flags |= BP_internal; anode->btree.n_used_nodes = 1; anode->btree.n_free_nodes = 59; anode->btree.first_free = cpu_to_le16(16); @@ -215,7 +218,8 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } if ((anode = hpfs_map_anode(s, na, &bh))) { anode->up = cpu_to_le32(node); - if (fnod) anode->btree.fnode_parent = 1; + if (fnod) + anode->btree.flags |= BP_fnode_parent; mark_buffer_dirty(bh); brelse(bh); } @@ -234,18 +238,19 @@ secno hpfs_add_sector_to_btree(struct super_block *s, secno node, int fnod, unsi } ranode->up = cpu_to_le32(node); memcpy(&ranode->btree, btree, le16_to_cpu(btree->first_free)); - if (fnod) ranode->btree.fnode_parent = 1; - ranode->btree.n_free_nodes = (ranode->btree.internal ? 60 : 40) - ranode->btree.n_used_nodes; - if (ranode->btree.internal) for (n = 0; n < ranode->btree.n_used_nodes; n++) { + if (fnod) + ranode->btree.flags |= BP_fnode_parent; + ranode->btree.n_free_nodes = (bp_internal(&ranode->btree) ? 
60 : 40) - ranode->btree.n_used_nodes; + if (bp_internal(&ranode->btree)) for (n = 0; n < ranode->btree.n_used_nodes; n++) { struct anode *unode; if ((unode = hpfs_map_anode(s, le32_to_cpu(ranode->u.internal[n].down), &bh1))) { unode->up = cpu_to_le32(ra); - unode->btree.fnode_parent = 0; + unode->btree.flags &= ~BP_fnode_parent; mark_buffer_dirty(bh1); brelse(bh1); } } - btree->internal = 1; + btree->flags |= BP_internal; btree->n_free_nodes = fnod ? 10 : 58; btree->n_used_nodes = 2; btree->first_free = cpu_to_le16((char *)&btree->u.internal[2] - (char *)btree); @@ -278,7 +283,7 @@ void hpfs_remove_btree(struct super_block *s, struct bplus_header *btree) int d1, d2; go_down: d2 = 0; - while (btree1->internal) { + while (bp_internal(btree1)) { ano = le32_to_cpu(btree1->u.internal[pos].down); if (level) brelse(bh); if (hpfs_sb(s)->sb_chk) @@ -412,13 +417,13 @@ void hpfs_truncate_btree(struct super_block *s, secno f, int fno, unsigned secs) btree->n_free_nodes = 8; btree->n_used_nodes = 0; btree->first_free = cpu_to_le16(8); - btree->internal = 0; + btree->flags &= ~BP_internal; mark_buffer_dirty(bh); } else hpfs_free_sectors(s, f, 1); brelse(bh); return; } - while (btree->internal) { + while (bp_internal(btree)) { nodes = btree->n_used_nodes + btree->n_free_nodes; for (i = 0; i < btree->n_used_nodes; i++) if (le32_to_cpu(btree->u.internal[i].file_secno) >= secs) goto f; diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index b4e035c..49d9315 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -375,50 +375,36 @@ struct hpfs_dirent { struct bplus_leaf_node { - u32 file_secno; /* first file sector in extent */ - u32 length; /* length, sectors */ - secno disk_secno; /* first corresponding disk sector */ + __le32 file_secno; /* first file sector in extent */ + __le32 length; /* length, sectors */ + __le32 disk_secno; /* first corresponding disk sector */ }; struct bplus_internal_node { - u32 file_secno; /* subtree maps sectors < this */ - anode_secno down; /* pointer to 
subtree */ + __le32 file_secno; /* subtree maps sectors < this */ + __le32 down; /* pointer to subtree */ }; +enum { + BP_hbff = 1, + BP_fnode_parent = 0x20, + BP_binary_search = 0x40, + BP_internal = 0x80 +}; struct bplus_header { -#ifdef __LITTLE_ENDIAN - u8 hbff: 1; /* high bit of first free entry offset */ - u8 flag1234: 4; - u8 fnode_parent: 1; /* ? we're pointed to by an fnode, - the data btree or some ea or the - main ea bootage pointer ea_secno */ - /* also can get set in fnodes, which - may be a chkdsk glitch or may mean - this bit is irrelevant in fnodes, - or this interpretation is all wet */ - u8 binary_search: 1; /* suggest binary search (unused) */ - u8 internal: 1; /* 1 -> (internal) tree of anodes - 0 -> (leaf) list of extents */ -#else - u8 internal: 1; /* 1 -> (internal) tree of anodes - 0 -> (leaf) list of extents */ - u8 binary_search: 1; /* suggest binary search (unused) */ - u8 fnode_parent: 1; /* ? we're pointed to by an fnode, + u8 flags; /* bit 0 - high bit of first free entry offset + bit 5 - we're pointed to by an fnode, the data btree or some ea or the - main ea bootage pointer ea_secno */ - /* also can get set in fnodes, which - may be a chkdsk glitch or may mean - this bit is irrelevant in fnodes, - or this interpretation is all wet */ - u8 flag1234: 4; - u8 hbff: 1; /* high bit of first free entry offset */ -#endif + main ea bootage pointer ea_secno + bit 6 - suggest binary search (unused) + bit 7 - 1 -> (internal) tree of anodes + 0 -> (leaf) list of extents */ u8 fill[3]; u8 n_free_nodes; /* free nodes in following array */ u8 n_used_nodes; /* used nodes in following array */ - u16 first_free; /* offset from start of header to + __le16 first_free; /* offset from start of header to first free node in array */ union { struct bplus_internal_node internal[0]; /* (internal) 2-word entries giving @@ -428,6 +414,16 @@ struct bplus_header } u; }; +static inline bool bp_internal(struct bplus_header *bp) +{ + return bp->flags & BP_internal; 
+} + +static inline bool bp_fnode_parent(struct bplus_header *bp) +{ + return bp->flags & BP_fnode_parent; +} + /* fnode: root of allocation b+ tree, and EA's */ /* Every file and every directory has one fnode, pointed to by the directory diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index bbb174d..d8bed6d 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -132,14 +132,14 @@ struct fnode *hpfs_map_fnode(struct super_block *s, ino_t ino, struct buffer_hea } if (!fnode_is_dir(fnode)) { if ((unsigned)fnode->btree.n_used_nodes + (unsigned)fnode->btree.n_free_nodes != - (fnode->btree.internal ? 12 : 8)) { + (bp_internal(&fnode->btree) ? 12 : 8)) { hpfs_error(s, "bad number of nodes in fnode %08lx", (unsigned long)ino); goto bail; } if (le16_to_cpu(fnode->btree.first_free) != - 8 + fnode->btree.n_used_nodes * (fnode->btree.internal ? 8 : 12)) { + 8 + fnode->btree.n_used_nodes * (bp_internal(&fnode->btree) ? 8 : 12)) { hpfs_error(s, "bad first_free pointer in fnode %08lx", (unsigned long)ino); @@ -187,12 +187,12 @@ struct anode *hpfs_map_anode(struct super_block *s, anode_secno ano, struct buff goto bail; } if ((unsigned)anode->btree.n_used_nodes + (unsigned)anode->btree.n_free_nodes != - (anode->btree.internal ? 60 : 40)) { + (bp_internal(&anode->btree) ? 60 : 40)) { hpfs_error(s, "bad number of nodes in anode %08x", ano); goto bail; } if (le16_to_cpu(anode->btree.first_free) != - 8 + anode->btree.n_used_nodes * (anode->btree.internal ? 8 : 12)) { + 8 + anode->btree.n_used_nodes * (bp_internal(&anode->btree) ? 
8 : 12)) { hpfs_error(s, "bad first_free pointer in anode %08x", ano); goto bail; } -- cgit v0.10.2 From 2b9f1cc29ba0e56089fe04501ec6d3b49eee3c3e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 16:09:25 -0400 Subject: hpfs: annotate struct fnode Signed-off-by: Al Viro diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 49d9315..b66c8b2 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -435,18 +435,18 @@ static inline bool bp_fnode_parent(struct bplus_header *bp) enum {FNODE_anode = cpu_to_le16(2), FNODE_dir = cpu_to_le16(256)}; struct fnode { - u32 magic; /* f7e4 0aae */ - u32 zero1[2]; /* read history */ + __le32 magic; /* f7e4 0aae */ + __le32 zero1[2]; /* read history */ u8 len, name[15]; /* true length, truncated name */ - fnode_secno up; /* pointer to file's directory fnode */ - secno acl_size_l; - secno acl_secno; - u16 acl_size_s; + __le32 up; /* pointer to file's directory fnode */ + __le32 acl_size_l; + __le32 acl_secno; + __le16 acl_size_s; u8 acl_anode; u8 zero2; /* history bit count */ - u32 ea_size_l; /* length of disk-resident ea's */ - secno ea_secno; /* first sector of disk-resident ea's*/ - u16 ea_size_s; /* length of fnode-resident ea's */ + __le32 ea_size_l; /* length of disk-resident ea's */ + __le32 ea_secno; /* first sector of disk-resident ea's*/ + __le16 ea_size_s; /* length of fnode-resident ea's */ __le16 flags; /* bit 1 set -> ea_secno is an anode */ /* bit 8 set -> directory. 
first & only extent @@ -457,15 +457,15 @@ struct fnode struct bplus_internal_node internal[12]; } u; - u32 file_size; /* file length, bytes */ - u32 n_needea; /* number of EA's with NEEDEA set */ + __le32 file_size; /* file length, bytes */ + __le32 n_needea; /* number of EA's with NEEDEA set */ u8 user_id[16]; /* unused */ - u16 ea_offs; /* offset from start of fnode + __le16 ea_offs; /* offset from start of fnode to first fnode-resident ea */ u8 dasd_limit_treshhold; u8 dasd_limit_delta; - u32 dasd_limit; - u32 dasd_usage; + __le32 dasd_limit; + __le32 dasd_usage; u8 ea[316]; /* zero or more EA's, packed together with no alignment padding. (Do not use this name, get here -- cgit v0.10.2 From 6ce2bbba5266c1dd5c27dd8af1887ed8ca564919 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 16:11:25 -0400 Subject: hpfs: annotate struct anode Signed-off-by: Al Viro diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index b66c8b2..f0dc109 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -489,9 +489,9 @@ static inline bool fnode_is_dir(struct fnode *p) struct anode { - u32 magic; /* 37e4 0aae */ - anode_secno self; /* pointer to this anode */ - secno up; /* parent anode or fnode */ + __le32 magic; /* 37e4 0aae */ + __le32 self; /* pointer to this anode */ + __le32 up; /* parent anode or fnode */ struct bplus_header btree; /* b+tree, 40 extents or 60 subtrees */ union { @@ -499,7 +499,7 @@ struct anode struct bplus_internal_node internal[60]; } u; - u32 fill[3]; /* unused */ + __le32 fill[3]; /* unused */ }; -- cgit v0.10.2 From 46287aa652fa8ea1edac41817ddc63332495ffc3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 16:20:49 -0400 Subject: hpfs: annotate struct hpfs_dirent Signed-off-by: Al Viro diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c index 6bf9fde..3228c52 100644 --- a/fs/hpfs/dnode.c +++ b/fs/hpfs/dnode.c @@ -153,7 +153,7 @@ static void set_last_pointer(struct super_block *s, struct dnode *d, dnode_secno } de->length = cpu_to_le16(36); de->down 
= 1; - *(dnode_secno *)((char *)de + 32) = cpu_to_le32(ptr); + *(__le32 *)((char *)de + 32) = cpu_to_le32(ptr); } } @@ -177,7 +177,7 @@ struct hpfs_dirent *hpfs_add_de(struct super_block *s, struct dnode *d, memmove((char *)de + d_size, de, (char *)de_end - (char *)de); memset(de, 0, d_size); if (down_ptr) { - *(dnode_secno *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr); + *(__le32 *)((char *)de + d_size - 4) = cpu_to_le32(down_ptr); de->down = 1; } de->length = cpu_to_le16(d_size); @@ -656,7 +656,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) del->down = 0; d1->first_free = cpu_to_le32(le32_to_cpu(d1->first_free) - 4); } else if (down) - *(dnode_secno *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); + *(__le32 *) ((void *) del + le16_to_cpu(del->length) - 4) = cpu_to_le32(down); } else goto endm; if (!(de_cp = kmalloc(le16_to_cpu(de_prev->length), GFP_NOFS))) { printk("HPFS: out of memory for dtree balancing\n"); @@ -672,7 +672,7 @@ static void delete_empty_dnode(struct inode *i, dnode_secno dno) de_prev->down = 1; dnode->first_free = cpu_to_le32(le32_to_cpu(dnode->first_free) + 4); } - *(dnode_secno *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); + *(__le32 *) ((void *) de_prev + le16_to_cpu(de_prev->length) - 4) = cpu_to_le32(ndown); hpfs_mark_4buffers_dirty(&qbh); hpfs_brelse4(&qbh); for_all_poss(i, hpfs_pos_subst, ((loff_t)up << 4) | (p - 1), 4); diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index f0dc109..051ff45 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -300,7 +300,7 @@ struct dnode { }; struct hpfs_dirent { - u16 length; /* offset to next dirent */ + __le16 length; /* offset to next dirent */ #ifdef __LITTLE_ENDIAN u8 first: 1; /* set on phony ^A^A (".") entry */ @@ -346,12 +346,12 @@ struct hpfs_dirent { u8 read_only: 1; /* dos attrib */ #endif - fnode_secno fnode; /* fnode giving allocation info */ - time32_t write_date; /* mtime */ - u32 file_size; /* file length, 
bytes */ - time32_t read_date; /* atime */ - time32_t creation_date; /* ctime */ - u32 ea_size; /* total EA length, bytes */ + __le32 fnode; /* fnode giving allocation info */ + __le32 write_date; /* mtime */ + __le32 file_size; /* file length, bytes */ + __le32 read_date; /* atime */ + __le32 creation_date; /* ctime */ + __le32 ea_size; /* total EA length, bytes */ u8 no_of_acls; /* number of ACL's (low 3 bits) */ u8 ix; /* code page index (of filename), see struct code_page_data */ diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 88f096d..0672691 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -100,7 +100,7 @@ struct quad_buffer_head { static inline dnode_secno de_down_pointer (struct hpfs_dirent *de) { CHKCOND(de->down,("HPFS: de_down_pointer: !de->down\n")); - return le32_to_cpu(*(dnode_secno *) ((void *) de + le16_to_cpu(de->length) - 4)); + return le32_to_cpu(*(__le32 *) ((void *) de + le16_to_cpu(de->length) - 4)); } /* The first dir entry in a dnode */ -- cgit v0.10.2 From 77ee26e44c28823a29bc09091950544566ae7cea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 17 Apr 2012 16:26:46 -0400 Subject: hpfs: annotate ea Signed-off-by: Al Viro diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 0672691..1acb40f 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -148,12 +148,12 @@ static inline struct extended_attribute *next_ea(struct extended_attribute *ea) static inline secno ea_sec(struct extended_attribute *ea) { - return le32_to_cpu(get_unaligned((secno *)((char *)ea + 9 + ea->namelen))); + return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 9 + ea->namelen))); } static inline secno ea_len(struct extended_attribute *ea) { - return le32_to_cpu(get_unaligned((secno *)((char *)ea + 5 + ea->namelen))); + return le32_to_cpu(get_unaligned((__le32 *)((char *)ea + 5 + ea->namelen))); } static inline char *ea_data(struct extended_attribute *ea) -- cgit v0.10.2 From 28fe3c1963b0bafa56ec92df1987828090151d87 Mon Sep 17 00:00:00 2001 
From: Al Viro Date: Tue, 17 Apr 2012 16:41:13 -0400 Subject: hpfs: assorted endianness annotations Signed-off-by: Al Viro diff --git a/fs/hpfs/hpfs.h b/fs/hpfs/hpfs.h index 051ff45..cce025a 100644 --- a/fs/hpfs/hpfs.h +++ b/fs/hpfs/hpfs.h @@ -51,11 +51,11 @@ struct hpfs_boot_block u8 n_rootdir_entries[2]; u8 n_sectors_s[2]; u8 media_byte; - u16 sectors_per_fat; - u16 sectors_per_track; - u16 heads_per_cyl; - u32 n_hidden_sectors; - u32 n_sectors_l; /* size of partition */ + __le16 sectors_per_fat; + __le16 sectors_per_track; + __le16 heads_per_cyl; + __le32 n_hidden_sectors; + __le32 n_sectors_l; /* size of partition */ u8 drive_number; u8 mbz; u8 sig_28h; /* 28h */ @@ -63,7 +63,7 @@ struct hpfs_boot_block u8 vol_label[11]; u8 sig_hpfs[8]; /* "HPFS " */ u8 pad[448]; - u16 magic; /* aa55 */ + __le16 magic; /* aa55 */ }; @@ -75,28 +75,28 @@ struct hpfs_boot_block struct hpfs_super_block { - u32 magic; /* f995 e849 */ - u32 magic1; /* fa53 e9c5, more magic? */ + __le32 magic; /* f995 e849 */ + __le32 magic1; /* fa53 e9c5, more magic? 
*/ u8 version; /* version of a filesystem usually 2 */ u8 funcversion; /* functional version - oldest version of filesystem that can understand this disk */ - u16 zero; /* 0 */ - fnode_secno root; /* fnode of root directory */ - secno n_sectors; /* size of filesystem */ - u32 n_badblocks; /* number of bad blocks */ - secno bitmaps; /* pointers to free space bit maps */ - u32 zero1; /* 0 */ - secno badblocks; /* bad block list */ - u32 zero3; /* 0 */ - time32_t last_chkdsk; /* date last checked, 0 if never */ - time32_t last_optimize; /* date last optimized, 0 if never */ - secno n_dir_band; /* number of sectors in dir band */ - secno dir_band_start; /* first sector in dir band */ - secno dir_band_end; /* last sector in dir band */ - secno dir_band_bitmap; /* free space map, 1 dnode per bit */ + __le16 zero; /* 0 */ + __le32 root; /* fnode of root directory */ + __le32 n_sectors; /* size of filesystem */ + __le32 n_badblocks; /* number of bad blocks */ + __le32 bitmaps; /* pointers to free space bit maps */ + __le32 zero1; /* 0 */ + __le32 badblocks; /* bad block list */ + __le32 zero3; /* 0 */ + __le32 last_chkdsk; /* date last checked, 0 if never */ + __le32 last_optimize; /* date last optimized, 0 if never */ + __le32 n_dir_band; /* number of sectors in dir band */ + __le32 dir_band_start; /* first sector in dir band */ + __le32 dir_band_end; /* last sector in dir band */ + __le32 dir_band_bitmap; /* free space map, 1 dnode per bit */ u8 volume_name[32]; /* not used */ - secno user_id_table; /* 8 preallocated sectors - user id */ + __le32 user_id_table; /* 8 preallocated sectors - user id */ u32 zero6[103]; /* 0 */ }; @@ -109,8 +109,8 @@ struct hpfs_super_block struct hpfs_spare_block { - u32 magic; /* f991 1849 */ - u32 magic1; /* fa52 29c5, more magic? */ + __le32 magic; /* f991 1849 */ + __le32 magic1; /* fa52 29c5, more magic? 
*/ #ifdef __LITTLE_ENDIAN u8 dirty: 1; /* 0 clean, 1 "improperly stopped" */ @@ -153,21 +153,21 @@ struct hpfs_spare_block u8 mm_contlgulty; u8 unused; - secno hotfix_map; /* info about remapped bad sectors */ - u32 n_spares_used; /* number of hotfixes */ - u32 n_spares; /* number of spares in hotfix map */ - u32 n_dnode_spares_free; /* spare dnodes unused */ - u32 n_dnode_spares; /* length of spare_dnodes[] list, + __le32 hotfix_map; /* info about remapped bad sectors */ + __le32 n_spares_used; /* number of hotfixes */ + __le32 n_spares; /* number of spares in hotfix map */ + __le32 n_dnode_spares_free; /* spare dnodes unused */ + __le32 n_dnode_spares; /* length of spare_dnodes[] list, follows in this block*/ - secno code_page_dir; /* code page directory block */ - u32 n_code_pages; /* number of code pages */ - u32 super_crc; /* on HPFS386 and LAN Server this is + __le32 code_page_dir; /* code page directory block */ + __le32 n_code_pages; /* number of code pages */ + __le32 super_crc; /* on HPFS386 and LAN Server this is checksum of superblock, on normal OS/2 unused */ - u32 spare_crc; /* on HPFS386 checksum of spareblock */ - u32 zero1[15]; /* unused */ - dnode_secno spare_dnodes[100]; /* emergency free dnode list */ - u32 zero2[1]; /* room for more? */ + __le32 spare_crc; /* on HPFS386 checksum of spareblock */ + __le32 zero1[15]; /* unused */ + __le32 spare_dnodes[100]; /* emergency free dnode list */ + __le32 zero2[1]; /* room for more? */ }; /* The bad block list is 4 sectors long. 
The first word must be zero, @@ -202,18 +202,18 @@ struct hpfs_spare_block struct code_page_directory { - u32 magic; /* 4945 21f7 */ - u32 n_code_pages; /* number of pointers following */ - u32 zero1[2]; + __le32 magic; /* 4945 21f7 */ + __le32 n_code_pages; /* number of pointers following */ + __le32 zero1[2]; struct { - u16 ix; /* index */ - u16 code_page_number; /* code page number */ - u32 bounds; /* matches corresponding word + __le16 ix; /* index */ + __le16 code_page_number; /* code page number */ + __le32 bounds; /* matches corresponding word in data block */ - secno code_page_data; /* sector number of a code_page_data + __le32 code_page_data; /* sector number of a code_page_data containing c.p. array */ - u16 index; /* index in c.p. array in that sector*/ - u16 unknown; /* some unknown value; usually 0; + __le16 index; /* index in c.p. array in that sector*/ + __le16 unknown; /* some unknown value; usually 0; 2 in Japanese version */ } array[31]; /* unknown length */ }; @@ -224,19 +224,19 @@ struct code_page_directory struct code_page_data { - u32 magic; /* 8945 21f7 */ - u32 n_used; /* # elements used in c_p_data[] */ - u32 bounds[3]; /* looks a bit like + __le32 magic; /* 8945 21f7 */ + __le32 n_used; /* # elements used in c_p_data[] */ + __le32 bounds[3]; /* looks a bit like (beg1,end1), (beg2,end2) one byte each */ - u16 offs[3]; /* offsets from start of sector + __le16 offs[3]; /* offsets from start of sector to start of c_p_data[ix] */ struct { - u16 ix; /* index */ - u16 code_page_number; /* code page number */ - u16 unknown; /* the same as in cp directory */ + __le16 ix; /* index */ + __le16 code_page_number; /* code page number */ + __le16 unknown; /* the same as in cp directory */ u8 map[128]; /* upcase table for chars 80..ff */ - u16 zero2; + __le16 zero2; } code_page[3]; u8 incognita[78]; }; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 1acb40f..8515bbb 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -82,7 +82,7 @@ struct 
hpfs_sb_info { unsigned char *sb_cp_table; /* code page tables: */ /* 128 bytes uppercasing table & */ /* 128 bytes lowercasing table */ - unsigned *sb_bmp_dir; /* main bitmap directory */ + __le32 *sb_bmp_dir; /* main bitmap directory */ unsigned sb_c_bitmap; /* current bitmap */ unsigned sb_max_fwd_alloc; /* max forwad allocation */ int sb_timeshift; @@ -278,7 +278,7 @@ void hpfs_evict_inode(struct inode *); __le32 *hpfs_map_dnode_bitmap(struct super_block *, struct quad_buffer_head *); __le32 *hpfs_map_bitmap(struct super_block *, unsigned, struct quad_buffer_head *, char *); unsigned char *hpfs_load_code_page(struct super_block *, secno); -secno *hpfs_load_bitmap_directory(struct super_block *, secno bmp); +__le32 *hpfs_load_bitmap_directory(struct super_block *, secno bmp); struct fnode *hpfs_map_fnode(struct super_block *s, ino_t, struct buffer_head **); struct anode *hpfs_map_anode(struct super_block *s, anode_secno, struct buffer_head **); struct dnode *hpfs_map_dnode(struct super_block *s, dnode_secno, struct quad_buffer_head *); diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index d8bed6d..4acb19d 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -89,18 +89,18 @@ unsigned char *hpfs_load_code_page(struct super_block *s, secno cps) return cp_table; } -secno *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) +__le32 *hpfs_load_bitmap_directory(struct super_block *s, secno bmp) { struct buffer_head *bh; int n = (hpfs_sb(s)->sb_fs_size + 0x200000 - 1) >> 21; int i; - secno *b; + __le32 *b; if (!(b = kmalloc(n * 512, GFP_KERNEL))) { printk("HPFS: can't allocate memory for bitmap directory\n"); return NULL; } for (i=0;ihotfixes_used || le32_to_cpu(spareblock->n_spares_used)) { + if (spareblock->hotfixes_used || spareblock->n_spares_used) { if (errs >= 2) { printk("HPFS: Hotfixes not supported here, try chkdsk\n"); mark_dirty(s, 0); -- cgit v0.10.2 From 408bd629badbd4353b238ab6f58001529b274d73 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 May 2012 
09:34:20 -0400 Subject: get rid of pointless allocations and copying in ecryptfs_follow_link() switch to generic_readlink(), while we are at it Signed-off-by: Al Viro diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index ab35b11..a07441a 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -660,11 +660,10 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, { struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry); char *lower_buf; - size_t lower_bufsiz = PATH_MAX; mm_segment_t old_fs; int rc; - lower_buf = kmalloc(lower_bufsiz, GFP_KERNEL); + lower_buf = kmalloc(PATH_MAX, GFP_KERNEL); if (!lower_buf) { rc = -ENOMEM; goto out; @@ -673,58 +672,29 @@ static int ecryptfs_readlink_lower(struct dentry *dentry, char **buf, set_fs(get_ds()); rc = lower_dentry->d_inode->i_op->readlink(lower_dentry, (char __user *)lower_buf, - lower_bufsiz); + PATH_MAX); set_fs(old_fs); if (rc < 0) goto out; - lower_bufsiz = rc; rc = ecryptfs_decode_and_decrypt_filename(buf, bufsiz, dentry, - lower_buf, lower_bufsiz); + lower_buf, rc); out: kfree(lower_buf); return rc; } -static int -ecryptfs_readlink(struct dentry *dentry, char __user *buf, int bufsiz) +static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) { - char *kbuf; - size_t kbufsiz, copied; + char *buf; + size_t len = PATH_MAX; int rc; - rc = ecryptfs_readlink_lower(dentry, &kbuf, &kbufsiz); + rc = ecryptfs_readlink_lower(dentry, &buf, &len); if (rc) goto out; - copied = min_t(size_t, bufsiz, kbufsiz); - rc = copy_to_user(buf, kbuf, copied) ? 
-EFAULT : copied; - kfree(kbuf); fsstack_copy_attr_atime(dentry->d_inode, ecryptfs_dentry_to_lower(dentry)->d_inode); -out: - return rc; -} - -static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - char *buf; - int len = PAGE_SIZE, rc; - mm_segment_t old_fs; - - /* Released in ecryptfs_put_link(); only release here on error */ - buf = kmalloc(len, GFP_KERNEL); - if (!buf) { - buf = ERR_PTR(-ENOMEM); - goto out; - } - old_fs = get_fs(); - set_fs(get_ds()); - rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len); - set_fs(old_fs); - if (rc < 0) { - kfree(buf); - buf = ERR_PTR(rc); - } else - buf[rc] = '\0'; + buf[len] = '\0'; out: nd_set_link(nd, buf); return NULL; @@ -1153,7 +1123,7 @@ out: } const struct inode_operations ecryptfs_symlink_iops = { - .readlink = ecryptfs_readlink, + .readlink = generic_readlink, .follow_link = ecryptfs_follow_link, .put_link = ecryptfs_put_link, .permission = ecryptfs_permission, -- cgit v0.10.2 From ea022dfb3c2a4680483b00eb2fecc9fc4f6091d1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 3 May 2012 10:14:29 -0400 Subject: ocfs: simplify symlink handling seeing that "fast" symlinks still get allocation + copy, we might as well simply switch them to pagecache-based variant of ->follow_link(); just need an appropriate ->readpage() for them... Signed-off-by: Al Viro diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 735514c..d89e08a 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c @@ -273,11 +273,13 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, inode->i_gid = le32_to_cpu(fe->i_gid); /* Fast symlinks will have i_size but no allocated clusters. 
*/ - if (S_ISLNK(inode->i_mode) && !fe->i_clusters) + if (S_ISLNK(inode->i_mode) && !fe->i_clusters) { inode->i_blocks = 0; - else + inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops; + } else { inode->i_blocks = ocfs2_inode_sector_count(inode); - inode->i_mapping->a_ops = &ocfs2_aops; + inode->i_mapping->a_ops = &ocfs2_aops; + } inode->i_atime.tv_sec = le64_to_cpu(fe->i_atime); inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec); inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime); @@ -331,10 +333,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, OCFS2_I(inode)->ip_dir_lock_gen = 1; break; case S_IFLNK: - if (ocfs2_inode_is_fast_symlink(inode)) - inode->i_op = &ocfs2_fast_symlink_inode_operations; - else - inode->i_op = &ocfs2_symlink_inode_operations; + inode->i_op = &ocfs2_symlink_inode_operations; i_size_write(inode, le64_to_cpu(fe->i_size)); break; default: diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a9856e3..9f39c64 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -1724,15 +1724,16 @@ static int ocfs2_symlink(struct inode *dir, fe = (struct ocfs2_dinode *) new_fe_bh->b_data; inode->i_rdev = 0; newsize = l - 1; + inode->i_op = &ocfs2_symlink_inode_operations; if (l > ocfs2_fast_symlink_chars(sb)) { u32 offset = 0; - inode->i_op = &ocfs2_symlink_inode_operations; status = dquot_alloc_space_nodirty(inode, ocfs2_clusters_to_bytes(osb->sb, 1)); if (status) goto bail; did_quota = 1; + inode->i_mapping->a_ops = &ocfs2_aops; status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, new_fe_bh, handle, data_ac, NULL, @@ -1750,7 +1751,7 @@ static int ocfs2_symlink(struct inode *dir, i_size_write(inode, newsize); inode->i_blocks = ocfs2_inode_sector_count(inode); } else { - inode->i_op = &ocfs2_fast_symlink_inode_operations; + inode->i_mapping->a_ops = &ocfs2_fast_symlink_aops; memcpy((char *) fe->id2.i_symlink, symname, l); i_size_write(inode, newsize); inode->i_blocks = 0; diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c 
index 5d22872..f1fbb4b 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -54,101 +54,40 @@ #include "buffer_head_io.h" -static char *ocfs2_fast_symlink_getlink(struct inode *inode, - struct buffer_head **bh) +static int ocfs2_fast_symlink_readpage(struct file *unused, struct page *page) { - int status; - char *link = NULL; + struct inode *inode = page->mapping->host; + struct buffer_head *bh; + int status = ocfs2_read_inode_block(inode, &bh); struct ocfs2_dinode *fe; + const char *link; + void *kaddr; + size_t len; - status = ocfs2_read_inode_block(inode, bh); if (status < 0) { mlog_errno(status); - link = ERR_PTR(status); - goto bail; + return status; } - fe = (struct ocfs2_dinode *) (*bh)->b_data; + fe = (struct ocfs2_dinode *) bh->b_data; link = (char *) fe->id2.i_symlink; -bail: - - return link; -} - -static int ocfs2_readlink(struct dentry *dentry, - char __user *buffer, - int buflen) -{ - int ret; - char *link; - struct buffer_head *bh = NULL; - struct inode *inode = dentry->d_inode; - - link = ocfs2_fast_symlink_getlink(inode, &bh); - if (IS_ERR(link)) { - ret = PTR_ERR(link); - goto out; - } - - /* - * Without vfsmount we can't update atime now, - * but we will update atime here ultimately. 
- */ - ret = vfs_readlink(dentry, buffer, buflen, link); - + /* will be less than a page size */ + len = strnlen(link, ocfs2_fast_symlink_chars(inode->i_sb)); + kaddr = kmap_atomic(page); + memcpy(kaddr, link, len + 1); + kunmap_atomic(kaddr); + SetPageUptodate(page); + unlock_page(page); brelse(bh); -out: - if (ret < 0) - mlog_errno(ret); - return ret; + return 0; } -static void *ocfs2_fast_follow_link(struct dentry *dentry, - struct nameidata *nd) -{ - int status = 0; - int len; - char *target, *link = ERR_PTR(-ENOMEM); - struct inode *inode = dentry->d_inode; - struct buffer_head *bh = NULL; - - BUG_ON(!ocfs2_inode_is_fast_symlink(inode)); - target = ocfs2_fast_symlink_getlink(inode, &bh); - if (IS_ERR(target)) { - status = PTR_ERR(target); - mlog_errno(status); - goto bail; - } - - /* Fast symlinks can't be large */ - len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); - link = kzalloc(len + 1, GFP_NOFS); - if (!link) { - status = -ENOMEM; - mlog_errno(status); - goto bail; - } - - memcpy(link, target, len); - -bail: - nd_set_link(nd, status ? 
ERR_PTR(status) : link); - brelse(bh); - - if (status) - mlog_errno(status); - return NULL; -} - -static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) -{ - char *link = nd_get_link(nd); - if (!IS_ERR(link)) - kfree(link); -} +const struct address_space_operations ocfs2_fast_symlink_aops = { + .readpage = ocfs2_fast_symlink_readpage, +}; const struct inode_operations ocfs2_symlink_inode_operations = { - .readlink = page_readlink, + .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, .getattr = ocfs2_getattr, @@ -159,15 +98,3 @@ const struct inode_operations ocfs2_symlink_inode_operations = { .removexattr = generic_removexattr, .fiemap = ocfs2_fiemap, }; -const struct inode_operations ocfs2_fast_symlink_inode_operations = { - .readlink = ocfs2_readlink, - .follow_link = ocfs2_fast_follow_link, - .put_link = ocfs2_fast_put_link, - .getattr = ocfs2_getattr, - .setattr = ocfs2_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = ocfs2_listxattr, - .removexattr = generic_removexattr, - .fiemap = ocfs2_fiemap, -}; diff --git a/fs/ocfs2/symlink.h b/fs/ocfs2/symlink.h index 65a6c9c..71ee424 100644 --- a/fs/ocfs2/symlink.h +++ b/fs/ocfs2/symlink.h @@ -27,7 +27,7 @@ #define OCFS2_SYMLINK_H extern const struct inode_operations ocfs2_symlink_inode_operations; -extern const struct inode_operations ocfs2_fast_symlink_inode_operations; +extern const struct address_space_operations ocfs2_fast_symlink_aops; /* * Test whether an inode is a fast symlink. -- cgit v0.10.2 From 9dd6fa03ab31bb57cee4623a689d058d222fbe68 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 8 May 2012 13:29:45 +0930 Subject: lglock: remove online variants of lock Optimizing the slow paths adds a lot of complexity. If you need to grab every lock often, you have other problems. 
Signed-off-by: Rusty Russell Acked-by: Nick Piggin Signed-off-by: Al Viro diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 87f402c..0fdd821 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -28,8 +28,8 @@ #define br_lock_init(name) name##_lock_init() #define br_read_lock(name) name##_local_lock() #define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock_online() -#define br_write_unlock(name) name##_global_unlock_online() +#define br_write_lock(name) name##_global_lock() +#define br_write_unlock(name) name##_global_unlock() #define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) @@ -42,8 +42,6 @@ #define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) #define lg_global_lock(name) name##_global_lock() #define lg_global_unlock(name) name##_global_unlock() -#define lg_global_lock_online(name) name##_global_lock_online() -#define lg_global_unlock_online(name) name##_global_unlock_online() #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -68,36 +66,13 @@ extern void name##_local_unlock_cpu(int cpu); \ extern void name##_global_lock(void); \ extern void name##_global_unlock(void); \ - extern void name##_global_lock_online(void); \ - extern void name##_global_unlock_online(void); \ #define DEFINE_LGLOCK(name) \ \ DEFINE_SPINLOCK(name##_cpu_lock); \ - cpumask_t name##_cpus __read_mostly; \ DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ DEFINE_LGLOCK_LOCKDEP(name); \ \ - static int \ - name##_lg_cpu_callback(struct notifier_block *nb, \ - unsigned long action, void *hcpu) \ - { \ - switch (action & ~CPU_TASKS_FROZEN) { \ - case CPU_UP_PREPARE: \ - spin_lock(&name##_cpu_lock); \ - cpu_set((unsigned long)hcpu, name##_cpus); \ - spin_unlock(&name##_cpu_lock); \ - break; \ - case CPU_UP_CANCELED: case CPU_DEAD: \ - spin_lock(&name##_cpu_lock); \ - cpu_clear((unsigned long)hcpu, name##_cpus); \ - 
spin_unlock(&name##_cpu_lock); \ - } \ - return NOTIFY_OK; \ - } \ - static struct notifier_block name##_lg_cpu_notifier = { \ - .notifier_call = name##_lg_cpu_callback, \ - }; \ void name##_lock_init(void) { \ int i; \ LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ @@ -106,11 +81,6 @@ lock = &per_cpu(name##_lock, i); \ *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ } \ - register_hotcpu_notifier(&name##_lg_cpu_notifier); \ - get_online_cpus(); \ - for_each_online_cpu(i) \ - cpu_set(i, name##_cpus); \ - put_online_cpus(); \ } \ EXPORT_SYMBOL(name##_lock_init); \ \ @@ -150,30 +120,6 @@ } \ EXPORT_SYMBOL(name##_local_unlock_cpu); \ \ - void name##_global_lock_online(void) { \ - int i; \ - spin_lock(&name##_cpu_lock); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock_online); \ - \ - void name##_global_unlock_online(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_cpu(i, &name##_cpus) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - spin_unlock(&name##_cpu_lock); \ - } \ - EXPORT_SYMBOL(name##_global_unlock_online); \ - \ void name##_global_lock(void) { \ int i; \ preempt_disable(); \ -- cgit v0.10.2 From eea62f831b8030b0eeea8314eed73b6132d1de26 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 8 May 2012 13:32:24 +0930 Subject: brlocks/lglocks: turn into functions lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. Since there are at least two users it makes sense to share this code in a library. This is also easier maintainable than a macro forest. 
This will also make it later possible to dynamically allocate lglocks and also use them in modules (this would both still need some additional, but now straightforward, code) [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen Cc: Al Viro Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell Signed-off-by: Al Viro diff --git a/fs/file_table.c b/fs/file_table.c index 70f2a0f..f5c67c5 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -34,7 +34,6 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -DECLARE_LGLOCK(files_lglock); DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ diff --git a/fs/internal.h b/fs/internal.h index 9962c59..8040af4 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -56,7 +56,7 @@ extern int sb_prepare_remount_readonly(struct super_block *); extern void __init mnt_init(void); -DECLARE_BRLOCK(vfsmount_lock); +extern struct lglock vfsmount_lock; /* diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 0fdd821..f01e5f6 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -23,26 +23,17 @@ #include <linux/lockdep.h> #include <linux/percpu.h> #include <linux/cpu.h> +#include <linux/notifier.h> /* can make br locks by using local lock for read side, global lock for write */ -#define br_lock_init(name) name##_lock_init() -#define br_read_lock(name) name##_local_lock() -#define br_read_unlock(name) name##_local_unlock() -#define br_write_lock(name) name##_global_lock() -#define br_write_unlock(name) name##_global_unlock() +#define br_lock_init(name) lg_lock_init(name, #name) +#define br_read_lock(name) lg_local_lock(name) +#define br_read_unlock(name) lg_local_unlock(name) +#define br_write_lock(name) lg_global_lock(name) +#define br_write_unlock(name) lg_global_unlock(name) -#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name) #define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name) - -#define lg_lock_init(name) name##_lock_init() -#define lg_local_lock(name) name##_local_lock() -#define lg_local_unlock(name)
name##_local_unlock() -#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu) -#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu) -#define lg_global_lock(name) name##_global_lock() -#define lg_global_unlock(name) name##_global_unlock() - #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map @@ -57,90 +48,26 @@ #define DEFINE_LGLOCK_LOCKDEP(name) #endif - -#define DECLARE_LGLOCK(name) \ - extern void name##_lock_init(void); \ - extern void name##_local_lock(void); \ - extern void name##_local_unlock(void); \ - extern void name##_local_lock_cpu(int cpu); \ - extern void name##_local_unlock_cpu(int cpu); \ - extern void name##_global_lock(void); \ - extern void name##_global_unlock(void); \ +struct lglock { + arch_spinlock_t __percpu *lock; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lock_class_key lock_key; + struct lockdep_map lock_dep_map; +#endif +}; #define DEFINE_LGLOCK(name) \ - \ - DEFINE_SPINLOCK(name##_cpu_lock); \ - DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \ - DEFINE_LGLOCK_LOCKDEP(name); \ - \ - void name##_lock_init(void) { \ - int i; \ - LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - *lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \ - } \ - } \ - EXPORT_SYMBOL(name##_lock_init); \ - \ - void name##_local_lock(void) { \ - arch_spinlock_t *lock; \ - preempt_disable(); \ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock); \ - \ - void name##_local_unlock(void) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &__get_cpu_var(name##_lock); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock); \ - \ - void name##_local_lock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - preempt_disable(); 
\ - rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_lock(lock); \ - } \ - EXPORT_SYMBOL(name##_local_lock_cpu); \ - \ - void name##_local_unlock_cpu(int cpu) { \ - arch_spinlock_t *lock; \ - rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \ - lock = &per_cpu(name##_lock, cpu); \ - arch_spin_unlock(lock); \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_local_unlock_cpu); \ - \ - void name##_global_lock(void) { \ - int i; \ - preempt_disable(); \ - rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_lock(lock); \ - } \ - } \ - EXPORT_SYMBOL(name##_global_lock); \ - \ - void name##_global_unlock(void) { \ - int i; \ - rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_possible_cpu(i) { \ - arch_spinlock_t *lock; \ - lock = &per_cpu(name##_lock, i); \ - arch_spin_unlock(lock); \ - } \ - preempt_enable(); \ - } \ - EXPORT_SYMBOL(name##_global_unlock); + DEFINE_LGLOCK_LOCKDEP(name); \ + DEFINE_PER_CPU(arch_spinlock_t, name ## _lock) \ + = __ARCH_SPIN_LOCK_UNLOCKED; \ + struct lglock name = { .lock = &name ## _lock } + +void lg_lock_init(struct lglock *lg, char *name); +void lg_local_lock(struct lglock *lg); +void lg_local_unlock(struct lglock *lg); +void lg_local_lock_cpu(struct lglock *lg, int cpu); +void lg_local_unlock_cpu(struct lglock *lg, int cpu); +void lg_global_lock(struct lglock *lg); +void lg_global_unlock(struct lglock *lg); + #endif diff --git a/kernel/Makefile b/kernel/Makefile index 6c07f30..296132c 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o cred.o \ - async.o range.o groups.o + async.o range.o groups.o lglock.o ifdef CONFIG_FUNCTION_TRACER # Do not trace 
debug files and internal ftrace files diff --git a/kernel/lglock.c b/kernel/lglock.c new file mode 100644 index 0000000..6535a66 --- /dev/null +++ b/kernel/lglock.c @@ -0,0 +1,89 @@ +/* See include/linux/lglock.h for description */ +#include <linux/module.h> +#include <linux/lglock.h> +#include <linux/cpu.h> +#include <linux/string.h> + +/* + * Note there is no uninit, so lglocks cannot be defined in + * modules (but it's fine to use them from there) + * Could be added though, just undo lg_lock_init + */ + +void lg_lock_init(struct lglock *lg, char *name) +{ + LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0); +} +EXPORT_SYMBOL(lg_lock_init); + +void lg_local_lock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock); + +void lg_local_unlock(struct lglock *lg) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = this_cpu_ptr(lg->lock); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock); + +void lg_local_lock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + preempt_disable(); + rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_lock(lock); +} +EXPORT_SYMBOL(lg_local_lock_cpu); + +void lg_local_unlock_cpu(struct lglock *lg, int cpu) +{ + arch_spinlock_t *lock; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); + lock = per_cpu_ptr(lg->lock, cpu); + arch_spin_unlock(lock); + preempt_enable(); +} +EXPORT_SYMBOL(lg_local_unlock_cpu); + +void lg_global_lock(struct lglock *lg) +{ + int i; + + preempt_disable(); + rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_); + for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_lock(lock); + } +} +EXPORT_SYMBOL(lg_global_lock); + +void lg_global_unlock(struct lglock *lg) +{ + int i; + + rwlock_release(&lg->lock_dep_map, 1, _RET_IP_); +
for_each_possible_cpu(i) { + arch_spinlock_t *lock; + lock = per_cpu_ptr(lg->lock, i); + arch_spin_unlock(lock); + } + preempt_enable(); +} +EXPORT_SYMBOL(lg_global_unlock); -- cgit v0.10.2 From 962830df366b66e71849040770ae6ba55a8b4aec Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 8 May 2012 13:32:02 +0930 Subject: brlocks/lglocks: API cleanups lglocks and brlocks are currently generated with some complicated macros in lglock.h. But there's no reason to not just use common utility functions and put all the data into a common data structure. In preparation, this patch changes the API to look more like normal function calls with pointers, not magic macros. The patch is rather large because I move over all users in one go to keep it bisectable. This impacts the VFS somewhat in terms of lines changed. But no actual behaviour change. [akpm@linux-foundation.org: checkpatch fixes] Signed-off-by: Andi Kleen Cc: Al Viro Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Rusty Russell Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 4435d8b..4046904 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2575,7 +2575,7 @@ static int prepend_path(const struct path *path, bool slash = false; int error = 0; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -2606,7 +2606,7 @@ static int prepend_path(const struct path *path, error = prepend(buffer, buflen, "/", 1); out: - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return error; global_root: diff --git a/fs/file_table.c b/fs/file_table.c index f5c67c5..a305d9e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -420,9 +420,9 @@ static inline void __file_sb_list_add(struct file *file, struct super_block *sb) */ void file_sb_list_add(struct file *file, struct super_block *sb) { - lg_local_lock(files_lglock); + lg_local_lock(&files_lglock); __file_sb_list_add(file, sb); - 
lg_local_unlock(files_lglock); + lg_local_unlock(&files_lglock); } /** @@ -435,9 +435,9 @@ void file_sb_list_add(struct file *file, struct super_block *sb) void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - lg_local_lock_cpu(files_lglock, file_list_cpu(file)); + lg_local_lock_cpu(&files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); + lg_local_unlock_cpu(&files_lglock, file_list_cpu(file)); } } @@ -484,7 +484,7 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - lg_global_lock(files_lglock); + lg_global_lock(&files_lglock); do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) @@ -501,12 +501,12 @@ retry: file_release_write(f); mnt = mntget(f->f_path.mnt); /* This can sleep, so we can't hold the spinlock. */ - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; } while_file_list_for_each_entry; - lg_global_unlock(files_lglock); + lg_global_unlock(&files_lglock); } void __init files_init(unsigned long mempages) @@ -524,6 +524,6 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); files_defer_init(); - lg_lock_init(files_lglock); + lg_lock_init(&files_lglock, "files_lglock"); percpu_counter_init(&nr_files, 0); } diff --git a/fs/namei.c b/fs/namei.c index c651f02..93ac932 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -449,7 +449,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); nd->flags &= ~LOOKUP_RCU; return 0; @@ -507,14 +507,14 @@ static int complete_walk(struct nameidata *nd) if (unlikely(!__d_rcu_to_refcount(dentry, nd->seq))) { spin_unlock(&dentry->d_lock); rcu_read_unlock(); - 
br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } BUG_ON(nd->inode != dentry->d_inode); spin_unlock(&dentry->d_lock); mntget(nd->path.mnt); rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -681,15 +681,15 @@ int follow_up(struct path *path) struct mount *parent; struct dentry *mountpoint; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); parent = mnt->mnt_parent; if (&parent->mnt == path->mnt) { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return 0; } mntget(&parent->mnt); mountpoint = dget(mnt->mnt_mountpoint); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -947,7 +947,7 @@ failed: if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return -ECHILD; } @@ -1265,7 +1265,7 @@ static void terminate_walk(struct nameidata *nd) if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); } } @@ -1620,7 +1620,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); } else { @@ -1633,7 +1633,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, if (*name=='/') { if (flags & LOOKUP_RCU) { - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); set_root_rcu(nd); } else { @@ -1646,7 +1646,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, struct fs_struct *fs = current->fs; unsigned seq; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); do { @@ -1682,7 +1682,7 @@ static 
int path_init(int dfd, const char *name, unsigned int flags, if (fput_needed) *fp = file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); rcu_read_lock(); } else { path_get(&file->f_path); diff --git a/fs/namespace.c b/fs/namespace.c index e608199..224aff1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -397,7 +397,7 @@ static int mnt_make_readonly(struct mount *mnt) { int ret = 0; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -431,15 +431,15 @@ static int mnt_make_readonly(struct mount *mnt) */ smp_wmb(); mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct mount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt->mnt.mnt_flags &= ~MNT_READONLY; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } int sb_prepare_remount_readonly(struct super_block *sb) @@ -451,7 +451,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (atomic_long_read(&sb->s_remove_count)) return -EBUSY; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_for_each_entry(mnt, &sb->s_mounts, mnt_instance) { if (!(mnt->mnt.mnt_flags & MNT_READONLY)) { mnt->mnt.mnt_flags |= MNT_WRITE_HOLD; @@ -473,7 +473,7 @@ int sb_prepare_remount_readonly(struct super_block *sb) if (mnt->mnt.mnt_flags & MNT_WRITE_HOLD) mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return err; } @@ -522,14 +522,14 @@ struct vfsmount *lookup_mnt(struct path *path) { struct mount *child_mnt; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); child_mnt = __lookup_mnt(path->mnt, path->dentry, 1); if (child_mnt) { mnt_add_count(child_mnt, 1); - br_read_unlock(vfsmount_lock); + 
br_read_unlock(&vfsmount_lock); return &child_mnt->mnt; } else { - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return NULL; } } @@ -714,9 +714,9 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void mnt->mnt.mnt_sb = root->d_sb; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &root->d_sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return &mnt->mnt; } EXPORT_SYMBOL_GPL(vfs_kern_mount); @@ -745,9 +745,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, mnt->mnt.mnt_root = dget(root); mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&mnt->mnt_instance, &sb->s_mounts); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (flag & CL_SLAVE) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); @@ -803,35 +803,36 @@ static void mntput_no_expire(struct mount *mnt) { put_again: #ifdef CONFIG_SMP - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); if (likely(atomic_read(&mnt->mnt_longterm))) { mnt_add_count(mnt, -1); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_add_count(mnt, -1); if (mnt_get_count(mnt)) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return; } #else mnt_add_count(mnt, -1); if (likely(mnt_get_count(mnt))) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); #endif if (unlikely(mnt->mnt_pinned)) { mnt_add_count(mnt, mnt->mnt_pinned + 1); mnt->mnt_pinned = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); acct_auto_close_mnt(&mnt->mnt); goto put_again; } + list_del(&mnt->mnt_instance); - 
br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); mntfree(mnt); } @@ -857,21 +858,21 @@ EXPORT_SYMBOL(mntget); void mnt_pin(struct vfsmount *mnt) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); real_mount(mnt)->mnt_pinned++; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *m) { struct mount *mnt = real_mount(m); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt->mnt_pinned) { mnt_add_count(mnt, 1); mnt->mnt_pinned--; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -988,12 +989,12 @@ int may_umount_tree(struct vfsmount *m) BUG_ON(!m); /* write lock needed for mnt_get_count */ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += mnt_get_count(p); minimum_refs += 2; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -1020,10 +1021,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (propagate_mount_busy(real_mount(mnt), 2)) ret = 0; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1040,13 +1041,13 @@ void release_mounts(struct list_head *head) struct dentry *dentry; struct mount *m; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt.mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); dput(dentry); mntput(&m->mnt); } @@ -1112,12 +1113,12 @@ static int do_umount(struct mount *mnt, int flags) * probably don't strictly need the lock here if we examined * all race cases, but it's a slowpath. 
*/ - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (mnt_get_count(mnt) != 2) { - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return -EBUSY; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); if (!xchg(&mnt->mnt_expiry_mark, 1)) return -EAGAIN; @@ -1159,7 +1160,7 @@ static int do_umount(struct mount *mnt, int flags) } down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1171,7 +1172,7 @@ static int do_umount(struct mount *mnt, int flags) umount_tree(mnt, 1, &umount_list); retval = 0; } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1286,19 +1287,19 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt.mnt_root, flag); if (!q) goto Enomem; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(res, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1318,9 +1319,9 @@ void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(real_mount(mnt), 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1448,7 +1449,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, if (err) goto out_cleanup_ids; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = 
next_mnt(p, source_mnt)) @@ -1467,7 +1468,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); return 0; @@ -1565,10 +1566,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1617,9 +1618,9 @@ static int do_loopback(struct path *path, char *old_name, err = graft_tree(mnt, path); if (err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(mnt, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } out2: unlock_mount(path); @@ -1677,16 +1678,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK; mnt->mnt.mnt_flags = mnt_flags; - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); touch_mnt_namespace(mnt->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); } return err; } @@ -1893,9 +1894,9 @@ fail: /* remove m from any expiration list it may be on */ if (!list_empty(&mnt->mnt_expire)) { down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_del_init(&mnt->mnt_expire); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } mntput(m); @@ -1911,11 +1912,11 @@ fail: void mnt_set_expiry(struct vfsmount *mnt, struct list_head *expiry_list) { down_write(&namespace_sem); - 
br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&real_mount(mnt)->mnt_expire, expiry_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); } EXPORT_SYMBOL(mnt_set_expiry); @@ -1935,7 +1936,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1954,7 +1955,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -2218,9 +2219,9 @@ void mnt_make_shortterm(struct vfsmount *m) struct mount *mnt = real_mount(m); if (atomic_add_unless(&mnt->mnt_longterm, -1, 1)) return; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); atomic_dec(&mnt->mnt_longterm); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); #endif } @@ -2250,9 +2251,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, return ERR_PTR(-ENOMEM); } new_ns->root = new; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); list_add_tail(&new_ns->list, &new->mnt_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2416,9 +2417,9 @@ bool is_path_reachable(struct mount *mnt, struct dentry *dentry, int path_is_under(struct path *path1, struct path *path2) { int res; - br_read_lock(vfsmount_lock); + br_read_lock(&vfsmount_lock); res = is_path_reachable(real_mount(path1->mnt), path1->dentry, path2); - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -2505,7 +2506,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* make sure 
we can reach put_old from new_root */ if (!is_path_reachable(real_mount(old.mnt), old.dentry, &new)) goto out4; - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); /* mount old root on put_old */ @@ -2513,7 +2514,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new_mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; out4: @@ -2576,7 +2577,7 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); - br_lock_init(vfsmount_lock); + br_lock_init(&vfsmount_lock); err = sysfs_init(); if (err) @@ -2596,9 +2597,9 @@ void put_mnt_ns(struct mnt_namespace *ns) if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/pnode.c b/fs/pnode.c index ab5fa9e..bed378d 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -257,12 +257,12 @@ int propagate_mnt(struct mount *dest_mnt, struct dentry *dest_dentry, prev_src_mnt = child; } out: - br_write_lock(vfsmount_lock); + br_write_lock(&vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct mount, mnt_hash); umount_tree(child, 0, &umount_list); } - br_write_unlock(vfsmount_lock); + br_write_unlock(&vfsmount_lock); release_mounts(&umount_list); return ret; } diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 1241285..5e289a7 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -23,12 +23,12 @@ static unsigned mounts_poll(struct file *file, poll_table *wait) poll_wait(file, &p->ns->poll, wait); - br_read_lock(vfsmount_lock); + 
br_read_lock(&vfsmount_lock); if (p->m.poll_event != ns->event) { p->m.poll_event = ns->event; res |= POLLERR | POLLPRI; } - br_read_unlock(vfsmount_lock); + br_read_unlock(&vfsmount_lock); return res; } -- cgit v0.10.2 From 16767652380074fd2a87ae7486e0fe0b609daf00 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 09:54:29 -0400 Subject: get rid of idiotic misplaced __kernel_mode_t in ncpfs kernel-private data structure Signed-off-by: Al Viro diff --git a/fs/ncpfs/ncp_fs_sb.h b/fs/ncpfs/ncp_fs_sb.h index 4af803f..54cc0cd 100644 --- a/fs/ncpfs/ncp_fs_sb.h +++ b/fs/ncpfs/ncp_fs_sb.h @@ -23,17 +23,17 @@ struct ncp_mount_data_kernel { unsigned long flags; /* NCP_MOUNT_* flags */ unsigned int int_flags; /* internal flags */ #define NCP_IMOUNT_LOGGEDIN_POSSIBLE 0x0001 - __kernel_uid32_t mounted_uid; /* Who may umount() this filesystem? */ + uid_t mounted_uid; /* Who may umount() this filesystem? */ struct pid *wdog_pid; /* Who cares for our watchdog packets? */ unsigned int ncp_fd; /* The socket to the ncp port */ unsigned int time_out; /* How long should I wait after sending a NCP request? */ unsigned int retry_count; /* And how often should I retry? */ unsigned char mounted_vol[NCP_VOLNAME_LEN + 1]; - __kernel_uid32_t uid; - __kernel_gid32_t gid; - __kernel_mode_t file_mode; - __kernel_mode_t dir_mode; + uid_t uid; + gid_t gid; + umode_t file_mode; + umode_t dir_mode; int info_fd; }; -- cgit v0.10.2 From 726592a9be0bdf919399d3dfa633f8e2d69cbf13 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:00:52 -0400 Subject: mode_t whack-a-mole: ->is_visible() returns umode_t... 
Signed-off-by: Al Viro diff --git a/drivers/base/soc.c b/drivers/base/soc.c index ba29b2e..72b5e72 100644 --- a/drivers/base/soc.c +++ b/drivers/base/soc.c @@ -42,7 +42,7 @@ struct device *soc_device_to_device(struct soc_device *soc_dev) return &soc_dev->dev; } -static mode_t soc_attribute_mode(struct kobject *kobj, +static umode_t soc_attribute_mode(struct kobject *kobj, struct attribute *attr, int index) { -- cgit v0.10.2 From dcc62b6b38334075271eaffb1dc42cd47ceb5692 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:13:52 -0400 Subject: mips: get rid of nlink_t, use explicitly-sized type (__u32 in all cases) Signed-off-by: Al Viro diff --git a/arch/mips/include/asm/stat.h b/arch/mips/include/asm/stat.h index 6e00f75..fe9a4c3 100644 --- a/arch/mips/include/asm/stat.h +++ b/arch/mips/include/asm/stat.h @@ -20,7 +20,7 @@ struct stat { long st_pad1[3]; /* Reserved for network id */ ino_t st_ino; mode_t st_mode; - nlink_t st_nlink; + __u32 st_nlink; uid_t st_uid; gid_t st_gid; unsigned st_rdev; @@ -55,7 +55,7 @@ struct stat64 { unsigned long long st_ino; mode_t st_mode; - nlink_t st_nlink; + __u32 st_nlink; uid_t st_uid; gid_t st_gid; @@ -96,7 +96,7 @@ struct stat { unsigned long st_ino; mode_t st_mode; - nlink_t st_nlink; + __u32 st_nlink; uid_t st_uid; gid_t st_gid; -- cgit v0.10.2 From e57f93cc53b772b2049222410cf6a141a724529a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:16:30 -0400 Subject: powerpc: get rid of nlink_t uses, switch to explicitly-sized type Signed-off-by: Al Viro diff --git a/arch/powerpc/include/asm/stat.h b/arch/powerpc/include/asm/stat.h index e4edc51..10cfb55 100644 --- a/arch/powerpc/include/asm/stat.h +++ b/arch/powerpc/include/asm/stat.h @@ -30,11 +30,11 @@ struct stat { unsigned long st_dev; ino_t st_ino; #ifdef __powerpc64__ - nlink_t st_nlink; + unsigned long st_nlink; mode_t st_mode; #else mode_t st_mode; - nlink_t st_nlink; + unsigned short st_nlink; #endif uid_t st_uid; gid_t st_gid; -- cgit v0.10.2 
From 1dfb5751a4de7c6a57a5602e8e2b87267cfc8c81 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:17:45 -0400 Subject: parisc: get rid of nlink_t, switch to explicitly-sized type Signed-off-by: Al Viro diff --git a/arch/parisc/include/asm/stat.h b/arch/parisc/include/asm/stat.h index 9d5fbbc..d76fbda 100644 --- a/arch/parisc/include/asm/stat.h +++ b/arch/parisc/include/asm/stat.h @@ -7,7 +7,7 @@ struct stat { unsigned int st_dev; /* dev_t is 32 bits on parisc */ ino_t st_ino; /* 32 bits */ mode_t st_mode; /* 16 bits */ - nlink_t st_nlink; /* 16 bits */ + unsigned short st_nlink; /* 16 bits */ unsigned short st_reserved1; /* old st_uid */ unsigned short st_reserved2; /* old st_gid */ unsigned int st_rdev; @@ -42,7 +42,7 @@ struct hpux_stat64 { unsigned int st_dev; /* dev_t is 32 bits on parisc */ ino_t st_ino; /* 32 bits */ mode_t st_mode; /* 16 bits */ - nlink_t st_nlink; /* 16 bits */ + unsigned short st_nlink; /* 16 bits */ unsigned short st_reserved1; /* old st_uid */ unsigned short st_reserved2; /* old st_gid */ unsigned int st_rdev; -- cgit v0.10.2 From bb8ac181a5cf50458a0d83b4460790badc9fdc16 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 19 May 2012 10:25:23 -0400 Subject: bury __kernel_nlink_t, make internal nlink_t consistent Signed-off-by: Al Viro diff --git a/arch/alpha/include/asm/posix_types.h b/arch/alpha/include/asm/posix_types.h index 24779fc..5a8a483 100644 --- a/arch/alpha/include/asm/posix_types.h +++ b/arch/alpha/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned int __kernel_ino_t; #define __kernel_ino_t __kernel_ino_t -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h index efdf990..d2de9cb 100644 --- a/arch/arm/include/asm/posix_types.h +++ b/arch/arm/include/asm/posix_types.h @@ -22,9 +22,6 @@ typedef unsigned short 
__kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/avr32/include/asm/posix_types.h b/arch/avr32/include/asm/posix_types.h index 74667bf..9ba9e74 100644 --- a/arch/avr32/include/asm/posix_types.h +++ b/arch/avr32/include/asm/posix_types.h @@ -17,9 +17,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/blackfin/include/asm/posix_types.h b/arch/blackfin/include/asm/posix_types.h index 41bc187..1bd3436 100644 --- a/arch/blackfin/include/asm/posix_types.h +++ b/arch/blackfin/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned int __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/cris/include/asm/posix_types.h b/arch/cris/include/asm/posix_types.h index 234891c..ce4e517 100644 --- a/arch/cris/include/asm/posix_types.h +++ b/arch/cris/include/asm/posix_types.h @@ -15,9 +15,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/frv/include/asm/posix_types.h b/arch/frv/include/asm/posix_types.h index 3f34cb4..fe512af 100644 --- a/arch/frv/include/asm/posix_types.h +++ b/arch/frv/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short 
__kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/h8300/include/asm/posix_types.h b/arch/h8300/include/asm/posix_types.h index bc4c34e..91e62ba 100644 --- a/arch/h8300/include/asm/posix_types.h +++ b/arch/h8300/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/ia64/include/asm/posix_types.h b/arch/ia64/include/asm/posix_types.h index 7323ab9..99ee1d6 100644 --- a/arch/ia64/include/asm/posix_types.h +++ b/arch/ia64/include/asm/posix_types.h @@ -1,9 +1,6 @@ #ifndef _ASM_IA64_POSIX_TYPES_H #define _ASM_IA64_POSIX_TYPES_H -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #include diff --git a/arch/m32r/include/asm/posix_types.h b/arch/m32r/include/asm/posix_types.h index 0195850..236de26 100644 --- a/arch/m32r/include/asm/posix_types.h +++ b/arch/m32r/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/m68k/include/asm/posix_types.h b/arch/m68k/include/asm/posix_types.h index 6373093..cf4dbf7 100644 --- a/arch/m68k/include/asm/posix_types.h +++ b/arch/m68k/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define 
__kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/mips/include/asm/posix_types.h b/arch/mips/include/asm/posix_types.h index e0308dc..fa03ec3 100644 --- a/arch/mips/include/asm/posix_types.h +++ b/arch/mips/include/asm/posix_types.h @@ -17,11 +17,6 @@ * assume GCC is being used. */ -#if (_MIPS_SZLONG == 64) -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t -#endif - typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/arch/mn10300/include/asm/posix_types.h b/arch/mn10300/include/asm/posix_types.h index ab50618..d31eeea 100644 --- a/arch/mn10300/include/asm/posix_types.h +++ b/arch/mn10300/include/asm/posix_types.h @@ -20,9 +20,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/parisc/include/asm/posix_types.h b/arch/parisc/include/asm/posix_types.h index 5212b03..b934425 100644 --- a/arch/parisc/include/asm/posix_types.h +++ b/arch/parisc/include/asm/posix_types.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/arch/powerpc/include/asm/posix_types.h b/arch/powerpc/include/asm/posix_types.h index f139325..2958c5b 100644 --- a/arch/powerpc/include/asm/posix_types.h +++ b/arch/powerpc/include/asm/posix_types.h @@ -16,9 +16,6 @@ typedef int __kernel_ssize_t; typedef long __kernel_ptrdiff_t; #define __kernel_size_t __kernel_size_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t #endif diff --git 
a/arch/s390/include/asm/posix_types.h b/arch/s390/include/asm/posix_types.h index edf8527..7be104c 100644 --- a/arch/s390/include/asm/posix_types.h +++ b/arch/s390/include/asm/posix_types.h @@ -24,7 +24,6 @@ typedef unsigned short __kernel_old_dev_t; typedef unsigned long __kernel_ino_t; typedef unsigned short __kernel_mode_t; -typedef unsigned short __kernel_nlink_t; typedef unsigned short __kernel_ipc_pid_t; typedef unsigned short __kernel_uid_t; typedef unsigned short __kernel_gid_t; @@ -35,7 +34,6 @@ typedef int __kernel_ptrdiff_t; typedef unsigned int __kernel_ino_t; typedef unsigned int __kernel_mode_t; -typedef unsigned int __kernel_nlink_t; typedef int __kernel_ipc_pid_t; typedef unsigned int __kernel_uid_t; typedef unsigned int __kernel_gid_t; @@ -47,7 +45,6 @@ typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ #define __kernel_ino_t __kernel_ino_t #define __kernel_mode_t __kernel_mode_t -#define __kernel_nlink_t __kernel_nlink_t #define __kernel_ipc_pid_t __kernel_ipc_pid_t #define __kernel_uid_t __kernel_uid_t #define __kernel_gid_t __kernel_gid_t diff --git a/arch/sh/include/asm/posix_types_32.h b/arch/sh/include/asm/posix_types_32.h index abda584..ba0bdc4 100644 --- a/arch/sh/include/asm/posix_types_32.h +++ b/arch/sh/include/asm/posix_types_32.h @@ -3,8 +3,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git a/arch/sh/include/asm/posix_types_64.h b/arch/sh/include/asm/posix_types_64.h index fcda07b..244f7e9 100644 --- a/arch/sh/include/asm/posix_types_64.h +++ b/arch/sh/include/asm/posix_types_64.h @@ -3,8 +3,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef 
unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t typedef unsigned short __kernel_uid_t; diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h index 3070f25..156220e 100644 --- a/arch/sparc/include/asm/posix_types.h +++ b/arch/sparc/include/asm/posix_types.h @@ -9,8 +9,6 @@ #if defined(__sparc__) && defined(__arch64__) /* sparc 64 bit */ -typedef unsigned int __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t typedef unsigned short __kernel_old_uid_t; typedef unsigned short __kernel_old_gid_t; @@ -38,9 +36,6 @@ typedef unsigned short __kernel_gid_t; typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 69adc08..6e74450 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t; typedef __kernel_mode_t compat_mode_t; typedef __kernel_dev_t compat_dev_t; typedef __kernel_loff_t compat_loff_t; -typedef __kernel_nlink_t compat_nlink_t; typedef __kernel_ipc_pid_t compat_ipc_pid_t; typedef __kernel_daddr_t compat_daddr_t; typedef __kernel_fsid_t compat_fsid_t; diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h index 99f262e..8e52505 100644 --- a/arch/x86/include/asm/posix_types_32.h +++ b/arch/x86/include/asm/posix_types_32.h @@ -10,9 +10,6 @@ typedef unsigned short __kernel_mode_t; #define __kernel_mode_t __kernel_mode_t -typedef unsigned short __kernel_nlink_t; -#define __kernel_nlink_t __kernel_nlink_t - typedef unsigned short __kernel_ipc_pid_t; #define __kernel_ipc_pid_t __kernel_ipc_pid_t diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h index 91d44bd..fe74fcc 100644 --- 
a/include/asm-generic/posix_types.h +++ b/include/asm-generic/posix_types.h @@ -23,10 +23,6 @@ typedef __kernel_ulong_t __kernel_ino_t; typedef unsigned int __kernel_mode_t; #endif -#ifndef __kernel_nlink_t -typedef __kernel_ulong_t __kernel_nlink_t; -#endif - #ifndef __kernel_pid_t typedef int __kernel_pid_t; #endif diff --git a/include/linux/types.h b/include/linux/types.h index 7f480db..9c1bd53 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -25,7 +25,7 @@ typedef __kernel_dev_t dev_t; typedef __kernel_ino_t ino_t; typedef __kernel_mode_t mode_t; typedef unsigned short umode_t; -typedef __kernel_nlink_t nlink_t; +typedef __u32 nlink_t; typedef __kernel_off_t off_t; typedef __kernel_pid_t pid_t; typedef __kernel_daddr_t daddr_t; -- cgit v0.10.2 From 3a0c0e26b64505522b8bce8578a6e61609c31318 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 7 May 2012 19:56:50 +0300 Subject: jffs2: remove lock_super We do not need 'lock_super()'/'unlock_super()' in JFFS2 - kill them. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index f9916f3..3422a2d 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -67,15 +67,12 @@ static void jffs2_write_super(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - lock_super(sb); sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) { jffs2_dbg(1, "%s()\n", __func__); jffs2_flush_wbuf_gc(c, 0); } - - unlock_super(sb); } static const char *jffs2_compr_name(unsigned int compr) -- cgit v0.10.2 From d0490eea14cc9221cb8343091c216fb862d19958 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 7 May 2012 19:56:51 +0300 Subject: jffs2: remove unnecessary GC pass on umount We do not need to call 'jffs2_write_super()' on unmount. This function causes a GC pass to make sure the current contents is pushed out with the data which we already have on the media. But this is not needed on unmount and only slows unmount down unnecessarily. 
It is enough to just sync the write-buffer. This call was added by one of the generic VFS rework patch-sets, see 8c85e125124a473d6f3e9bb187b0b84207f81d91. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 3422a2d..d3dc9d8 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -316,9 +316,6 @@ static void jffs2_put_super (struct super_block *sb) jffs2_dbg(2, "%s()\n", __func__); - if (sb->s_dirt) - jffs2_write_super(sb); - mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); -- cgit v0.10.2 From 06688905cc36b86c700f376e9bc9bb68bc67d801 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 7 May 2012 19:56:52 +0300 Subject: jffs2: remove unnecessary GC pass on sync We do not need to call 'jffs2_write_super()' on sync. This function causes a GC pass to make sure the current contents is pushed out with the data which we already have on the media. But this is not needed on unmount and only slows sync down unnecessarily. It is enough to just sync the write-buffer. This call was added by one of the generic VFS rework patch-sets, see d579ed00aa96a7f7486978540a0d7cecaff742ae. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index d3dc9d8..dc366c0 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -110,8 +110,6 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - jffs2_write_super(sb); - mutex_lock(&c->alloc_sem); jffs2_flush_wbuf_pad(c); mutex_unlock(&c->alloc_sem); -- cgit v0.10.2 From 8bdc81c5069e43755d6e59e5e990e21ca200e8e2 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 7 May 2012 19:56:53 +0300 Subject: jffs2: get rid of jffs2_sync_super Currently JFFS2 file-system maps the VFS "superblock" abstraction to the write-buffer. Namely, it uses VFS services to synchronize the write-buffer periodically. 
The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblock using the '->write_super()' call-back. But the problem with this thread is that it wastes power by waking up the system every 5 seconds no matter what. So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super' VFS service, and then remove it together with the kernel thread. This patch switches the JFFS2 write-buffer management from '->write_super()'/'->s_dirt' to a delayed work. Instead of setting the 's_dirt' flag we just schedule a delayed work for synchronizing the write-buffer. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 55a0c1d..44dca1f 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -126,6 +126,10 @@ struct jffs2_sb_info { struct jffs2_inodirty *wbuf_inodes; struct rw_semaphore wbuf_sem; /* Protects the write buffer */ + struct delayed_work wbuf_dwork; /* write-buffer write-out work */ + int wbuf_queued; /* non-zero delayed work is queued */ + spinlock_t wbuf_dwork_lock; /* protects wbuf_dwork and and wbuf_queued */ + unsigned char *oobbuf; int oobavail; /* How many bytes are available for JFFS2 in OOB */ #endif diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 1cd3aec..bcd983d 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -95,6 +95,7 @@ static inline void jffs2_init_inode_info(struct jffs2_inode_info *f) #define jffs2_ubivol(c) (0) #define jffs2_ubivol_setup(c) (0) #define jffs2_ubivol_cleanup(c) do {} while (0) +#define jffs2_dirty_trigger(c) do {} while (0) #else /* NAND and/or ECC'd NOR support present */ @@ -135,14 +136,10 @@ void jffs2_ubivol_cleanup(struct jffs2_sb_info *c); #define jffs2_nor_wbuf_flash(c) (c->mtd->type == MTD_NORFLASH && ! 
(c->mtd->flags & MTD_BIT_WRITEABLE)) int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c); void jffs2_nor_wbuf_flash_cleanup(struct jffs2_sb_info *c); +void jffs2_dirty_trigger(struct jffs2_sb_info *c); #endif /* WRITEBUFFER */ -static inline void jffs2_dirty_trigger(struct jffs2_sb_info *c) -{ - OFNI_BS_2SFFJ(c)->s_dirt = 1; -} - /* background.c */ int jffs2_start_garbage_collect_thread(struct jffs2_sb_info *c); void jffs2_stop_garbage_collect_thread(struct jffs2_sb_info *c); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index dc366c0..bc586f2 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -63,18 +63,6 @@ static void jffs2_i_init_once(void *foo) inode_init_once(&f->vfs_inode); } -static void jffs2_write_super(struct super_block *sb) -{ - struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - - sb->s_dirt = 0; - - if (!(sb->s_flags & MS_RDONLY)) { - jffs2_dbg(1, "%s()\n", __func__); - jffs2_flush_wbuf_gc(c, 0); - } -} - static const char *jffs2_compr_name(unsigned int compr) { switch (compr) { @@ -246,7 +234,6 @@ static const struct super_operations jffs2_super_operations = .alloc_inode = jffs2_alloc_inode, .destroy_inode =jffs2_destroy_inode, .put_super = jffs2_put_super, - .write_super = jffs2_write_super, .statfs = jffs2_statfs, .remount_fs = jffs2_remount_fs, .evict_inode = jffs2_evict_inode, diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 74d9be1..6f4529d 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "nodelist.h" @@ -85,7 +86,7 @@ static void jffs2_wbuf_dirties_inode(struct jffs2_sb_info *c, uint32_t ino) { struct jffs2_inodirty *new; - /* Mark the superblock dirty so that kupdated will flush... 
*/ + /* Schedule delayed write-buffer write-out */ jffs2_dirty_trigger(c); if (jffs2_wbuf_pending_for_ino(c, ino)) @@ -1148,6 +1149,47 @@ int jffs2_write_nand_badblock(struct jffs2_sb_info *c, struct jffs2_eraseblock * return 1; } +static struct jffs2_sb_info *work_to_sb(struct work_struct *work) +{ + struct delayed_work *dwork; + + dwork = container_of(work, struct delayed_work, work); + return container_of(dwork, struct jffs2_sb_info, wbuf_dwork); +} + +static void delayed_wbuf_sync(struct work_struct *work) +{ + struct jffs2_sb_info *c = work_to_sb(work); + struct super_block *sb = OFNI_BS_2SFFJ(c); + + spin_lock(&c->wbuf_dwork_lock); + c->wbuf_queued = 0; + spin_unlock(&c->wbuf_dwork_lock); + + if (!(sb->s_flags & MS_RDONLY)) { + jffs2_dbg(1, "%s()\n", __func__); + jffs2_flush_wbuf_gc(c, 0); + } +} + +void jffs2_dirty_trigger(struct jffs2_sb_info *c) +{ + struct super_block *sb = OFNI_BS_2SFFJ(c); + unsigned long delay; + + if (sb->s_flags & MS_RDONLY) + return; + + spin_lock(&c->wbuf_dwork_lock); + if (!c->wbuf_queued) { + jffs2_dbg(1, "%s()\n", __func__); + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay); + c->wbuf_queued = 1; + } + spin_unlock(&c->wbuf_dwork_lock); +} + int jffs2_nand_flash_setup(struct jffs2_sb_info *c) { struct nand_ecclayout *oinfo = c->mtd->ecclayout; @@ -1169,6 +1211,8 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c) /* Initialise write buffer */ init_rwsem(&c->wbuf_sem); + spin_lock_init(&c->wbuf_dwork_lock); + INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; @@ -1207,8 +1251,8 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); - - + spin_lock_init(&c->wbuf_dwork_lock); + INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->erasesize; /* Find a suitable c->sector_size @@ -1267,6 +1311,9 @@ int 
jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) { /* Initialize write buffer */ init_rwsem(&c->wbuf_sem); + spin_lock_init(&c->wbuf_dwork_lock); + INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); + c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; @@ -1299,6 +1346,8 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) { return 0; init_rwsem(&c->wbuf_sem); + spin_lock_init(&c->wbuf_dwork_lock); + INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync); c->wbuf_pagesize = c->mtd->writesize; c->wbuf_ofs = 0xFFFFFFFF; -- cgit v0.10.2 From 3ed37648e1cbf1bbebc200c6ea8fd8daf8325843 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 15 May 2012 14:57:33 +0800 Subject: fs: move file_remove_suid() to fs/inode.c file_remove_suid() is a generic function operates on struct file, it almost has no relations with file mapping, so move it to fs/inode.c. Cc: Alexander Viro Signed-off-by: Cong Wang Signed-off-by: Al Viro diff --git a/fs/inode.c b/fs/inode.c index 183ddd6..a79555e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1530,6 +1530,71 @@ void touch_atime(struct path *path) } EXPORT_SYMBOL(touch_atime); +/* + * The logic we want is + * + * if suid or (sgid and xgrp) + * remove privs + */ +int should_remove_suid(struct dentry *dentry) +{ + umode_t mode = dentry->d_inode->i_mode; + int kill = 0; + + /* suid always must be killed */ + if (unlikely(mode & S_ISUID)) + kill = ATTR_KILL_SUID; + + /* + * sgid without any exec bits is just a mandatory locking mark; leave + * it alone. If some exec bits are set, it's a real sgid; kill it. 
+ */ + if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) + kill |= ATTR_KILL_SGID; + + if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) + return kill; + + return 0; +} +EXPORT_SYMBOL(should_remove_suid); + +static int __remove_suid(struct dentry *dentry, int kill) +{ + struct iattr newattrs; + + newattrs.ia_valid = ATTR_FORCE | kill; + return notify_change(dentry, &newattrs); +} + +int file_remove_suid(struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + struct inode *inode = dentry->d_inode; + int killsuid; + int killpriv; + int error = 0; + + /* Fast path for nothing security related */ + if (IS_NOSEC(inode)) + return 0; + + killsuid = should_remove_suid(dentry); + killpriv = security_inode_need_killpriv(dentry); + + if (killpriv < 0) + return killpriv; + if (killpriv) + error = security_inode_killpriv(dentry); + if (!error && killsuid) + error = __remove_suid(dentry, killsuid); + if (!error && (inode->i_sb->s_flags & MS_NOSEC)) + inode->i_flags |= S_NOSEC; + + return error; +} +EXPORT_SYMBOL(file_remove_suid); + /** * file_update_time - update mtime and ctime time * @file: file accessed diff --git a/mm/filemap.c b/mm/filemap.c index 79c4b2b..21e5abf 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1938,71 +1938,6 @@ struct page *read_cache_page(struct address_space *mapping, } EXPORT_SYMBOL(read_cache_page); -/* - * The logic we want is - * - * if suid or (sgid and xgrp) - * remove privs - */ -int should_remove_suid(struct dentry *dentry) -{ - umode_t mode = dentry->d_inode->i_mode; - int kill = 0; - - /* suid always must be killed */ - if (unlikely(mode & S_ISUID)) - kill = ATTR_KILL_SUID; - - /* - * sgid without any exec bits is just a mandatory locking mark; leave - * it alone. If some exec bits are set, it's a real sgid; kill it. 
- */ - if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) - kill |= ATTR_KILL_SGID; - - if (unlikely(kill && !capable(CAP_FSETID) && S_ISREG(mode))) - return kill; - - return 0; -} -EXPORT_SYMBOL(should_remove_suid); - -static int __remove_suid(struct dentry *dentry, int kill) -{ - struct iattr newattrs; - - newattrs.ia_valid = ATTR_FORCE | kill; - return notify_change(dentry, &newattrs); -} - -int file_remove_suid(struct file *file) -{ - struct dentry *dentry = file->f_path.dentry; - struct inode *inode = dentry->d_inode; - int killsuid; - int killpriv; - int error = 0; - - /* Fast path for nothing security related */ - if (IS_NOSEC(inode)) - return 0; - - killsuid = should_remove_suid(dentry); - killpriv = security_inode_need_killpriv(dentry); - - if (killpriv < 0) - return killpriv; - if (killpriv) - error = security_inode_killpriv(dentry); - if (!error && killsuid) - error = __remove_suid(dentry, killsuid); - if (!error && (inode->i_sb->s_flags & MS_NOSEC)) - inode->i_flags |= S_NOSEC; - - return error; -} -EXPORT_SYMBOL(file_remove_suid); - static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) { -- cgit v0.10.2 From a0a9b0433767713342f9cc70f563483c38e26f30 Mon Sep 17 00:00:00 2001 From: Shai Fultheim Date: Tue, 15 May 2012 12:29:52 +0300 Subject: fs: Move bh_cachep to the __read_mostly section bh_cachep is only written to once on initialization, so move it to the __read_mostly section. 
Signed-off-by: Shai Fultheim Signed-off-by: Vlad Zolotarov Signed-off-by: Al Viro diff --git a/fs/buffer.c b/fs/buffer.c index ad5938c..838a9cf 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -3152,7 +3152,7 @@ SYSCALL_DEFINE2(bdflush, int, func, long, data) /* * Buffer-head allocation */ -static struct kmem_cache *bh_cachep; +static struct kmem_cache *bh_cachep __read_mostly; /* * Once the number of bh's in the machine exceeds this level, we start -- cgit v0.10.2 From 799243a389bde0de10fa21ca1ca453d2fe538b85 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 29 May 2012 11:02:21 -0700 Subject: vfs: increment iversion when a file is truncated When a file is truncated with truncate()/ftruncate() and then closed, iversion is not updated. This patch uses ATTR_SIZE flag as an indication to increment iversion. Mimi said: On fput(), i_version is used to detect and flag files that have changed and need to be re-measured in the IMA measurement policy. When a file is truncated with truncate()/ftruncate() and then closed, i_version is not updated. As a result, although the file has changed, it will not be re-measured and added to the IMA measurement list on subsequent access. 
Signed-off-by: Dmitry Kasatkin Acked-by: Mimi Zohar Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Al Viro diff --git a/fs/attr.c b/fs/attr.c index 584620e..0da9095 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -176,6 +176,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr) return -EPERM; } + if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) { + if (attr->ia_size != inode->i_size) + inode_inc_iversion(inode); + } + if ((ia_valid & ATTR_MODE)) { umode_t amode = attr->ia_mode; /* Flag setting protected by i_mutex */ -- cgit v0.10.2 From a4f9a9a635e4d54ac93df4b861ed8792e17bd4a2 Mon Sep 17 00:00:00 2001 From: Naohiro Aota Date: Tue, 29 May 2012 11:02:24 -0700 Subject: fsnotify: handle subfiles' perm events Recently I'm working on fanotify and found the following strange behaviors. I wrote a program to set fanotify_mark on "/tmp/block" and FAN_DENY all events notified. fanotify_mask = FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is blocked as expected. But, fanotify_mask = FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD: $ cd /tmp/block; cat foo aaa It's not blocked anymore. This is confusing behavior. Also reading commit "fsnotify: call fsnotify_parent in perm events", it seems like fsnotify should handle subfiles' perm events as well as the other notify events. With this patch, regardless of FAN_ALL_EVENTS set or not: $ cd /tmp/block; cat foo cat: foo: Operation not permitted Operation on the file is now blocked properly. FS_OPEN_PERM and FS_ACCESS_PERM are not listed on FS_EVENTS_POSS_ON_CHILD. Due to fsnotify_inode_watches_children() check, if you only specify only these events as fsnotify_mask, you don't get subfiles' perm events notified. This patch add the events to FS_EVENTS_POSS_ON_CHILD to get them notified even if only these events are specified to fsnotify_mask. 
Signed-off-by: Naohiro Aota Cc: Eric Paris Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 91d0e0a3..63d966d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -60,7 +60,7 @@ #define FS_EVENTS_POSS_ON_CHILD (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\ FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\ FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\ - FS_DELETE) + FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM) #define FS_MOVE (FS_MOVED_FROM | FS_MOVED_TO) -- cgit v0.10.2 From fd657170c039a918c0b46f51db8005317d4c83fa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 29 May 2012 11:02:24 -0700 Subject: fsnotify: remove unused parameter from send_to_group() We don't use "mnt" anymore in send_to_group() after 1968f5eed5 ("fanotify: use both marks when possible") was applied. Signed-off-by: Dan Carpenter Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Al Viro diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index ccb14d3..b39c5c1 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -123,7 +123,7 @@ int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) } EXPORT_SYMBOL_GPL(__fsnotify_parent); -static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, +static int send_to_group(struct inode *to_tell, struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, __u32 mask, void *data, @@ -168,10 +168,10 @@ static int send_to_group(struct inode *to_tell, struct vfsmount *mnt, vfsmount_test_mask &= ~inode_mark->ignored_mask; } - pr_debug("%s: group=%p to_tell=%p mnt=%p mask=%x inode_mark=%p" + pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p" " inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x" " data=%p data_is=%d cookie=%d event=%p\n", - __func__, group, to_tell, mnt, mask, inode_mark, + __func__, group, to_tell, mask, inode_mark, inode_test_mask, vfsmount_mark, 
vfsmount_test_mask, data, data_is, cookie, *event); @@ -258,16 +258,16 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, if (inode_group > vfsmount_group) { /* handle inode */ - ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data, + ret = send_to_group(to_tell, inode_mark, NULL, mask, data, data_is, cookie, file_name, &event); /* we didn't use the vfsmount_mark */ vfsmount_group = NULL; } else if (vfsmount_group > inode_group) { - ret = send_to_group(to_tell, &mnt->mnt, NULL, vfsmount_mark, mask, data, + ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data, data_is, cookie, file_name, &event); inode_group = NULL; } else { - ret = send_to_group(to_tell, &mnt->mnt, inode_mark, vfsmount_mark, + ret = send_to_group(to_tell, inode_mark, vfsmount_mark, mask, data, data_is, cookie, file_name, &event); } -- cgit v0.10.2 From 244ca2b4d0b5e500681e52ad9e6d7f3f2b9362a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 29 May 2012 21:24:36 -0400 Subject: i810: switch to vm_mmap() Weirdness around do_mmap() in there does not rely on ->mmap_sem for exclusion, so no need to keep it under that. As the result, we can turn that do_mmap() into vm_mmap(). 
Signed-off-by: Al Viro diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c index f920fb5..fa94391 100644 --- a/drivers/gpu/drm/i810/i810_dma.c +++ b/drivers/gpu/drm/i810/i810_dma.c @@ -130,11 +130,10 @@ static int i810_map_buffer(struct drm_buf *buf, struct drm_file *file_priv) return -EINVAL; /* This is all entirely broken */ - down_write(&current->mm->mmap_sem); old_fops = file_priv->filp->f_op; file_priv->filp->f_op = &i810_buffer_fops; dev_priv->mmap_buffer = buf; - buf_priv->virtual = (void *)do_mmap(file_priv->filp, 0, buf->total, + buf_priv->virtual = (void *)vm_mmap(file_priv->filp, 0, buf->total, PROT_READ | PROT_WRITE, MAP_SHARED, buf->bus_address); dev_priv->mmap_buffer = NULL; @@ -145,7 +144,6 @@ static int i810_map_buffer(struct drm_buf *buf, struct drm_file *file_priv) retcode = PTR_ERR(buf_priv->virtual); buf_priv->virtual = NULL; } - up_write(&current->mm->mmap_sem); return retcode; } -- cgit v0.10.2 From 7732a557b1342c6e6966efb5f07effcf99f56167 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 9 May 2012 17:18:05 -0400 Subject: vfs: stop d_splice_alias creating directory aliases A directory should never have more than one dentry pointing to it. But d_splice_alias() will add one if it finds a directory with an already-existing non-DISCONNECTED dentry. I can't find an obvious reproducer, but I also can't see what prevents d_splice_alias() from encountering such a case. It therefore seems safest to allow d_splice_alias to use any dentry it finds. (Prior to the removal of dentry_unhash() from vfs_rmdir(), around v3.0, this could cause an nfsd deadlock like this: - Somebody attempts to remove a non-empty directory. - The dentry_unhash() in vfs_rmdir() unhashes the dentry pointing to the non-empty directory.
- ->rmdir() then fails with -ENOTEMPTY - Before the vfs_rmdir() caller reaches dput(), an nfsd process in rename looks up the directory by filehandle; at the end of that lookup, this dentry is found by d_alloc_anon(), and a reference is taken on it, preventing dput() from removing it. - A regular lookup of the directory calls d_splice_alias(), finds only an unhashed (not a DISCONNECTED) dentry, and instead adds a new one, so the directory now has two dentries. - The nfsd process in rename, which was previously looking up the source directory of the rename, now looks up the target directory (which is the same), and gets the dentry newly created by the previous lookup. - The rename, seeing two different dentries, assumes this is a cross-directory rename and attempts to take the i_mutex on the directory twice. That reproducer no longer exists, but I don't think there was anything fundamentally incorrect about the vfs_rmdir() behavior there, so I think the real fault was here in d_splice_alias().) Signed-off-by: J. Bruce Fields Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 4046904..3d4be6f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1650,9 +1650,8 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) if (inode && S_ISDIR(inode->i_mode)) { spin_lock(&inode->i_lock); - new = __d_find_alias(inode, 1); + new = __d_find_any_alias(inode); if (new) { - BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); spin_unlock(&inode->i_lock); security_d_instantiate(new, inode); d_move(new, dentry); -- cgit v0.10.2 From 3f50fff4dace23d3cfeb195d5cd4ee813cee68b7 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 9 May 2012 17:18:06 -0400 Subject: vfs: remove unused __d_splice_alias argument Nobody sets want_disconn any more. Reported-by: Peng Tao Signed-off-by: J.
Bruce Fields Signed-off-by: Al Viro diff --git a/fs/dcache.c b/fs/dcache.c index 3d4be6f..85c9e2b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -683,8 +683,6 @@ EXPORT_SYMBOL(dget_parent); /** * d_find_alias - grab a hashed alias of inode * @inode: inode in question - * @want_discon: flag, used by d_splice_alias, to request - * that only a DISCONNECTED alias be returned. * * If inode has a hashed alias, or is a directory and has any alias, * acquire the reference to alias and return it. Otherwise return NULL. @@ -693,10 +691,9 @@ EXPORT_SYMBOL(dget_parent); * of a filesystem. * * If the inode has an IS_ROOT, DCACHE_DISCONNECTED alias, then prefer - * any other hashed alias over that one unless @want_discon is set, - * in which case only return an IS_ROOT, DCACHE_DISCONNECTED alias. + * any other hashed alias over that. */ -static struct dentry *__d_find_alias(struct inode *inode, int want_discon) +static struct dentry *__d_find_alias(struct inode *inode) { struct dentry *alias, *discon_alias; @@ -708,7 +705,7 @@ again: if (IS_ROOT(alias) && (alias->d_flags & DCACHE_DISCONNECTED)) { discon_alias = alias; - } else if (!want_discon) { + } else { __dget_dlock(alias); spin_unlock(&alias->d_lock); return alias; @@ -739,7 +736,7 @@ struct dentry *d_find_alias(struct inode *inode) if (!list_empty(&inode->i_dentry)) { spin_lock(&inode->i_lock); - de = __d_find_alias(inode, 0); + de = __d_find_alias(inode); spin_unlock(&inode->i_lock); } return de; @@ -2481,7 +2478,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) struct dentry *alias; /* Does an aliased dentry already exist? 
*/ - alias = __d_find_alias(inode, 0); + alias = __d_find_alias(inode); if (alias) { actual = alias; write_seqlock(&rename_lock); -- cgit v0.10.2 From 46ce341b2f176c2611f12ac390adf862e932eb02 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 May 2012 11:39:13 +0100 Subject: pipe: return -ENOIOCTLCMD instead of -EINVAL on unknown ioctl command As described in commit 07d106d0a ("vfs: fix up ENOIOCTLCMD error handling"), drivers should return -ENOIOCTLCMD if they receive an ioctl command which they don't understand. Doing so will result in -ENOTTY being returned to userspace, which matches the behaviour of the compat layer if it fails to translate an ioctl command. This patch fixes the pipe ioctl to return -ENOIOCTLCMD instead of -EINVAL when passed an unknown ioctl command. Cc: Al Viro Cc: Andrew Morton Signed-off-by: Will Deacon Signed-off-by: Al Viro diff --git a/fs/pipe.c b/fs/pipe.c index fec5e4a..95ebb56 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -693,7 +693,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return put_user(count, (int __user *)arg); default: - return -EINVAL; + return -ENOIOCTLCMD; } } -- cgit v0.10.2 From 63d37a84ab6004c235314ffd7a76c5eb28c2fae0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 29 May 2012 22:03:48 -0400 Subject: vfs: umount_tree() might be called on subtree that had never made it __mnt_make_shortterm() in there undoes the effect of __mnt_make_longterm() we'd done back when we set ->mnt_ns non-NULL; it should not be done to vfsmounts that had never gone through commit_tree() and friends. Kudos to lczerner for catching that one... 
Cc: stable@vger.kernel.org Signed-off-by: Al Viro diff --git a/fs/namespace.c b/fs/namespace.c index 224aff1..1e4a5fe 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1074,8 +1074,9 @@ void umount_tree(struct mount *mnt, int propagate, struct list_head *kill) list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); + if (p->mnt_ns) + __mnt_make_shortterm(p); p->mnt_ns = NULL; - __mnt_make_shortterm(p); list_del_init(&p->mnt_child); if (mnt_has_parent(p)) { p->mnt_parent->mnt_ghosts++; -- cgit v0.10.2 From 5a5e4c2eca0307deeb438c97dbdc608663515c0a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 01:49:38 -0400 Subject: binfmt_elf: switch elf_map() to vm_mmap/vm_munmap No reason to hold ->mmap_sem over the sequence Signed-off-by: Al Viro diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index e658dd1..1b52956 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -329,7 +329,6 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, if (!size) return addr; - down_write(&current->mm->mmap_sem); /* * total_size is the size of the ELF (interpreter) image.
* The _first_ mmap needs to know the full size, otherwise @@ -340,13 +339,12 @@ static unsigned long elf_map(struct file *filep, unsigned long addr, */ if (total_size) { total_size = ELF_PAGEALIGN(total_size); - map_addr = do_mmap(filep, addr, total_size, prot, type, off); + map_addr = vm_mmap(filep, addr, total_size, prot, type, off); if (!BAD_ADDR(map_addr)) - do_munmap(current->mm, map_addr+size, total_size-size); + vm_munmap(map_addr+size, total_size-size); } else - map_addr = do_mmap(filep, addr, size, prot, type, off); + map_addr = vm_mmap(filep, addr, size, prot, type, off); - up_write(&current->mm->mmap_sem); return(map_addr); } -- cgit v0.10.2 From 7696e0c37f43187431388df7d8087a099b3e2f1c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 01:56:23 -0400 Subject: binfmt_flat: use vm_munmap, we are missing ->mmap_sem there Signed-off-by: Al Viro diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 6b2daf9..178cb70 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -562,7 +562,7 @@ static int load_flat_file(struct linux_binprm * bprm, realdatastart = (unsigned long) -ENOMEM; printk("Unable to allocate RAM for process data, errno %d\n", (int)-realdatastart); - do_munmap(current->mm, textpos, text_len); + vm_munmap(textpos, text_len); ret = realdatastart; goto err; } @@ -586,8 +586,8 @@ static int load_flat_file(struct linux_binprm * bprm, } if (IS_ERR_VALUE(result)) { printk("Unable to read data+bss, errno %d\n", (int)-result); - do_munmap(current->mm, textpos, text_len); - do_munmap(current->mm, realdatastart, len); + vm_munmap(textpos, text_len); + vm_munmap(realdatastart, len); ret = result; goto err; } @@ -654,7 +654,7 @@ static int load_flat_file(struct linux_binprm * bprm, } if (IS_ERR_VALUE(result)) { printk("Unable to read code+data+bss, errno %d\n",(int)-result); - do_munmap(current->mm, textpos, text_len + data_len + extra + + vm_munmap(textpos, text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long)); ret = result; goto
err; -- cgit v0.10.2 From 657bec850ff98b82b3a617b588a6523487344ccc Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 02:12:40 -0400 Subject: ia64, sparc64: convert wrappers around do_mremap() to sys_mremap() they contain open-coded sys_mremap() Signed-off-by: Al Viro diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index 609d500..d9439ef 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -171,22 +171,9 @@ asmlinkage unsigned long ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, unsigned long new_addr) { - extern unsigned long do_mremap (unsigned long addr, - unsigned long old_len, - unsigned long new_len, - unsigned long flags, - unsigned long new_addr); - - down_write(&current->mm->mmap_sem); - { - addr = do_mremap(addr, old_len, new_len, flags, new_addr); - } - up_write(&current->mm->mmap_sem); - - if (IS_ERR((void *) addr)) - return addr; - - force_successful_syscall_return(); + addr = sys_mremap(addr, old_len, new_len, flags, new_addr); + if (!IS_ERR((void *) addr)) + force_successful_syscall_return(); return addr; } diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 3ee51f1..275f74f 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -580,16 +580,9 @@ SYSCALL_DEFINE5(64_mremap, unsigned long, addr, unsigned long, old_len, unsigned long, new_len, unsigned long, flags, unsigned long, new_addr) { - unsigned long ret = -EINVAL; - if (test_thread_flag(TIF_32BIT)) - goto out; - - down_write(&current->mm->mmap_sem); - ret = do_mremap(addr, old_len, new_len, flags, new_addr); - up_write(&current->mm->mmap_sem); -out: - return ret; + return -EINVAL; + return sys_mremap(addr, old_len, new_len, flags, new_addr); } /* we come to here via sys_nis_syscall so it can setup the regs argument */ -- cgit v0.10.2 From 63a81db13205f21047a0799adfd3a2419f190577 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 
11:32:04 -0400 Subject: merge do_mremap() into sys_mremap() Signed-off-by: Al Viro diff --git a/mm/mremap.c b/mm/mremap.c index db8d983..169c53b 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -432,15 +432,17 @@ static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) * MREMAP_FIXED option added 5-Dec-1999 by Benjamin LaHaise * This option implies MREMAP_MAYMOVE. */ -unsigned long do_mremap(unsigned long addr, - unsigned long old_len, unsigned long new_len, - unsigned long flags, unsigned long new_addr) +SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, + unsigned long, new_len, unsigned long, flags, + unsigned long, new_addr) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; unsigned long ret = -EINVAL; unsigned long charged = 0; + down_write(&current->mm->mmap_sem); + if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) goto out; @@ -538,17 +540,6 @@ unsigned long do_mremap(unsigned long addr, out: if (ret & ~PAGE_MASK) vm_unacct_memory(charged); - return ret; -} - -SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, - unsigned long, new_len, unsigned long, flags, - unsigned long, new_addr) -{ - unsigned long ret; - - down_write(&current->mm->mmap_sem); - ret = do_mremap(addr, old_len, new_len, flags, new_addr); up_write(&current->mm->mmap_sem); return ret; } -- cgit v0.10.2 From 4ad310b836d5c61ac6e9b5fd7db12d0cd57136d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 11:55:49 -0400 Subject: ia64 perfmon: fix get_unmapped_area() use there get_unmapped_area() returns -E... on failure, not 0. Moreover, the wrapper around it is completely pointless. 
Signed-off-by: Al Viro diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index f00ba02..d7f558c 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -604,12 +604,6 @@ pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) spin_unlock(&(x)->ctx_lock); } -static inline unsigned long -pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec) -{ - return get_unmapped_area(file, addr, len, pgoff, flags); -} - /* forward declaration */ static const struct dentry_operations pfmfs_dentry_operations; @@ -2333,8 +2327,8 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t down_write(&task->mm->mmap_sem); /* find some free area in address space, must have mmap sem held */ - vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); - if (vma->vm_start == 0UL) { + vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); + if (IS_ERR_VALUE(vma->vm_start)) { DPRINT(("Cannot find unmapped area for size %ld\n", size)); up_write(&task->mm->mmap_sem); goto error; -- cgit v0.10.2 From cf74d14c4fbce9bcc9eb62f52d721d3399a2b87f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 12:09:53 -0400 Subject: unexport do_mmap() Signed-off-by: Al Viro diff --git a/mm/mmap.c b/mm/mmap.c index e8dcfc7..83c5662 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1118,7 +1118,6 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return -EINVAL; return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); } -EXPORT_SYMBOL(do_mmap); unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, diff --git a/mm/nommu.c b/mm/nommu.c index bb8f4f0..de6084e 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1481,7 +1481,6 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return -EINVAL; return do_mmap_pgoff(file, addr, len, 
prot, flag, offset >> PAGE_SHIFT); } -EXPORT_SYMBOL(do_mmap); unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, -- cgit v0.10.2 From d007794a182bc072a7b7479909dbd0d67ba341be Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:11:37 -0400 Subject: split cap_mmap_addr() out of cap_file_mmap() ... switch callers. Signed-off-by: Al Viro diff --git a/include/linux/security.h b/include/linux/security.h index ab0e091..4ad59c9 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -86,6 +86,7 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name, extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); +extern int cap_mmap_addr(unsigned long addr); extern int cap_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags, unsigned long addr, unsigned long addr_only); @@ -2187,7 +2188,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { - return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + return cap_mmap_addr(addr); } static inline int security_file_mprotect(struct vm_area_struct *vma, diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 032daab..8430d89 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -497,7 +497,7 @@ static int apparmor_file_mmap(struct file *file, unsigned long reqprot, int rc = 0; /* do DAC check */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; diff --git a/security/commoncap.c b/security/commoncap.c index e771cb1..ebac361 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -958,22 +958,15 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages) } /* - * cap_file_mmap 
- check if able to map given addr - * @file: unused - * @reqprot: unused - * @prot: unused - * @flags: unused + * cap_mmap_addr - check if able to map given addr * @addr: address attempting to be mapped - * @addr_only: unused * * If the process is attempting to map memory below dac_mmap_min_addr they need * CAP_SYS_RAWIO. The other parameters to this function are unused by the * capability security module. Returns 0 if this mapping should be allowed * -EPERM if not. */ -int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int cap_mmap_addr(unsigned long addr) { int ret = 0; @@ -986,3 +979,24 @@ int cap_file_mmap(struct file *file, unsigned long reqprot, } return ret; } + +/* + * cap_file_mmap - check if able to map given addr + * @file: unused + * @reqprot: unused + * @prot: unused + * @flags: unused + * @addr: address attempting to be mapped + * @addr_only: unused + * + * If the process is attempting to map memory below dac_mmap_min_addr they need + * CAP_SYS_RAWIO. The other parameters to this function are unused by the + * capability security module. Returns 0 if this mapping should be allowed + * -EPERM if not. 
+ */ +int cap_file_mmap(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags, + unsigned long addr, unsigned long addr_only) +{ + return cap_mmap_addr(addr); +} diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index fa2341b..25c125e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3104,7 +3104,7 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, } /* do DAC check on address space usage */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index d583c05..a621977 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1199,7 +1199,7 @@ static int smack_file_mmap(struct file *file, int rc; /* do DAC check on address space usage */ - rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only); + rc = cap_mmap_addr(addr); if (rc || addr_only) return rc; -- cgit v0.10.2 From e5467859f7f79b69fc49004403009dfdba3bec53 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 13:30:51 -0400 Subject: split ->file_mmap() into ->mmap_addr()/->mmap_file() ... i.e. file-dependent and address-dependent checks. 
Signed-off-by: Al Viro diff --git a/fs/exec.c b/fs/exec.c index 52c9e2f..a79786a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -280,10 +280,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); INIT_LIST_HEAD(&vma->anon_vma_chain); - err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); - if (err) - goto err; - err = insert_vm_struct(mm, vma); if (err) goto err; diff --git a/include/linux/security.h b/include/linux/security.h index 4ad59c9..f1bae09 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -87,9 +87,8 @@ extern int cap_inode_removexattr(struct dentry *dentry, const char *name); extern int cap_inode_need_killpriv(struct dentry *dentry); extern int cap_inode_killpriv(struct dentry *dentry); extern int cap_mmap_addr(unsigned long addr); -extern int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +extern int cap_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags); extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5); @@ -587,15 +586,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * simple integer value. When @arg represents a user space pointer, it * should never be used by the security module. * Return 0 if permission is granted. - * @file_mmap : + * @mmap_addr : + * Check permissions for a mmap operation at @addr. + * @addr contains virtual address that will be used for the operation. + * Return 0 if permission is granted. + * @mmap_file : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). 
* @reqprot contains the protection requested by the application. * @prot contains the protection that will be applied by the kernel. * @flags contains the operational flags. - * @addr contains virtual address that will be used for the operation. - * @addr_only contains a boolean: 0 if file-backed VMA, otherwise 1. * Return 0 if permission is granted. * @file_mprotect: * Check permissions before changing memory access permissions. @@ -1482,10 +1483,10 @@ struct security_operations { void (*file_free_security) (struct file *file); int (*file_ioctl) (struct file *file, unsigned int cmd, unsigned long arg); - int (*file_mmap) (struct file *file, + int (*mmap_addr) (unsigned long addr); + int (*mmap_file) (struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, unsigned long addr, - unsigned long addr_only); + unsigned long flags); int (*file_mprotect) (struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -1744,9 +1745,9 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only); +int security_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags); +int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); int security_file_lock(struct file *file, unsigned int cmd); @@ -2182,11 +2183,14 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static inline int security_file_mmap(struct file *file, unsigned long reqprot, +static inline int security_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, - 
unsigned long addr, - unsigned long addr_only) + unsigned long flags) +{ + return 0; +} + +static inline int security_mmap_addr(unsigned long addr) { return cap_mmap_addr(addr); } diff --git a/mm/mmap.c b/mm/mmap.c index 83c5662..49283da 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1101,7 +1101,11 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } - error = security_file_mmap(file, reqprot, prot, flags, addr, 0); + error = security_mmap_addr(addr); + if (error) + return error; + + error = security_mmap_file(file, reqprot, prot, flags); if (error) return error; @@ -1817,7 +1821,7 @@ int expand_downwards(struct vm_area_struct *vma, return -ENOMEM; address &= PAGE_MASK; - error = security_file_mmap(NULL, 0, 0, 0, address, 1); + error = security_mmap_addr(address); if (error) return error; @@ -2205,7 +2209,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) if (!len) return addr; - error = security_file_mmap(NULL, 0, 0, 0, addr, 1); + error = security_mmap_addr(addr); if (error) return error; @@ -2561,7 +2565,7 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; - ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); + ret = security_mmap_addr(vma->vm_start); if (ret) goto out; diff --git a/mm/mremap.c b/mm/mremap.c index 169c53b..ebf1089 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -371,7 +371,7 @@ static unsigned long mremap_to(unsigned long addr, if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; - ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + ret = security_mmap_addr(new_addr); if (ret) goto out; @@ -532,7 +532,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, goto out; } - ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); + ret = security_mmap_addr(new_addr); if (ret) goto out; ret = move_vma(vma, addr, old_len, new_len, new_addr); diff --git a/mm/nommu.c b/mm/nommu.c index de6084e..acfe419 
100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1047,7 +1047,10 @@ static int validate_mmap_request(struct file *file, } /* allow the security API to have its say */ - ret = security_file_mmap(file, reqprot, prot, flags, addr, 0); + ret = security_mmap_addr(addr); + if (ret < 0) + return ret; + ret = security_mmap_file(file, reqprot, prot, flags); if (ret < 0) return ret; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 8430d89..8ea39aa 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -490,17 +490,9 @@ static int common_mmap(int op, struct file *file, unsigned long prot, return common_file_perm(op, file, mask); } -static int apparmor_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +static int apparmor_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { - int rc = 0; - - /* do DAC check */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; - return common_mmap(OP_FMMAP, file, prot, flags); } @@ -646,7 +638,8 @@ static struct security_operations apparmor_ops = { .file_permission = apparmor_file_permission, .file_alloc_security = apparmor_file_alloc_security, .file_free_security = apparmor_file_free_security, - .file_mmap = apparmor_file_mmap, + .mmap_file = apparmor_mmap_file, + .mmap_addr = cap_mmap_addr, .file_mprotect = apparmor_file_mprotect, .file_lock = apparmor_file_lock, diff --git a/security/capability.c b/security/capability.c index fca8896..61095df 100644 --- a/security/capability.c +++ b/security/capability.c @@ -949,7 +949,8 @@ void __init security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, file_alloc_security); set_to_cap_if_null(ops, file_free_security); set_to_cap_if_null(ops, file_ioctl); - set_to_cap_if_null(ops, file_mmap); + set_to_cap_if_null(ops, mmap_addr); + set_to_cap_if_null(ops, mmap_file); set_to_cap_if_null(ops, file_mprotect); 
set_to_cap_if_null(ops, file_lock); set_to_cap_if_null(ops, file_fcntl); diff --git a/security/commoncap.c b/security/commoncap.c index ebac361..6dbae46 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -980,23 +980,8 @@ int cap_mmap_addr(unsigned long addr) return ret; } -/* - * cap_file_mmap - check if able to map given addr - * @file: unused - * @reqprot: unused - * @prot: unused - * @flags: unused - * @addr: address attempting to be mapped - * @addr_only: unused - * - * If the process is attempting to map memory below dac_mmap_min_addr they need - * CAP_SYS_RAWIO. The other parameters to this function are unused by the - * capability security module. Returns 0 if this mapping should be allowed - * -EPERM if not. - */ -int cap_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int cap_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { - return cap_mmap_addr(addr); + return 0; } diff --git a/security/security.c b/security/security.c index 5497a57..d91c66d 100644 --- a/security/security.c +++ b/security/security.c @@ -657,18 +657,22 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +int security_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) { int ret; - ret = security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only); + ret = security_ops->mmap_file(file, reqprot, prot, flags); if (ret) return ret; return ima_file_mmap(file, prot); } +int security_mmap_addr(unsigned long addr) +{ + return security_ops->mmap_addr(addr); +} + int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { 
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 25c125e..372ec65 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3083,9 +3083,7 @@ error: return rc; } -static int selinux_file_mmap(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags, - unsigned long addr, unsigned long addr_only) +static int selinux_mmap_addr(unsigned long addr) { int rc = 0; u32 sid = current_sid(); @@ -3104,10 +3102,12 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, } /* do DAC check on address space usage */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; + return cap_mmap_addr(addr); +} +static int selinux_mmap_file(struct file *file, unsigned long reqprot, + unsigned long prot, unsigned long flags) +{ if (selinux_checkreqprot) prot = reqprot; @@ -5570,7 +5570,8 @@ static struct security_operations selinux_ops = { .file_alloc_security = selinux_file_alloc_security, .file_free_security = selinux_file_free_security, .file_ioctl = selinux_file_ioctl, - .file_mmap = selinux_file_mmap, + .mmap_file = selinux_mmap_file, + .mmap_addr = selinux_mmap_addr, .file_mprotect = selinux_file_mprotect, .file_lock = selinux_file_lock, .file_fcntl = selinux_file_fcntl, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index a621977..ee0bb57 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1171,7 +1171,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, } /** - * smack_file_mmap : + * smack_mmap_file : * Check permissions for a mmap operation. The @file may be NULL, e.g. * if mapping anonymous memory. * @file contains the file structure for file to map (may be NULL). @@ -1180,10 +1180,9 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, * @flags contains the operational flags. * Return 0 if permission is granted. 
*/ -static int smack_file_mmap(struct file *file, +static int smack_mmap_file(struct file *file, unsigned long reqprot, unsigned long prot, - unsigned long flags, unsigned long addr, - unsigned long addr_only) + unsigned long flags) { struct smack_known *skp; struct smack_rule *srp; @@ -1198,11 +1197,6 @@ static int smack_file_mmap(struct file *file, int tmay; int rc; - /* do DAC check on address space usage */ - rc = cap_mmap_addr(addr); - if (rc || addr_only) - return rc; - if (file == NULL || file->f_dentry == NULL) return 0; @@ -3482,7 +3476,8 @@ struct security_operations smack_ops = { .file_ioctl = smack_file_ioctl, .file_lock = smack_file_lock, .file_fcntl = smack_file_fcntl, - .file_mmap = smack_file_mmap, + .mmap_file = smack_mmap_file, + .mmap_addr = cap_mmap_addr, .file_set_fowner = smack_file_set_fowner, .file_send_sigiotask = smack_file_send_sigiotask, .file_receive = smack_file_receive, -- cgit v0.10.2 From 8b3ec6814c83d76b85bd13badc48552836c24839 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 17:11:23 -0400 Subject: take security_mmap_file() outside of ->mmap_sem Signed-off-by: Al Viro diff --git a/include/linux/security.h b/include/linux/security.h index f1bae09..4e5a73c 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1745,8 +1745,8 @@ int security_file_permission(struct file *file, int mask); int security_file_alloc(struct file *file); void security_file_free(struct file *file); int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg); -int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags); +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags); int security_mmap_addr(unsigned long addr); int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); @@ -2183,8 +2183,7 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd, return 0; } -static 
inline int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, +static inline int security_mmap_file(struct file *file, unsigned long prot, unsigned long flags) { return 0; diff --git a/ipc/shm.c b/ipc/shm.c index 406c5b2..e3a8063 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1036,6 +1036,10 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) sfd->file = shp->shm_file; sfd->vm_ops = NULL; + err = security_mmap_file(file, prot, flags); + if (err) + goto out_fput; + down_write(&current->mm->mmap_sem); if (addr && !(shmflg & SHM_REMAP)) { err = -EINVAL; @@ -1058,6 +1062,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) invalid: up_write(&current->mm->mmap_sem); +out_fput: fput(file); out_nattch: diff --git a/mm/mmap.c b/mm/mmap.c index 49283da..34b280f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -979,7 +979,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, struct inode *inode; vm_flags_t vm_flags; int error; - unsigned long reqprot = prot; /* * Does the application expect PROT_READ to imply PROT_EXEC? 
@@ -1105,10 +1104,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, if (error) return error; - error = security_mmap_file(file, reqprot, prot, flags); - if (error) - return error; - return mmap_region(file, addr, len, flags, vm_flags, pgoff); } @@ -1130,9 +1125,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long ret; struct mm_struct *mm = current->mm; - down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); - up_write(&mm->mmap_sem); + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + } return ret; } EXPORT_SYMBOL(vm_mmap); @@ -1168,9 +1166,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(&current->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); + retval = security_mmap_file(file, prot, flags); + if (!retval) { + down_write(&current->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + } if (file) fput(file); diff --git a/mm/nommu.c b/mm/nommu.c index acfe419..8cbfd62 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -889,7 +889,6 @@ static int validate_mmap_request(struct file *file, unsigned long *_capabilities) { unsigned long capabilities, rlen; - unsigned long reqprot = prot; int ret; /* do the simple checks first */ @@ -1050,9 +1049,6 @@ static int validate_mmap_request(struct file *file, ret = security_mmap_addr(addr); if (ret < 0) return ret; - ret = security_mmap_file(file, reqprot, prot, flags); - if (ret < 0) - return ret; /* looks okay */ *_capabilities = capabilities; @@ -1492,9 +1488,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long ret; struct mm_struct *mm = current->mm; - down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, 
flag, offset); - up_write(&mm->mmap_sem); + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap(file, addr, len, prot, flag, offset); + up_write(&mm->mmap_sem); + } return ret; } EXPORT_SYMBOL(vm_mmap); @@ -1515,9 +1514,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - down_write(&current->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); + ret = security_mmap_file(file, prot, flags); + if (!ret) { + down_write(&current->mm->mmap_sem); + retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + } if (file) fput(file); diff --git a/security/security.c b/security/security.c index d91c66d..3b11b3b 100644 --- a/security/security.c +++ b/security/security.c @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include #define MAX_LSM_EVM_XATTR 2 @@ -657,11 +660,35 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_mmap_file(struct file *file, unsigned long reqprot, - unsigned long prot, unsigned long flags) +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags) { + unsigned long reqprot = prot; int ret; - + /* + * Does the application expect PROT_READ to imply PROT_EXEC? + * + * (the exception is when the underlying filesystem is noexec + * mounted, in which case we dont add PROT_EXEC.) 
+ */ + if (!(reqprot & PROT_READ)) + goto out; + if (!(current->personality & READ_IMPLIES_EXEC)) + goto out; + if (!file) { + prot |= PROT_EXEC; + } else if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { +#ifndef CONFIG_MMU + unsigned long caps = 0; + struct address_space *mapping = file->f_mapping; + if (mapping && mapping->backing_dev_info) + caps = mapping->backing_dev_info->capabilities; + if (!(caps & BDI_CAP_EXEC_MAP)) + goto out; +#endif + prot |= PROT_EXEC; + } +out: ret = security_ops->mmap_file(file, reqprot, prot, flags); if (ret) return ret; -- cgit v0.10.2 From 9ac4ed4bd0adec75db13a4b08a39a3918ec0e3c9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 17:13:15 -0400 Subject: move security_mmap_addr() to saner place it really should be done by get_unmapped_area(); that cuts down on the amount of callers considerably and it's the right place for that stuff anyway. Signed-off-by: Al Viro diff --git a/mm/mmap.c b/mm/mmap.c index 34b280f..131521e 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -978,7 +978,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, struct mm_struct * mm = current->mm; struct inode *inode; vm_flags_t vm_flags; - int error; /* * Does the application expect PROT_READ to imply PROT_EXEC? @@ -1100,10 +1099,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, } } - error = security_mmap_addr(addr); - if (error) - return error; - return mmap_region(file, addr, len, flags, vm_flags, pgoff); } @@ -1633,7 +1628,9 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, if (addr & ~PAGE_MASK) return -EINVAL; - return arch_rebalance_pgtables(addr, len); + addr = arch_rebalance_pgtables(addr, len); + error = security_mmap_addr(addr); + return error ? 
error : addr; } EXPORT_SYMBOL(get_unmapped_area); @@ -2210,10 +2207,6 @@ static unsigned long do_brk(unsigned long addr, unsigned long len) if (!len) return addr; - error = security_mmap_addr(addr); - if (error) - return error; - flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); @@ -2566,10 +2559,6 @@ int install_special_mapping(struct mm_struct *mm, vma->vm_ops = &special_mapping_vmops; vma->vm_private_data = pages; - ret = security_mmap_addr(vma->vm_start); - if (ret) - goto out; - ret = insert_vm_struct(mm, vma); if (ret) goto out; diff --git a/mm/mremap.c b/mm/mremap.c index ebf1089..21fed20 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -371,10 +371,6 @@ static unsigned long mremap_to(unsigned long addr, if ((addr <= new_addr) && (addr+old_len) > new_addr) goto out; - ret = security_mmap_addr(new_addr); - if (ret) - goto out; - ret = do_munmap(mm, new_addr, new_len); if (ret) goto out; @@ -532,9 +528,6 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, goto out; } - ret = security_mmap_addr(new_addr); - if (ret) - goto out; ret = move_vma(vma, addr, old_len, new_len, new_addr); } out: -- cgit v0.10.2 From 98de59bfe4b2ff6344d9ad8e5296f80de5dcc5b6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 19:58:30 -0400 Subject: take calculation of final prot in security_mmap_file() into a helper Signed-off-by: Al Viro diff --git a/security/security.c b/security/security.c index 3b11b3b..3efc9b1 100644 --- a/security/security.c +++ b/security/security.c @@ -660,36 +660,46 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_ioctl(file, cmd, arg); } -int security_mmap_file(struct file *file, unsigned long prot, - unsigned long flags) +static inline unsigned long mmap_prot(struct file *file, unsigned long prot) { - unsigned long reqprot = prot; - int ret; /* - * Does the application expect PROT_READ to imply PROT_EXEC? 
- * - * (the exception is when the underlying filesystem is noexec - * mounted, in which case we dont add PROT_EXEC.) + * Does we have PROT_READ and does the application expect + * it to imply PROT_EXEC? If not, nothing to talk about... */ - if (!(reqprot & PROT_READ)) - goto out; + if ((prot & (PROT_READ | PROT_EXEC)) != PROT_READ) + return prot; if (!(current->personality & READ_IMPLIES_EXEC)) - goto out; - if (!file) { - prot |= PROT_EXEC; - } else if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { + return prot; + /* + * if that's an anonymous mapping, let it. + */ + if (!file) + return prot | PROT_EXEC; + /* + * ditto if it's not on noexec mount, except that on !MMU we need + * BDI_CAP_EXEC_MMAP (== VM_MAYEXEC) in this case + */ + if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) { #ifndef CONFIG_MMU unsigned long caps = 0; struct address_space *mapping = file->f_mapping; if (mapping && mapping->backing_dev_info) caps = mapping->backing_dev_info->capabilities; if (!(caps & BDI_CAP_EXEC_MAP)) - goto out; + return prot; #endif - prot |= PROT_EXEC; + return prot | PROT_EXEC; } -out: - ret = security_ops->mmap_file(file, reqprot, prot, flags); + /* anything on noexec mount won't get PROT_EXEC */ + return prot; +} + +int security_mmap_file(struct file *file, unsigned long prot, + unsigned long flags) +{ + int ret; + ret = security_ops->mmap_file(file, prot, + mmap_prot(file, prot), flags); if (ret) return ret; return ima_file_mmap(file, prot); -- cgit v0.10.2 From e3fc629d7bb70848fbf479688a66d4e76dff46ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:08:42 -0400 Subject: switch aio and shm to do_mmap_pgoff(), make do_mmap() static after all, 0 bytes and 0 pages is the same thing... 
Signed-off-by: Al Viro diff --git a/fs/aio.c b/fs/aio.c index e7f2fad..07154d9 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -134,9 +134,9 @@ static int aio_setup_ring(struct kioctx *ctx) info->mmap_size = nr_pages * PAGE_SIZE; dprintk("attempting mmap of %lu bytes\n", info->mmap_size); down_write(&ctx->mm->mmap_sem); - info->mmap_base = do_mmap(NULL, 0, info->mmap_size, - PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, - 0); + info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, 0); if (IS_ERR((void *)info->mmap_base)) { up_write(&ctx->mm->mmap_sem); info->mmap_size = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index 7d5c37f..4189e0d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1394,7 +1394,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo extern unsigned long mmap_region(struct file *file, unsigned long addr, unsigned long len, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff); -extern unsigned long do_mmap(struct file *, unsigned long, +extern unsigned long do_mmap_pgoff(struct file *, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long); extern int do_munmap(struct mm_struct *, unsigned long, size_t); diff --git a/ipc/shm.c b/ipc/shm.c index e3a8063..5e2cbfd 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1054,7 +1054,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) goto invalid; } - user_addr = do_mmap (file, addr, size, prot, flags, 0); + user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0); *raddr = user_addr; err = 0; if (IS_ERR_VALUE(user_addr)) diff --git a/mm/mmap.c b/mm/mmap.c index 131521e..f778654 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -971,7 +971,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint) * The caller must hold down_write(&current->mm->mmap_sem).
*/ -static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, +unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long pgoff) { @@ -1102,7 +1102,7 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return mmap_region(file, addr, len, flags, vm_flags, pgoff); } -unsigned long do_mmap(struct file *file, unsigned long addr, +static unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { diff --git a/mm/nommu.c b/mm/nommu.c index 8cbfd62..a1792ed 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1232,7 +1232,7 @@ enomem: /* * handle mapping creation for uClinux */ -static unsigned long do_mmap_pgoff(struct file *file, +unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, @@ -1470,7 +1470,7 @@ error_getting_region: return -ENOMEM; } -unsigned long do_mmap(struct file *file, unsigned long addr, +static unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { -- cgit v0.10.2 From dc982501d9643ab0c117e7d87562857ce234652d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:11:57 -0400 Subject: kill do_mmap() completely just pull into vm_mmap() Signed-off-by: Al Viro diff --git a/mm/mmap.c b/mm/mmap.c index f778654..538c905 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1102,28 +1102,22 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return mmap_region(file, addr, len, flags, vm_flags, pgoff); } -static unsigned long do_mmap(struct file *file, unsigned long addr, +unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { + unsigned long ret; + struct mm_struct *mm = current->mm; + if (unlikely(offset + 
PAGE_ALIGN(len) < offset)) return -EINVAL; if (unlikely(offset & ~PAGE_MASK)) return -EINVAL; - return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -} - -unsigned long vm_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret; - struct mm_struct *mm = current->mm; ret = security_mmap_file(file, prot, flag); if (!ret) { down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); up_write(&mm->mmap_sem); } return ret; diff --git a/mm/nommu.c b/mm/nommu.c index a1792ed..e6123a5 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1470,28 +1470,22 @@ error_getting_region: return -ENOMEM; } -static unsigned long do_mmap(struct file *file, unsigned long addr, +unsigned long vm_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flag, unsigned long offset) { + unsigned long ret; + struct mm_struct *mm = current->mm; + if (unlikely(offset + PAGE_ALIGN(len) < offset)) return -EINVAL; if (unlikely(offset & ~PAGE_MASK)) return -EINVAL; - return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); -} - -unsigned long vm_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret; - struct mm_struct *mm = current->mm; ret = security_mmap_file(file, prot, flag); if (!ret) { down_write(&mm->mmap_sem); - ret = do_mmap(file, addr, len, prot, flag, offset); + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); up_write(&mm->mmap_sem); } return ret; -- cgit v0.10.2 From eb36c5873b96e8c7376768d3906da74aae6e3839 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:17:35 -0400 Subject: new helper: vm_mmap_pgoff() take it to mm/util.c, convert vm_mmap() to use of that one and take it to mm/util.c as well, 
convert both sys_mmap_pgoff() to use of vm_mmap_pgoff() Signed-off-by: Al Viro diff --git a/mm/internal.h b/mm/internal.h index aee4761..5307a02 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -342,3 +342,7 @@ extern u64 hwpoison_filter_flags_mask; extern u64 hwpoison_filter_flags_value; extern u64 hwpoison_filter_memcg; extern u32 hwpoison_filter_enable; + +extern unsigned long vm_mmap_pgoff(struct file *, unsigned long, + unsigned long, unsigned long, + unsigned long, unsigned long); diff --git a/mm/mmap.c b/mm/mmap.c index 538c905..98ef8a6 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1102,28 +1102,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, return mmap_region(file, addr, len, flags, vm_flags, pgoff); } -unsigned long vm_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret; - struct mm_struct *mm = current->mm; - - if (unlikely(offset + PAGE_ALIGN(len) < offset)) - return -EINVAL; - if (unlikely(offset & ~PAGE_MASK)) - return -EINVAL; - - ret = security_mmap_file(file, prot, flag); - if (!ret) { - down_write(&mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); - up_write(&mm->mmap_sem); - } - return ret; -} -EXPORT_SYMBOL(vm_mmap); - SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) @@ -1155,13 +1133,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - retval = security_mmap_file(file, prot, flags); - if (!retval) { - down_write(&current->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); - } - + retval = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); if (file) fput(file); out: diff --git a/mm/nommu.c b/mm/nommu.c index e6123a5..c4acfbc 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@
-1470,28 +1470,6 @@ error_getting_region: return -ENOMEM; } -unsigned long vm_mmap(struct file *file, unsigned long addr, - unsigned long len, unsigned long prot, - unsigned long flag, unsigned long offset) -{ - unsigned long ret; - struct mm_struct *mm = current->mm; - - if (unlikely(offset + PAGE_ALIGN(len) < offset)) - return -EINVAL; - if (unlikely(offset & ~PAGE_MASK)) - return -EINVAL; - - ret = security_mmap_file(file, prot, flag); - if (!ret) { - down_write(&mm->mmap_sem); - ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); - up_write(&mm->mmap_sem); - } - return ret; -} -EXPORT_SYMBOL(vm_mmap); - SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, unsigned long, prot, unsigned long, flags, unsigned long, fd, unsigned long, pgoff) @@ -1508,12 +1486,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); - ret = security_mmap_file(file, prot, flags); - if (!ret) { - down_write(&current->mm->mmap_sem); - retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); - } + ret = vm_mmap_pgoff(file, addr, len, prot, flags, pgoff); if (file) fput(file); diff --git a/mm/util.c b/mm/util.c index ae962b3..8c7265a 100644 --- a/mm/util.c +++ b/mm/util.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include "internal.h" @@ -341,6 +342,35 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, } EXPORT_SYMBOL_GPL(get_user_pages_fast); +unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long pgoff) +{ + unsigned long ret; + struct mm_struct *mm = current->mm; + + ret = security_mmap_file(file, prot, flag); + if (!ret) { + down_write(&mm->mmap_sem); + ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff); + up_write(&mm->mmap_sem); + } + return ret; +} + +unsigned long vm_mmap(struct file *file, unsigned long addr, + unsigned long
len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + if (unlikely(offset + PAGE_ALIGN(len) < offset)) + return -EINVAL; + if (unlikely(offset & ~PAGE_MASK)) + return -EINVAL; + + return vm_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); +} +EXPORT_SYMBOL(vm_mmap); + /* Tracepoints definitions. */ EXPORT_TRACEPOINT_SYMBOL(kmalloc); EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); -- cgit v0.10.2 From 17d1587f553dbdc9a55fe253903437f7e3e3448e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 30 May 2012 20:19:20 -0400 Subject: unexport do_munmap() Signed-off-by: Al Viro diff --git a/mm/mmap.c b/mm/mmap.c index 98ef8a6..6cb3193 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2125,7 +2125,6 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) return 0; } -EXPORT_SYMBOL(do_munmap); int vm_munmap(unsigned long start, size_t len) { -- cgit v0.10.2 From d58367515f47371f7202d8b258ee0614a8955a6a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 19 Apr 2012 18:17:15 -0400 Subject: sch_atm.c: get rid of pointless extern sockfd_lookup() is declared in linux/net.h, which is pulled by linux/skbuff.h (and needed for a lot of other stuff in sch_atm.c anyway). Signed-off-by: Al Viro diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 8522a47..ca8e0a5 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -16,8 +16,6 @@ #include #include -extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ - /* * The ATM queuing discipline provides a framework for invoking classifiers * (aka "filters"), which in turn select classes of this queuing discipline. -- cgit v0.10.2 From efaa33eb1309d65528d5a54d87d69bdcbdae8c10 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:04 +0300 Subject: reiserfs: cleanup reiserfs_fill_super a bit We have the reiserfs superblock pointer in the 'sbi' variable in this function, no need to use the 'REISERFS_SB(s)' macro which is the same.
This is just a small clean-up. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index c07b7d7..60cddb7 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1730,19 +1730,19 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) return -ENOMEM; s->s_fs_info = sbi; /* Set default values for options: non-aggressive tails, RO on errors */ - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_SMALLTAIL); - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_ERROR_RO); - REISERFS_SB(s)->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); + sbi->s_mount_opt |= (1 << REISERFS_SMALLTAIL); + sbi->s_mount_opt |= (1 << REISERFS_ERROR_RO); + sbi->s_mount_opt |= (1 << REISERFS_BARRIER_FLUSH); /* no preallocation minimum, be smart in reiserfs_file_write instead */ - REISERFS_SB(s)->s_alloc_options.preallocmin = 0; + sbi->s_alloc_options.preallocmin = 0; /* Preallocate by 16 blocks (17-1) at once */ - REISERFS_SB(s)->s_alloc_options.preallocsize = 17; + sbi->s_alloc_options.preallocsize = 17; /* setup default block allocator options */ reiserfs_init_alloc_options(s); - mutex_init(&REISERFS_SB(s)->lock); - REISERFS_SB(s)->lock_depth = -1; + mutex_init(&sbi->lock); + sbi->lock_depth = -1; jdev_name = NULL; if (reiserfs_parse_options @@ -1751,8 +1751,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) goto error_unlocked; } if (jdev_name && jdev_name[0]) { - REISERFS_SB(s)->s_jdev = kstrdup(jdev_name, GFP_KERNEL); - if (!REISERFS_SB(s)->s_jdev) { + sbi->s_jdev = kstrdup(jdev_name, GFP_KERNEL); + if (!sbi->s_jdev) { SWARN(silent, s, "", "Cannot allocate memory for " "journal device name"); goto error; @@ -1810,7 +1810,7 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) /* make data=ordered the default */ if (!reiserfs_data_log(s) && !reiserfs_data_ordered(s) && !reiserfs_data_writeback(s)) { - REISERFS_SB(s)->s_mount_opt |= (1 <<
REISERFS_DATA_ORDERED); + sbi->s_mount_opt |= (1 << REISERFS_DATA_ORDERED); } if (reiserfs_data_log(s)) { -- cgit v0.10.2 From 25729b0e94c2103a8d726eda843136a3775366cf Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:05 +0300 Subject: reiserfs: clean-up function return type Turn 'reiserfs_flush_old_commits()' into a void function because the callers do not care about what it returns anyway. We are going to remove the 'sb->s_dirt' field completely and this patch is a small step towards this direction. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index b1a0857..68aea62 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -3492,7 +3492,7 @@ static void flush_async_commits(struct work_struct *work) ** flushes any old transactions to disk ** ends the current transaction if it is too old */ -int reiserfs_flush_old_commits(struct super_block *sb) +void reiserfs_flush_old_commits(struct super_block *sb) { time_t now; struct reiserfs_transaction_handle th; @@ -3502,9 +3502,8 @@ int reiserfs_flush_old_commits(struct super_block *sb) /* safety check so we don't flush while we are replaying the log during * mount */ - if (list_empty(&journal->j_journal_list)) { - return 0; - } + if (list_empty(&journal->j_journal_list)) + return; /* check the current transaction.
If there are no writers, and it is * too old, finish it, and force the commit blocks to disk @@ -3526,7 +3525,6 @@ int reiserfs_flush_old_commits(struct super_block *sb) do_journal_end(&th, sb, 1, COMMIT_NOW | WAIT); } } - return sb->s_dirt; } /* diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 14a4f9d..5c70055 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -2452,7 +2452,7 @@ struct reiserfs_transaction_handle *reiserfs_persistent_transaction(struct int reiserfs_end_persistent_transaction(struct reiserfs_transaction_handle *); int reiserfs_commit_page(struct inode *inode, struct page *page, unsigned from, unsigned to); -int reiserfs_flush_old_commits(struct super_block *); +void reiserfs_flush_old_commits(struct super_block *); int reiserfs_commit_for_inode(struct inode *); int reiserfs_inode_needs_commit(struct inode *); void reiserfs_update_inode_transaction(struct inode *); -- cgit v0.10.2 From 717f03c4d71677d2afb68d54628def3aae5d46ab Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:06 +0300 Subject: reiserfs: remove useless superblock dirtying The 'reiserfs_resize()' function marks the superblock as dirty by assigning 1 to 's_dirt' and then calls 'journal_mark_dirty()' which does the same. Thus, we can remove the assignment from 'reiserfs_resize()'. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 9a17f63..3ce02cf 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -200,7 +200,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) (bmap_nr_new - bmap_nr))); PUT_SB_BLOCK_COUNT(s, block_count_new); PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? 
: bmap_nr_new); - s->s_dirt = 1; journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); -- cgit v0.10.2 From 5c5fd81962271d4ee2984837fef4ec37e689aa41 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:07 +0300 Subject: reiserfs: mark the superblock as dirty a bit later The 'journal_mark_dirty()' function currently first marks the superblock as dirty by setting 's_dirt' to 1, then does various sanity checks and returns, then actually does all the magic with the journal. This is not an ideal order, though. It makes more sense to first do all the checks, then do all the internal stuff, and at the end notify the VFS that the superblock is now dirty. This patch moves the 's_dirt = 1' assignment from the very beginning of this function to the very end. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 68aea62..e5e06dd 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -3231,8 +3231,6 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, th->t_trans_id, journal->j_trans_id); } - sb->s_dirt = 1; - prepared = test_clear_buffer_journal_prepared(bh); clear_buffer_journal_restore_dirty(bh); /* already in this transaction, we are done */ @@ -3316,6 +3314,7 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, journal->j_first = cn; journal->j_last = cn; } + sb->s_dirt = 1; return 0; } -- cgit v0.10.2 From 033369d1af1264abc23bea2e174aa47cdd212f6f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 1 Jun 2012 17:18:08 +0300 Subject: reiserfs: get rid of reiserfs_sync_super This patch stops reiserfs using the VFS 'write_super()' method along with the s_dirt flag, because they are on their way out. The whole "superblock write-out" VFS infrastructure is served by the 'sync_supers()' kernel thread, which wakes up every 5 (by default) seconds and writes out all dirty superblocks using the '->write_super()' call-back.
But the problem with this thread is that it wastes power by waking up the system every 5 seconds, even if there are no dirty superblocks, or there are no client file-systems which would need this (e.g., btrfs does not use '->write_super()'). So we want to kill it completely and thus, we need to make file-systems to stop using the '->write_super()' VFS service, and then remove it together with the kernel thread. Signed-off-by: Artem Bityutskiy Signed-off-by: Al Viro diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index e5e06dd..afcadcc 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1923,6 +1923,8 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, * the workqueue job (flush_async_commit) needs this lock */ reiserfs_write_unlock(sb); + + cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); flush_workqueue(commit_wq); if (!reiserfs_mounted_fs_count) { @@ -3314,7 +3316,7 @@ int journal_mark_dirty(struct reiserfs_transaction_handle *th, journal->j_first = cn; journal->j_last = cn; } - sb->s_dirt = 1; + reiserfs_schedule_old_flush(sb); return 0; } @@ -3952,7 +3954,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, ** it tells us if we should continue with the journal_end, or just return */ if (!check_journal_end(th, sb, nblocks, flags)) { - sb->s_dirt = 1; + reiserfs_schedule_old_flush(sb); wake_queued_writers(sb); reiserfs_async_progress_wait(sb); goto out; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 5c70055..33215f5 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -480,6 +480,11 @@ struct reiserfs_sb_info { struct dentry *priv_root; /* root of /.reiserfs_priv */ struct dentry *xattr_root; /* root of /.reiserfs_priv/xattrs */ int j_errno; + + int work_queued; /* non-zero delayed work is queued */ + struct delayed_work old_work; /* old transactions flush delayed work */ + spinlock_t old_work_lock; /* protects old_work and work_queued */ + #ifdef CONFIG_QUOTA
char *s_qf_names[MAXQUOTAS]; int s_jquota_fmt; @@ -2487,6 +2492,7 @@ void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...); int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, unsigned int); +void reiserfs_schedule_old_flush(struct super_block *s); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); int remove_save_link(struct inode *inode, int truncate); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 60cddb7..651ce76 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -72,20 +72,58 @@ static int reiserfs_sync_fs(struct super_block *s, int wait) if (!journal_begin(&th, s, 1)) if (!journal_end_sync(&th, s, 1)) reiserfs_flush_old_commits(s); - s->s_dirt = 0; /* Even if it's not true. - * We'll loop forever in sync_supers otherwise */ reiserfs_write_unlock(s); return 0; } -static void reiserfs_write_super(struct super_block *s) +static void flush_old_commits(struct work_struct *work) { + struct reiserfs_sb_info *sbi; + struct super_block *s; + + sbi = container_of(work, struct reiserfs_sb_info, old_work.work); + s = sbi->s_journal->j_work_sb; + + spin_lock(&sbi->old_work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->old_work_lock); + reiserfs_sync_fs(s, 1); } +void reiserfs_schedule_old_flush(struct super_block *s) +{ + struct reiserfs_sb_info *sbi = REISERFS_SB(s); + unsigned long delay; + + if (s->s_flags & MS_RDONLY) + return; + + spin_lock(&sbi->old_work_lock); + if (!sbi->work_queued) { + delay = msecs_to_jiffies(dirty_writeback_interval * 10); + queue_delayed_work(system_long_wq, &sbi->old_work, delay); + sbi->work_queued = 1; + } + spin_unlock(&sbi->old_work_lock); +} + +static void cancel_old_flush(struct super_block *s) +{ + struct reiserfs_sb_info *sbi = REISERFS_SB(s); + + cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); + spin_lock(&sbi->old_work_lock); + sbi->work_queued = 0; + spin_unlock(&sbi->old_work_lock); +} + 
static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; + + cancel_old_flush(s); + reiserfs_write_lock(s); if (!(s->s_flags & MS_RDONLY)) { int err = journal_begin(&th, s, 1); @@ -99,7 +137,6 @@ static int reiserfs_freeze(struct super_block *s) journal_end_sync(&th, s, 1); } } - s->s_dirt = 0; reiserfs_write_unlock(s); return 0; } @@ -483,9 +520,6 @@ static void reiserfs_put_super(struct super_block *s) reiserfs_write_lock(s); - if (s->s_dirt) - reiserfs_write_super(s); - /* change file system state to current state if it was mounted with read-write permissions */ if (!(s->s_flags & MS_RDONLY)) { if (!journal_begin(&th, s, 10)) { @@ -692,7 +726,6 @@ static const struct super_operations reiserfs_sops = { .dirty_inode = reiserfs_dirty_inode, .evict_inode = reiserfs_evict_inode, .put_super = reiserfs_put_super, - .write_super = reiserfs_write_super, .sync_fs = reiserfs_sync_fs, .freeze_fs = reiserfs_freeze, .unfreeze_fs = reiserfs_unfreeze, @@ -1400,7 +1433,6 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) err = journal_end(&th, s, 10); if (err) goto out_err; - s->s_dirt = 0; if (!(*mount_flags & MS_RDONLY)) { dquot_resume(s, -1); @@ -1741,6 +1773,8 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) /* setup default block allocator options */ reiserfs_init_alloc_options(s); + spin_lock_init(&sbi->old_work_lock); + INIT_DELAYED_WORK(&sbi->old_work, flush_old_commits); mutex_init(&sbi->lock); sbi->lock_depth = -1; @@ -2003,6 +2037,8 @@ error_unlocked: reiserfs_write_unlock(s); } + cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); + reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) brelse(SB_BUFFER_WITH_SB(s)); -- cgit v0.10.2 From c3b2da314834499f34cba94f7053e55f6d6f92d8 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Mar 2012 09:59:21 -0400 Subject: fs: introduce inode operation ->update_time Btrfs has to make sure we have space to allocate new blocks 
in order to modify the inode, so updating time can fail. We've gotten around this by having our own file_update_time but this is kind of a pain, and Christoph has indicated he would like to make xfs do something different with atime updates. So introduce ->update_time, where we will deal with i_version and a/m/c time updates and indicate which changes need to be made. The normal version just does what it has always done, updates the time and marks the inode dirty, and then filesystems can choose to do something different. I've gone through all of the users of file_update_time and made them check for errors with the exception of the fault code since it's complicated and I wasn't quite sure what to do there, also Jan is going to be pushing the file time updates into page_mkwrite for those who have it so that should satisfy btrfs and make it not a big deal to check the file_update_time() return code in the generic fault path. Thanks, Signed-off-by: Josef Bacik diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 4fca82e..d5a269a 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -62,6 +62,7 @@ ata *); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); + int (*update_time)(struct inode *, struct timespec *, int); locking rules: all may block @@ -89,6 +90,8 @@ listxattr: no removexattr: yes truncate_range: yes fiemap: no +update_time: no + Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on victim. cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 0d04920..b2aa722 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -364,6 +364,7 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); + int (*update_time)(struct inode *, struct timespec *, int); }; Again, all methods are called without any locks being held, unless @@ -475,6 +476,9 @@ otherwise noted. truncate_range: a method provided by the underlying filesystem to truncate a range of blocks , i.e. punch a hole somewhere in a file. + update_time: called by the VFS to update a specific time or the i_version of + an inode. If this is not defined the VFS will update the inode itself + and call mark_inode_dirty_sync. The Address Space Object ======================== diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 504e61b..9562109 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -962,7 +962,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; if (file->f_flags & O_DIRECT) { written = generic_file_direct_write(iocb, iov, &nr_segs, diff --git a/fs/inode.c b/fs/inode.c index a79555e..f0335fc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1487,6 +1487,27 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, return 0; } +/* + * This does the actual work of updating an inodes time or version. Must have + * had called mnt_want_write() before calling this.
+ */ +static int update_time(struct inode *inode, struct timespec *time, int flags) +{ + if (inode->i_op->update_time) + return inode->i_op->update_time(inode, time, flags); + + if (flags & S_ATIME) + inode->i_atime = *time; + if (flags & S_VERSION) + inode_inc_iversion(inode); + if (flags & S_CTIME) + inode->i_ctime = *time; + if (flags & S_MTIME) + inode->i_mtime = *time; + mark_inode_dirty_sync(inode); + return 0; +} + /** * touch_atime - update the access time * @path: the &struct path to update @@ -1524,8 +1545,14 @@ void touch_atime(struct path *path) if (mnt_want_write(mnt)) return; - inode->i_atime = now; - mark_inode_dirty_sync(inode); + /* + * File systems can error out when updating inodes if they need to + * allocate new space to modify an inode (such is the case for + * Btrfs), but since we touch atime while walking down the path we + * really don't care if we failed to update the atime of the file, + * so just ignore the return value. + */ + update_time(inode, &now, S_ATIME); mnt_drop_write(mnt); } EXPORT_SYMBOL(touch_atime); @@ -1604,18 +1631,20 @@ EXPORT_SYMBOL(file_remove_suid); * usage in the file write path of filesystems, and filesystems may * choose to explicitly ignore update via this function with the * S_NOCMTIME inode flag, e.g. for network filesystem where these - * timestamps are handled by the server. + * timestamps are handled by the server. This can return an error for + * file systems who need to allocate space in order to update an inode. 
*/ -void file_update_time(struct file *file) +int file_update_time(struct file *file) { struct inode *inode = file->f_path.dentry->d_inode; struct timespec now; - enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + int sync_it = 0; + int ret; /* First try to exhaust all avenues to not sync */ if (IS_NOCMTIME(inode)) - return; + return 0; now = current_fs_time(inode->i_sb); if (!timespec_equal(&inode->i_mtime, &now)) @@ -1628,21 +1657,16 @@ void file_update_time(struct file *file) sync_it |= S_VERSION; if (!sync_it) - return; + return 0; /* Finally allowed to write? Takes lock. */ if (mnt_want_write_file(file)) - return; + return 0; - /* Only change inode inside the lock region */ - if (sync_it & S_VERSION) - inode_inc_iversion(inode); - if (sync_it & S_CTIME) - inode->i_ctime = now; - if (sync_it & S_MTIME) - inode->i_mtime = now; - mark_inode_dirty_sync(inode); + ret = update_time(inode, &now, sync_it); mnt_drop_write_file(file); + + return ret; } EXPORT_SYMBOL(file_update_time); diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 3ff5fcc..122e260 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -221,6 +221,10 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * already_written = 0; + errno = file_update_time(file); + if (errno) + goto outrel; + bouncebuffer = vmalloc(bufsize); if (!bouncebuffer) { errno = -EIO; /* -ENOMEM */ @@ -252,8 +256,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * } vfree(bouncebuffer); - file_update_time(file); - *ppos = pos; if (pos > i_size_read(inode)) { diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 8639169..7389d2d 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2096,7 +2096,9 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, err = file_remove_suid(file); if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos, count); 
out: diff --git a/fs/pipe.c b/fs/pipe.c index 95ebb56..49c1065 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -654,8 +654,11 @@ out: wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM); kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); } - if (ret > 0) - file_update_time(filp); + if (ret > 0) { + int err = file_update_time(filp); + if (err) + ret = err; + } return ret; } diff --git a/fs/splice.c b/fs/splice.c index f847684..47c4c1a 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1003,8 +1003,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); ret = file_remove_suid(out); if (!ret) { - file_update_time(out); - ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); + ret = file_update_time(out); + if (!ret) + ret = splice_from_pipe_feed(pipe, &sd, + pipe_to_file); } mutex_unlock(&inode->i_mutex); } while (ret > 0); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8d214b8..9f7ec15 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -586,8 +586,11 @@ restart: * lock above. Eventually we should look into a way to avoid * the pointless lock roundtrip. 
*/ - if (likely(!(file->f_mode & FMODE_NOCMTIME))) - file_update_time(file); + if (likely(!(file->f_mode & FMODE_NOCMTIME))) { + error = file_update_time(file); + if (error) + return error; + } /* * If we're writing the file then make sure to clear the setuid and diff --git a/include/linux/fs.h b/include/linux/fs.h index cdc1a96..57fc705 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1684,6 +1684,7 @@ struct inode_operations { void (*truncate_range)(struct inode *, loff_t, loff_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); + int (*update_time)(struct inode *, struct timespec *, int); } ____cacheline_aligned; struct seq_file; @@ -1843,6 +1844,13 @@ static inline void inode_inc_iversion(struct inode *inode) spin_unlock(&inode->i_lock); } +enum file_time_flags { + S_ATIME = 1, + S_MTIME = 2, + S_CTIME = 4, + S_VERSION = 8, +}; + extern void touch_atime(struct path *); static inline void file_accessed(struct file *file) { @@ -2579,7 +2587,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *); extern int inode_newsize_ok(const struct inode *, loff_t offset); extern void setattr_copy(struct inode *inode, const struct iattr *attr); -extern void file_update_time(struct file *file); +extern int file_update_time(struct file *file); extern int generic_show_options(struct seq_file *m, struct dentry *root); extern void save_mount_options(struct super_block *sb, char *options); diff --git a/mm/filemap.c b/mm/filemap.c index 21e5abf..51070f1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2463,7 +2463,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) goto out; - file_update_time(file); + err = file_update_time(file); + if (err) + goto out; /* coalesce the iovecs and go direct-to-BIO for O_DIRECT */ if (unlikely(file->f_flags & O_DIRECT)) { diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c index a4eb311..213ca1f 100644 --- a/mm/filemap_xip.c +++ b/mm/filemap_xip.c @@ 
-426,7 +426,9 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len, if (ret) goto out_backing; - file_update_time(filp); + ret = file_update_time(filp); + if (ret) + goto out_backing; ret = __xip_file_write (filp, buf, count, pos, ppos); -- cgit v0.10.2 From e41f941a23115e84a8550b3d901a13a14b2edc2f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Mar 2012 09:46:47 -0400 Subject: Btrfs: move over to use ->update_time Btrfs had been doing its own file_update_time so we could catch ENOSPC properly, so just update our btrfs_update_time to work with the new stuff and then we'll be fancy later. Thanks, Signed-off-by: Josef Bacik diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8fd7233..ba8743b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2922,7 +2922,6 @@ int btrfs_readpage(struct file *file, struct page *page); void btrfs_evict_inode(struct inode *inode); int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc); int btrfs_dirty_inode(struct inode *inode); -int btrfs_update_time(struct file *file); struct inode *btrfs_alloc_inode(struct super_block *sb); void btrfs_destroy_inode(struct inode *inode); int btrfs_drop_inode(struct inode *inode); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 53bf2d7..974beb8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1404,7 +1404,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } - err = btrfs_update_time(file); + err = file_update_time(file); if (err) { mutex_unlock(&inode->i_mutex); goto out; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ceb7b9c..3c1723a9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4431,46 +4431,18 @@ int btrfs_dirty_inode(struct inode *inode) * This is a copy of file_update_time. We need this so we can return error on * ENOSPC for updating the inode in the case of file write and mmap writes. 
*/ -int btrfs_update_time(struct file *file) +static int btrfs_update_time(struct inode *inode, struct timespec *now, + int flags) { - struct inode *inode = file->f_path.dentry->d_inode; - struct timespec now; - int ret; - enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; - - /* First try to exhaust all avenues to not sync */ - if (IS_NOCMTIME(inode)) - return 0; - - now = current_fs_time(inode->i_sb); - if (!timespec_equal(&inode->i_mtime, &now)) - sync_it = S_MTIME; - - if (!timespec_equal(&inode->i_ctime, &now)) - sync_it |= S_CTIME; - - if (IS_I_VERSION(inode)) - sync_it |= S_VERSION; - - if (!sync_it) - return 0; - - /* Finally allowed to write? Takes lock. */ - if (mnt_want_write_file(file)) - return 0; - - /* Only change inode inside the lock region */ - if (sync_it & S_VERSION) + if (flags & S_VERSION) inode_inc_iversion(inode); - if (sync_it & S_CTIME) - inode->i_ctime = now; - if (sync_it & S_MTIME) - inode->i_mtime = now; - ret = btrfs_dirty_inode(inode); - if (!ret) - mark_inode_dirty_sync(inode); - mnt_drop_write(file->f_path.mnt); - return ret; + if (flags & S_CTIME) + inode->i_ctime = *now; + if (flags & S_MTIME) + inode->i_mtime = *now; + if (flags & S_ATIME) + inode->i_atime = *now; + return btrfs_dirty_inode(inode); } /* @@ -6576,7 +6548,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); if (!ret) { - ret = btrfs_update_time(vma->vm_file); + ret = file_update_time(vma->vm_file); reserved = 1; } if (ret) { @@ -7647,6 +7619,7 @@ static const struct inode_operations btrfs_file_inode_operations = { .permission = btrfs_permission, .fiemap = btrfs_fiemap, .get_acl = btrfs_get_acl, + .update_time = btrfs_update_time, }; static const struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -7657,6 +7630,7 @@ static const struct inode_operations btrfs_special_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = 
btrfs_removexattr, .get_acl = btrfs_get_acl, + .update_time = btrfs_update_time, }; static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, @@ -7670,6 +7644,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .get_acl = btrfs_get_acl, + .update_time = btrfs_update_time, }; const struct dentry_operations btrfs_dentry_operations = { -- cgit v0.10.2 From 697f514df10b0f46bcd7596c1be18b7e2e9b28bb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:05 +0200 Subject: vfs: split do_lookup() Split do_lookup() into two functions: lookup_fast() - does cached lookup without i_mutex lookup_slow() - does lookup with i_mutex Both follow managed dentries. The new functions are needed by atomic_open. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 93ac932..7f4ab82 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1125,8 +1125,8 @@ static struct dentry *__lookup_hash(struct qstr *name, * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. 
*/ -static int do_lookup(struct nameidata *nd, struct qstr *name, - struct path *path, struct inode **inode) +static int lookup_fast(struct nameidata *nd, struct qstr *name, + struct path *path, struct inode **inode) { struct vfsmount *mnt = nd->path.mnt; struct dentry *dentry, *parent = nd->path.dentry; @@ -1208,7 +1208,7 @@ unlazy: goto need_lookup; } } -done: + path->mnt = mnt; path->dentry = dentry; err = follow_managed(path, nd->flags); @@ -1222,6 +1222,17 @@ done: return 0; need_lookup: + return 1; +} + +/* Fast lookup failed, do it the slow way */ +static int lookup_slow(struct nameidata *nd, struct qstr *name, + struct path *path) +{ + struct dentry *dentry, *parent; + int err; + + parent = nd->path.dentry; BUG_ON(nd->inode != parent->d_inode); mutex_lock(&parent->d_inode->i_mutex); @@ -1229,7 +1240,16 @@ need_lookup: mutex_unlock(&parent->d_inode->i_mutex); if (IS_ERR(dentry)) return PTR_ERR(dentry); - goto done; + path->mnt = nd->path.mnt; + path->dentry = dentry; + err = follow_managed(path, nd->flags); + if (unlikely(err < 0)) { + path_put_conditional(path, nd); + return err; + } + if (err) + nd->flags |= LOOKUP_JUMPED; + return 0; } static inline int may_lookup(struct nameidata *nd) @@ -1301,21 +1321,26 @@ static inline int walk_component(struct nameidata *nd, struct path *path, */ if (unlikely(type != LAST_NORM)) return handle_dots(nd, type); - err = do_lookup(nd, name, path, &inode); + err = lookup_fast(nd, name, path, &inode); if (unlikely(err)) { - terminate_walk(nd); - return err; - } - if (!inode) { - path_to_nameidata(path, nd); - terminate_walk(nd); - return -ENOENT; + if (err < 0) + goto out_err; + + err = lookup_slow(nd, name, path); + if (err < 0) + goto out_err; + + inode = path->dentry->d_inode; } + err = -ENOENT; + if (!inode) + goto out_path_put; + if (should_follow_link(inode, follow)) { if (nd->flags & LOOKUP_RCU) { if (unlikely(unlazy_walk(nd, path->dentry))) { - terminate_walk(nd); - return -ECHILD; + err = -ECHILD; + goto out_err; } 
} BUG_ON(inode != path->dentry->d_inode); @@ -1324,6 +1349,12 @@ static inline int walk_component(struct nameidata *nd, struct path *path, path_to_nameidata(path, nd); nd->inode = inode; return 0; + +out_path_put: + path_to_nameidata(path, nd); +out_err: + terminate_walk(nd); + return err; } /* -- cgit v0.10.2 From e276ae672fa2d727721b1a5a2508ff34bac85439 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:06 +0200 Subject: vfs: do_last(): make exit RCU safe Allow returning from do_last() with LOOKUP_RCU still set on the "out:" and "exit:" labels. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 7f4ab82..edc18cd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2383,7 +2383,7 @@ common: out: if (want_write) mnt_drop_write(nd->path.mnt); - path_put(&nd->path); + terminate_walk(nd); return filp; exit_mutex_unlock: -- cgit v0.10.2 From a1eb33153090549e622ab42cb375af06614dd7a8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:07 +0200 Subject: vfs: do_last(): inline walk_component() Copy walk_component() into do_last(). 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index edc18cd..f6b31c9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2200,6 +2200,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, int want_write = 0; int acc_mode = op->acc_mode; struct file *filp; + struct inode *inode; int error; nd->flags &= ~LOOKUP_PARENT; @@ -2237,12 +2238,36 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) symlink_ok = 1; /* we _can_ be in RCU mode here */ - error = walk_component(nd, path, &nd->last, LAST_NORM, - !symlink_ok); - if (error < 0) - return ERR_PTR(error); - if (error) /* symlink */ + error = lookup_fast(nd, &nd->last, path, &inode); + if (unlikely(error)) { + if (error < 0) + goto exit; + + error = lookup_slow(nd, &nd->last, path); + if (error < 0) + goto exit; + + inode = path->dentry->d_inode; + } + error = -ENOENT; + if (!inode) { + path_to_nameidata(path, nd); + goto exit; + } + + if (should_follow_link(inode, !symlink_ok)) { + if (nd->flags & LOOKUP_RCU) { + if (unlikely(unlazy_walk(nd, path->dentry))) { + error = -ECHILD; + goto exit; + } + } + BUG_ON(inode != path->dentry->d_inode); return NULL; + } + path_to_nameidata(path, nd); + nd->inode = inode; + /* sayonara */ error = complete_walk(nd); if (error) -- cgit v0.10.2 From decf3400879d02d0eafedea52c7f208587be062a Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:08 +0200 Subject: vfs: do_last(): use inode variable Use helper variable instead of path->dentry->d_inode before complete_walk(). This will allow this code to be used in RCU mode. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index f6b31c9..41445e7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2358,15 +2358,17 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error) nd->flags |= LOOKUP_JUMPED; + BUG_ON(nd->flags & LOOKUP_RCU); + inode = path->dentry->d_inode; error = -ENOENT; - if (!path->dentry->d_inode) + if (!inode) goto exit_dput; - if (path->dentry->d_inode->i_op->follow_link) + if (inode->i_op->follow_link) return NULL; path_to_nameidata(path, nd); - nd->inode = path->dentry->d_inode; + nd->inode = inode; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ error = complete_walk(nd); if (error) -- cgit v0.10.2 From d45ea86792db9679ed010b2c3df3db32b2ce5bde Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:09 +0200 Subject: vfs: make follow_link check RCU safe This will allow this code to be used in RCU mode. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 41445e7..c6b9968 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2201,6 +2201,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, int acc_mode = op->acc_mode; struct file *filp; struct inode *inode; + int symlink_ok = 0; int error; nd->flags &= ~LOOKUP_PARENT; @@ -2232,7 +2233,6 @@ static struct file *do_last(struct nameidata *nd, struct path *path, } if (!(open_flag & O_CREAT)) { - int symlink_ok = 0; if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) @@ -2364,8 +2364,16 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (!inode) goto exit_dput; - if (inode->i_op->follow_link) + if (should_follow_link(inode, !symlink_ok)) { + if (nd->flags & LOOKUP_RCU) { + if (unlikely(unlazy_walk(nd, path->dentry))) { + error = -ECHILD; + goto exit; + } + } + BUG_ON(inode != path->dentry->d_inode); return NULL; + } 
path_to_nameidata(path, nd); nd->inode = inode; -- cgit v0.10.2 From 54c33e7f95284539e52ec2d99dcdf6efd29b247f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:10 +0200 Subject: vfs: do_last(): make ENOENT exit RCU safe This will allow this code to be used in RCU mode. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index c6b9968..a7e994b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2361,8 +2361,10 @@ static struct file *do_last(struct nameidata *nd, struct path *path, BUG_ON(nd->flags & LOOKUP_RCU); inode = path->dentry->d_inode; error = -ENOENT; - if (!inode) - goto exit_dput; + if (!inode) { + path_to_nameidata(path, nd); + goto exit; + } if (should_follow_link(inode, !symlink_ok)) { if (nd->flags & LOOKUP_RCU) { -- cgit v0.10.2 From af2f55426d1d888dcc0ba8dc9e9deb49fae38e38 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:11 +0200 Subject: vfs: do_last(): check LOOKUP_DIRECTORY Check for ENOTDIR before finishing open. This allows this code to be shared between O_CREAT and plain opens. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index a7e994b..4767c05 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2386,6 +2386,9 @@ static struct file *do_last(struct nameidata *nd, struct path *path, error = -EISDIR; if (S_ISDIR(nd->inode->i_mode)) goto exit; + error = -ENOTDIR; + if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) + goto exit; ok: if (!S_ISREG(nd->inode->i_mode)) will_truncate = 0; -- cgit v0.10.2 From 050ac841ea90610067fec26150574be8c6077738 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:12 +0200 Subject: vfs: do_last(): only return EISDIR for O_CREAT This allows this code to be shared between O_CREAT and plain opens. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 4767c05..90210b4 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2384,7 +2384,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (error) return ERR_PTR(error); error = -EISDIR; - if (S_ISDIR(nd->inode->i_mode)) + if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) goto exit; error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) -- cgit v0.10.2 From d7fdd7f6e1afbffda03aeacb90039c092e8cacf8 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:13 +0200 Subject: vfs: do_last(): add audit_inode before open This allows this code to be shared between O_CREAT and plain opens. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 90210b4..125386c 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2389,6 +2389,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, error = -ENOTDIR; if ((nd->flags & LOOKUP_DIRECTORY) && !nd->inode->i_op->lookup) goto exit; + audit_inode(pathname, nd->path.dentry); ok: if (!S_ISREG(nd->inode->i_mode)) will_truncate = 0; -- cgit v0.10.2 From 5f5daac12a4cef568d1269be0215fec0667193c1 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:14 +0200 Subject: vfs: do_last() common post lookup Now the post lookup code can be shared between O_CREAT and plain opens since they are essentially the same. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 125386c..998d531 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2249,37 +2249,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, inode = path->dentry->d_inode; } - error = -ENOENT; - if (!inode) { - path_to_nameidata(path, nd); - goto exit; - } - - if (should_follow_link(inode, !symlink_ok)) { - if (nd->flags & LOOKUP_RCU) { - if (unlikely(unlazy_walk(nd, path->dentry))) { - error = -ECHILD; - goto exit; - } - } - BUG_ON(inode != path->dentry->d_inode); - return NULL; - } - path_to_nameidata(path, nd); - nd->inode = inode; - - /* sayonara */ - error = complete_walk(nd); - if (error) - return ERR_PTR(error); - - error = -ENOTDIR; - if (nd->flags & LOOKUP_DIRECTORY) { - if (!nd->inode->i_op->lookup) - goto exit; - } - audit_inode(pathname, nd->path.dentry); - goto ok; + goto finish_lookup; } /* create side of things */ @@ -2360,6 +2330,8 @@ static struct file *do_last(struct nameidata *nd, struct path *path, BUG_ON(nd->flags & LOOKUP_RCU); inode = path->dentry->d_inode; +finish_lookup: + /* we _can_ be in RCU mode here */ error = -ENOENT; if (!inode) { path_to_nameidata(path, nd); -- cgit v0.10.2 From 90ad1a8ecb9bfd5ff4503ac42cd049a97643ee51 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:15 +0200 Subject: vfs: split __dentry_open() Split __dentry_open() into two functions: do_dentry_open() - does most of the actual work, doesn't put file on failure open_check_o_direct() - after a successful open, checks direct_IO method This will allow i_op->atomic_open to do just the file initialization and leave the direct_IO checking to the VFS. 
Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/internal.h b/fs/internal.h index 8040af4..18bc216 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -100,6 +100,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, extern long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag); +extern int open_check_o_direct(struct file *f); /* * inode.c diff --git a/fs/open.c b/fs/open.c index d543012..9daa1ce 100644 --- a/fs/open.c +++ b/fs/open.c @@ -654,10 +654,23 @@ static inline int __get_file_write_access(struct inode *inode, return error; } -static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, - struct file *f, - int (*open)(struct inode *, struct file *), - const struct cred *cred) +int open_check_o_direct(struct file *f) +{ + /* NB: we're sure to have correct a_ops only after f_op->open */ + if (f->f_flags & O_DIRECT) { + if (!f->f_mapping->a_ops || + ((!f->f_mapping->a_ops->direct_IO) && + (!f->f_mapping->a_ops->get_xip_mem))) { + return -EINVAL; + } + } + return 0; +} + +static struct file *do_dentry_open(struct dentry *dentry, struct vfsmount *mnt, + struct file *f, + int (*open)(struct inode *, struct file *), + const struct cred *cred) { static const struct file_operations empty_fops = {}; struct inode *inode; @@ -713,16 +726,6 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping); - /* NB: we're sure to have correct a_ops only after f_op->open */ - if (f->f_flags & O_DIRECT) { - if (!f->f_mapping->a_ops || - ((!f->f_mapping->a_ops->direct_IO) && - (!f->f_mapping->a_ops->get_xip_mem))) { - fput(f); - f = ERR_PTR(-EINVAL); - } - } - return f; cleanup_all: @@ -750,6 +753,22 @@ cleanup_file: return ERR_PTR(error); } +static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, + struct file *f, + int (*open)(struct inode *, struct file *), + const struct cred *cred) +{ 
+ struct file *res = do_dentry_open(dentry, mnt, f, open, cred); + if (!IS_ERR(res)) { + int error = open_check_o_direct(f); + if (error) { + fput(res); + res = ERR_PTR(error); + } + } + return res; +} + /** * lookup_instantiate_filp - instantiates the open intent filp * @nd: pointer to nameidata -- cgit v0.10.2 From 78f71eff3c274f3907f4aa1bbe3267281ba1c603 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:16 +0200 Subject: vfs: do_dentry_open(): don't put filp Move put_filp() out to __dentry_open(), the only caller now. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/open.c b/fs/open.c index 9daa1ce..511c548 100644 --- a/fs/open.c +++ b/fs/open.c @@ -747,7 +747,6 @@ cleanup_all: f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: - put_filp(f); dput(dentry); mntput(mnt); return ERR_PTR(error); @@ -765,6 +764,8 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, fput(res); res = ERR_PTR(error); } + } else { + put_filp(f); } return res; } -- cgit v0.10.2 From 91daee988db38b0207eec719a3160b163c077007 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:17 +0200 Subject: vfs: nameidata_to_filp(): inline __dentry_open() Copy __dentry_open() into nameidata_to_filp(). Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/open.c b/fs/open.c index 511c548..9fd34b7 100644 --- a/fs/open.c +++ b/fs/open.c @@ -828,9 +828,25 @@ struct file *nameidata_to_filp(struct nameidata *nd) /* Has the filesystem initialised the file for us? 
*/ if (filp->f_path.dentry == NULL) { + struct file *res; + path_get(&nd->path); - filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp, - NULL, cred); + res = do_dentry_open(nd->path.dentry, nd->path.mnt, + filp, NULL, cred); + if (!IS_ERR(res)) { + int error; + + BUG_ON(res != filp); + + error = open_check_o_direct(filp); + if (error) { + fput(filp); + filp = ERR_PTR(error); + } + } else { + put_filp(filp); + filp = res; + } } return filp; } -- cgit v0.10.2 From 50ee93afcaa970620d1fb5a9894109a2ab152868 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:18 +0200 Subject: vfs: nameidata_to_filp(): don't throw away file on error If open fails, don't put the file. This allows it to be reused if open needs to be retried. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/open.c b/fs/open.c index 9fd34b7..d6c79a0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -824,10 +824,11 @@ struct file *nameidata_to_filp(struct nameidata *nd) /* Pick up the filp from the open intent */ filp = nd->intent.open.file; - nd->intent.open.file = NULL; /* Has the filesystem initialised the file for us? */ - if (filp->f_path.dentry == NULL) { + if (filp->f_path.dentry != NULL) { + nd->intent.open.file = NULL; + } else { struct file *res; path_get(&nd->path); @@ -836,6 +837,7 @@ struct file *nameidata_to_filp(struct nameidata *nd) if (!IS_ERR(res)) { int error; + nd->intent.open.file = NULL; BUG_ON(res != filp); error = open_check_o_direct(filp); @@ -844,7 +846,7 @@ struct file *nameidata_to_filp(struct nameidata *nd) filp = ERR_PTR(error); } } else { - put_filp(filp); + /* Allow nd->intent.open.file to be recycled */ filp = res; } } -- cgit v0.10.2 From 16b1c1cd71176ab0a76b26818fbf12db9183ed57 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:19 +0200 Subject: vfs: retry last component if opening stale dentry NFS optimizes away d_revalidates for last component of open. This means that open itself can find the dentry stale. 
This patch allows the filesystem to return EOPENSTALE and the VFS will retry the lookup on just the last component if possible. If the lookup was done using RCU mode, including the last component, then this is not possible since the parent dentry is lost. In this case fall back to non-RCU lookup. Currently this is not used since NFS will always leave RCU mode. Signed-off-by: Miklos Szeredi Signed-off-by: Al Viro diff --git a/fs/namei.c b/fs/namei.c index 998d531..7d69419 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2202,6 +2202,8 @@ static struct file *do_last(struct nameidata *nd, struct path *path, struct file *filp; struct inode *inode; int symlink_ok = 0; + struct path save_parent = { .dentry = NULL, .mnt = NULL }; + bool retried = false; int error; nd->flags &= ~LOOKUP_PARENT; @@ -2267,6 +2269,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path, if (nd->last.name[nd->last.len]) goto exit; +retry_lookup: mutex_lock(&dir->d_inode->i_mutex); dentry = lookup_hash(nd); @@ -2349,12 +2352,21 @@ finish_lookup: return NULL; } - path_to_nameidata(path, nd); + if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { + path_to_nameidata(path, nd); + } else { + save_parent.dentry = nd->path.dentry; + save_parent.mnt = mntget(path->mnt); + nd->path.dentry = path->dentry; + + } nd->inode = inode; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... 
*/ error = complete_walk(nd); - if (error) + if (error) { + path_put(&save_parent); return ERR_PTR(error); + } error = -EISDIR; if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode)) goto exit; @@ -2377,6 +2389,20 @@ common: if (error) goto exit; filp = nameidata_to_filp(nd); + if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) { + BUG_ON(save_parent.dentry != dir); + path_put(&nd->path); + nd->path = save_parent; + nd->inode = dir->d_inode; + save_parent.mnt = NULL; + save_parent.dentry = NULL; + if (want_write) { + mnt_drop_write(nd->path.mnt); + want_write = 0; + } + retried = true; + goto retry_lookup; + } if (!IS_ERR(filp)) { error = ima_file_check(filp, op->acc_mode); if (error) { @@ -2396,6 +2422,7 @@ common: out: if (want_write) mnt_drop_write(nd->path.mnt); + path_put(&save_parent); terminate_walk(nd); return filp; @@ -2459,6 +2486,12 @@ out: if (base) fput(base); release_open_intent(nd); + if (filp == ERR_PTR(-EOPENSTALE)) { + if (flags & LOOKUP_RCU) + filp = ERR_PTR(-ECHILD); + else + filp = ERR_PTR(-ESTALE); + } return filp; out_filp: diff --git a/include/linux/errno.h b/include/linux/errno.h index 2d09bfa..e0de516 100644 --- a/include/linux/errno.h +++ b/include/linux/errno.h @@ -17,6 +17,7 @@ #define ENOIOCTLCMD 515 /* No ioctl command */ #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */ #define EPROBE_DEFER 517 /* Driver requests probe retry */ +#define EOPENSTALE 518 /* open found a stale dentry */ /* Defined for the NFSv3 protocol */ #define EBADHANDLE 521 /* Illegal NFS file handle */ -- cgit v0.10.2 From 0ef97dcfce4179a2eba046b855ee2f91d6f1b414 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 May 2012 17:30:20 +0200 Subject: nfs: don't open in ->d_revalidate NFSv4 can't do reliable opens in d_revalidate, since it cannot know whether a mount needs to be followed or not. 
It does check d_mountpoint() on the dentry, which can result in a weird error if the VFS found that the mount does not in fact need to be followed, e.g.: # mount --bind /mnt/nfs /mnt/nfs-clone # echo something > /mnt/nfs/tmp/bar # echo x > /tmp/file # mount --bind /tmp/file /mnt/nfs-clone/tmp/bar # cat /mnt/nfs/tmp/bar cat: /mnt/nfs/tmp/bar: Not a directory Which should, by any sane filesystem, result in "something" being printed. So instead do the open in f_op->open() and in the unlikely case that the cached dentry turned out to be invalid, drop the dentry and return EOPENSTALE to let the VFS retry. Signed-off-by: Miklos Szeredi CC: Trond Myklebust Signed-off-by: Al Viro diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 0989a20..f430057 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1354,10 +1354,10 @@ out: } #ifdef CONFIG_NFS_V4 -static int nfs_open_revalidate(struct dentry *, struct nameidata *); +static int nfs4_lookup_revalidate(struct dentry *, struct nameidata *); const struct dentry_operations nfs4_dentry_operations = { - .d_revalidate = nfs_open_revalidate, + .d_revalidate = nfs4_lookup_revalidate, .d_delete = nfs_dentry_delete, .d_iput = nfs_dentry_iput, .d_automount = nfs_d_automount, @@ -1519,13 +1519,11 @@ no_open: return nfs_lookup(dir, dentry, nd); } -static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) +static int nfs4_lookup_revalidate(struct dentry *dentry, struct nameidata *nd) { struct dentry *parent = NULL; struct inode *inode; struct inode *dir; - struct nfs_open_context *ctx; - struct iattr attr; int openflags, ret = 0; if (nd->flags & LOOKUP_RCU) @@ -1554,57 +1552,13 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) /* We cannot do exclusive creation on a positive dentry */ if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; - /* We can't create new files here */ - openflags &= ~(O_CREAT|O_EXCL); - - ctx = create_nfs_open_context(dentry, openflags); - ret = 
PTR_ERR(ctx); - if (IS_ERR(ctx)) - goto out; - attr.ia_valid = ATTR_OPEN; - if (openflags & O_TRUNC) { - attr.ia_valid |= ATTR_SIZE; - attr.ia_size = 0; - nfs_wb_all(inode); - } - - /* - * Note: we're not holding inode->i_mutex and so may be racing with - * operations that change the directory. We therefore save the - * change attribute *before* we do the RPC call. - */ - inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); - switch (ret) { - case -EPERM: - case -EACCES: - case -EDQUOT: - case -ENOSPC: - case -EROFS: - goto out_put_ctx; - default: - goto out_drop; - } - } - iput(inode); - if (inode != dentry->d_inode) - goto out_drop; + /* Let f_op->open() actually open (and revalidate) the file */ + ret = 1; - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - ret = nfs_intent_set_file(nd, ctx); - if (ret >= 0) - ret = 1; out: dput(parent); return ret; -out_drop: - d_drop(dentry); - ret = 0; -out_put_ctx: - put_nfs_open_context(ctx); - goto out; no_open_dput: dput(parent); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 56311ca..a6708e6b 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -879,12 +879,81 @@ const struct file_operations nfs_file_operations = { static int nfs4_file_open(struct inode *inode, struct file *filp) { + struct nfs_open_context *ctx; + struct dentry *dentry = filp->f_path.dentry; + struct dentry *parent = NULL; + struct inode *dir; + unsigned openflags = filp->f_flags; + struct iattr attr; + int err; + + BUG_ON(inode != dentry->d_inode); /* - * NFSv4 opens are handled in d_lookup and d_revalidate. If we get to - * this point, then something is very wrong + * If no cached dentry exists or if it's negative, NFSv4 handled the + * opens in ->lookup() or ->create(). + * + * We only get this far for a cached positive dentry. We skipped + * revalidation, so handle it here by dropping the dentry and returning + * -EOPENSTALE. The VFS will retry the lookup/create/open. 
*/ - dprintk("NFS: %s called! inode=%p filp=%p\n", __func__, inode, filp); - return -ENOTDIR; + + dprintk("NFS: open file(%s/%s)\n", + dentry->d_parent->d_name.name, + dentry->d_name.name); + + if ((openflags & O_ACCMODE) == 3) + openflags--; + + /* We can't create new files here */ + openflags &= ~(O_CREAT|O_EXCL); + + parent = dget_parent(dentry); + dir = parent->d_inode; + + ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode); + err = PTR_ERR(ctx); + if (IS_ERR(ctx)) + goto out; + + attr.ia_valid = ATTR_OPEN; + if (openflags & O_TRUNC) { + attr.ia_valid |= ATTR_SIZE; + attr.ia_size = 0; + nfs_wb_all(inode); + } + + inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + switch (err) { + case -EPERM: + case -EACCES: + case -EDQUOT: + case -ENOSPC: + case -EROFS: + goto out_put_ctx; + default: + goto out_drop; + } + } + iput(inode); + if (inode != dentry->d_inode) + goto out_drop; + + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + nfs_file_set_open_context(filp, ctx); + err = 0; + +out_put_ctx: + put_nfs_open_context(ctx); +out: + dput(parent); + return err; + +out_drop: + d_drop(dentry); + err = -EOPENSTALE; + goto out_put_ctx; } const struct file_operations nfs4_file_operations = { -- cgit v0.10.2