From 9c64daff9d5afb102dfe64a26829e26725538e58 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 8 Jun 2009 15:22:24 -0400 Subject: ext3: avoid unnecessary spinlock in critical POSIX ACL path If a filesystem supports POSIX ACL's, the VFS layer expects the filesystem to do POSIX ACL checks on any files not owned by the caller, and it does this for every single pathname component that it looks up. That obviously can be pretty expensive if the filesystem isn't careful about it, especially with locking. That's doubly sad, since the common case tends to be that there are no ACL's associated with the files in question. ext3 already caches the ACL data so that it doesn't have to look it up over and over again, but it does so by taking the inode->i_lock spinlock on every lookup. Which is a noticeable overhead even if it's a private lock, especially on CPU's where the serialization is expensive (eg Intel Netburst aka 'P4'). For the special case of not actually having any ACL's, all that locking is unnecessary. Even if somebody else were to be changing the ACL's on another CPU, we simply don't care - if we've seen a NULL ACL, we might as well use it. So just load the ACL speculatively without any locking, and if it was NULL, just use it. If it's non-NULL (either because we had a cached entry, or because the cache hasn't been filled in at all), it means that we'll need to get the lock and re-load it properly. This is noticeable even on Nehalem, which does locking quite well (much better than P4). From lmbench: Processor, Processes - times in microseconds - smaller is better -------------------------------------------------------------------- Host OS Mhz null null open slct fork exec sh call I/O stat clos TCP proc proc proc --------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- - before: nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.95 1.45 2.18 69.1 273. 1141 nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.95 1.48 2.28 69.9 253. 1140 nehalem.l Linux 2.6.30- 3193 0.04 0.10 0.95 1.42 2.19 68.6 284. 1141 - after: nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.92 1.44 2.12 68.3 282. 1094 nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.92 1.39 2.20 67.0 308. 1123 nehalem.l Linux 2.6.30- 3193 0.04 0.09 0.92 1.39 2.36 67.4 293. 1148 where you can see what appears to be a roughly 3% improvement in stat and open/close latencies from just the removal of the locking overhead. Of course, this only matters for files you don't own (the owner never needs to do the ACL checks), but that's the common case for libraries, header files, and executables. As well as for the base components of any absolute pathname, even if you are the owner of the final file. [ At some point we probably want to move this ACL caching logic entirely into the VFS layer (and only call down to the filesystem when uncached), but in the meantime this improves ext3 a bit. A similar fix to btrfs makes a much bigger difference (15x improvement in lmbench) due to broken caching. ] Signed-off-by: Linus Torvalds Signed-off-by: "Theodore Ts'o" Acked-by: Jan Kara Cc: Al Viro Signed-off-by: Al Viro diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index d81ef2f..e0c7454 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -129,12 +129,15 @@ fail: static inline struct posix_acl * ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) { - struct posix_acl *acl = EXT3_ACL_NOT_CACHED; + struct posix_acl *acl = ACCESS_ONCE(*i_acl); - spin_lock(&inode->i_lock); - if (*i_acl != EXT3_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); + if (acl) { + spin_lock(&inode->i_lock); + acl = *i_acl; + if (acl != EXT3_ACL_NOT_CACHED) + acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); + } return acl; } -- cgit v0.10.2 From 210ad6aedb332e73167ece5af9bd47f0da8c2aca Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 8 Jun 2009 15:22:25 -0400 Subject: ext4: avoid unnecessary spinlock in critical POSIX ACL path If a filesystem supports POSIX ACL's, the VFS layer expects the filesystem to do POSIX ACL checks on any files not owned by the caller, and it does this for every single pathname component that it looks up. That obviously can be pretty expensive if the filesystem isn't careful about it, especially with locking. That's doubly sad, since the common case tends to be that there are no ACL's associated with the files in question. ext4 already caches the ACL data so that it doesn't have to look it up over and over again, but it does so by taking the inode->i_lock spinlock on every lookup. Which is a noticeable overhead even if it's a private lock, especially on CPU's where the serialization is expensive (eg Intel Netburst aka 'P4'). For the special case of not actually having any ACL's, all that locking is unnecessary. Even if somebody else were to be changing the ACL's on another CPU, we simply don't care - if we've seen a NULL ACL, we might as well use it. So just load the ACL speculatively without any locking, and if it was NULL, just use it. If it's non-NULL (either because we had a cached entry, or because the cache hasn't been filled in at all), it means that we'll need to get the lock and re-load it properly. (This commit was ported from a patch originally authored by Linus for ext3.) Signed-off-by: "Theodore Ts'o" Signed-off-by: Al Viro diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 647e0d6..605aeed 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -129,12 +129,15 @@ fail: static inline struct posix_acl * ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) { - struct posix_acl *acl = EXT4_ACL_NOT_CACHED; + struct posix_acl *acl = ACCESS_ONCE(*i_acl); - spin_lock(&inode->i_lock); - if (*i_acl != EXT4_ACL_NOT_CACHED) - acl = posix_acl_dup(*i_acl); - spin_unlock(&inode->i_lock); + if (acl) { + spin_lock(&inode->i_lock); + acl = *i_acl; + if (acl != EXT4_ACL_NOT_CACHED) + acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); + } return acl; } -- cgit v0.10.2 From b0895513f499b8f786d292ce48589ca210ca1d6e Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Wed, 17 Jun 2009 01:16:50 +0900 Subject: remove unlock_kernel() left accidentally commit 337eb00a2c3a421999c39c94ce7e33545ee8baa7 Push BKL down into ->remount_fs() and commit 4aa98cf768b6f2ea4b204620d949a665959214f6 Push BKL down into do_remount_sb() were uncorrectly merged. The former removes one pair of lock/unlock_kernel(), but the latter adds several unlock_kernel(). Finally a few unlock_kernel() calls left. Signed-off-by: J. R. Okajima Signed-off-by: Al Viro diff --git a/fs/super.c b/fs/super.c index 83b47416..d40d53a 100644 --- a/fs/super.c +++ b/fs/super.c @@ -545,24 +545,18 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) if ((flags & MS_RDONLY) && !(sb->s_flags & MS_RDONLY)) { if (force) mark_files_ro(sb); - else if (!fs_may_remount_ro(sb)) { - unlock_kernel(); + else if (!fs_may_remount_ro(sb)) return -EBUSY; - } retval = vfs_dq_off(sb, 1); - if (retval < 0 && retval != -ENOSYS) { - unlock_kernel(); + if (retval < 0 && retval != -ENOSYS) return -EBUSY; - } } remount_rw = !(flags & MS_RDONLY) && (sb->s_flags & MS_RDONLY); if (sb->s_op->remount_fs) { retval = sb->s_op->remount_fs(sb, &flags, data); - if (retval) { - unlock_kernel(); + if (retval) return retval; - } } sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK); if (remount_rw) -- cgit v0.10.2 From fe36adf47eb1f7f4972559efa30ce3d2d3f977f2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 13:35:01 -0400 Subject: No instance of ->bmap() needs BKL Signed-off-by: Al Viro diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 3120f8d..229d7b7 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -187,7 +187,7 @@ readpages: no write_begin: no locks the page yes write_end: no yes, unlocks yes perform_write: no n/a yes -bmap: yes +bmap: no invalidatepage: no yes releasepage: no yes direct_IO: no diff --git a/fs/ioctl.c b/fs/ioctl.c index 286f38d..001f8d3 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -70,9 +70,7 @@ static int ioctl_fibmap(struct file *filp, int __user *p) res = get_user(block, p); if (res) return res; - lock_kernel(); res = mapping->a_ops->bmap(mapping, block); - unlock_kernel(); return put_user(res, p); } -- cgit v0.10.2 From 66c6af2e8ba55d4d6691c136b42f2423ab9598ec Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 14:15:00 -0400 Subject: fuse doesn't need BKL in ->umount_begin() Signed-off-by: Al Viro diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f0df55a..d8673cc 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -19,7 +19,6 @@ #include #include #include -#include MODULE_AUTHOR("Miklos Szeredi "); MODULE_DESCRIPTION("Filesystem in Userspace"); @@ -260,9 +259,7 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, static void fuse_umount_begin(struct super_block *sb) { - lock_kernel(); fuse_abort_conn(get_fuse_conn_super(sb)); - unlock_kernel(); } static void fuse_send_destroy(struct fuse_conn *fc) -- cgit v0.10.2 From ee450f796f6c4f3a563c914cb93ccfa91a1f7580 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 14:17:21 -0400 Subject: 9P doesn't need BKL in ->umount_begin() Signed-off-by: Al Viro diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index ab5547f..38d695d 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -231,10 +230,8 @@ v9fs_umount_begin(struct super_block *sb) { struct v9fs_session_info *v9ses; - lock_kernel(); v9ses = sb->s_fs_info; v9fs_session_cancel(v9ses); - unlock_kernel(); } static const struct super_operations v9fs_super_ops = { -- cgit v0.10.2 From 608ba50bd0225d95469154feba8f00a6457848c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 14:52:13 -0400 Subject: Cleanup of adfs headers Signed-off-by: Al Viro diff --git a/fs/adfs/adfs.h b/fs/adfs/adfs.h index a6665f3..9cc1877 100644 --- a/fs/adfs/adfs.h +++ b/fs/adfs/adfs.h @@ -1,3 +1,6 @@ +#include +#include + /* Internal data structures for ADFS */ #define ADFS_FREE_FRAG 0 @@ -17,6 +20,58 @@ struct buffer_head; /* + * adfs file system inode data in memory + */ +struct adfs_inode_info { + loff_t mmu_private; + unsigned long parent_id; /* object id of parent */ + __u32 loadaddr; /* RISC OS load address */ + __u32 execaddr; /* RISC OS exec address */ + unsigned int filetype; /* RISC OS file type */ + unsigned int attr; /* RISC OS permissions */ + unsigned int stamped:1; /* RISC OS file has date/time */ + struct inode vfs_inode; +}; + +/* + * Forward-declare this + */ +struct adfs_discmap; +struct adfs_dir_ops; + +/* + * ADFS file system superblock data in memory + */ +struct adfs_sb_info { + struct adfs_discmap *s_map; /* bh list containing map */ + struct adfs_dir_ops *s_dir; /* directory operations */ + + uid_t s_uid; /* owner uid */ + gid_t s_gid; /* owner gid */ + umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ + umode_t s_other_mask; /* ADFS other perm -> unix perm */ + + __u32 s_ids_per_zone; /* max. no ids in one zone */ + __u32 s_idlen; /* length of ID in map */ + __u32 s_map_size; /* sector size of a map */ + unsigned long s_size; /* total size (in blocks) of this fs */ + signed int s_map2blk; /* shift left by this for map->sector */ + unsigned int s_log2sharesize;/* log2 share size */ + __le32 s_version; /* disc format version */ + unsigned int s_namelen; /* maximum number of characters in name */ +}; + +static inline struct adfs_sb_info *ADFS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct adfs_inode_info *ADFS_I(struct inode *inode) +{ + return container_of(inode, struct adfs_inode_info, vfs_inode); +} + +/* * Directory handling */ struct adfs_dir { diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 4d40734..23aa52f 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -9,15 +9,7 @@ * * Common directory handling for ADFS */ -#include -#include -#include -#include -#include -#include #include -#include /* for file_fsync() */ - #include "adfs.h" /* diff --git a/fs/adfs/dir_f.c b/fs/adfs/dir_f.c index 31df6ad..bafc712 100644 --- a/fs/adfs/dir_f.c +++ b/fs/adfs/dir_f.c @@ -9,15 +9,7 @@ * * E and F format directory handling */ -#include -#include -#include -#include -#include -#include #include -#include - #include "adfs.h" #include "dir_f.h" diff --git a/fs/adfs/dir_fplus.c b/fs/adfs/dir_fplus.c index 139e0f3..1796bb35 100644 --- a/fs/adfs/dir_fplus.c +++ b/fs/adfs/dir_fplus.c @@ -7,15 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include -#include -#include -#include -#include -#include #include -#include - #include "adfs.h" #include "dir_fplus.h" diff --git a/fs/adfs/file.c b/fs/adfs/file.c index 8224d54..005ea34 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -19,10 +19,6 @@ * * adfs regular file handling primitives */ -#include -#include /* for file_fsync() */ -#include - #include "adfs.h" const struct file_operations adfs_file_operations = { diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 05b3a67..798cb07 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -7,17 +7,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include -#include -#include -#include -#include -#include -#include #include -#include #include - #include "adfs.h" /* @@ -395,4 +386,3 @@ int adfs_write_inode(struct inode *inode, int wait) unlock_kernel(); return ret; } -MODULE_LICENSE("GPL"); diff --git a/fs/adfs/map.c b/fs/adfs/map.c index 568081b..d1a5932 100644 --- a/fs/adfs/map.c +++ b/fs/adfs/map.c @@ -7,14 +7,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include -#include -#include -#include #include - #include - #include "adfs.h" /* diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 0ec5aaf..aad92f0 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -8,26 +8,12 @@ * published by the Free Software Foundation. */ #include -#include -#include -#include -#include -#include -#include -#include #include #include -#include #include -#include #include #include - -#include -#include - -#include - +#include #include "adfs.h" #include "dir_f.h" #include "dir_fplus.h" @@ -534,3 +520,4 @@ static void __exit exit_adfs_fs(void) module_init(init_adfs_fs) module_exit(exit_adfs_fs) +MODULE_LICENSE("GPL"); diff --git a/include/linux/adfs_fs.h b/include/linux/adfs_fs.h index ef788c2..b19801f 100644 --- a/include/linux/adfs_fs.h +++ b/include/linux/adfs_fs.h @@ -41,8 +41,6 @@ struct adfs_discrecord { #define ADFS_DR_SIZE_BITS (ADFS_DR_SIZE << 3) #ifdef __KERNEL__ -#include -#include /* * Calculate the boot block checksum on an ADFS drive. Note that this will * appear to be correct if the sector contains all zeros, so also check that @@ -60,17 +58,6 @@ static inline int adfs_checkbblk(unsigned char *ptr) return (result & 0xff) != ptr[511]; } - -static inline struct adfs_sb_info *ADFS_SB(struct super_block *sb) -{ - return sb->s_fs_info; -} - -static inline struct adfs_inode_info *ADFS_I(struct inode *inode) -{ - return container_of(inode, struct adfs_inode_info, vfs_inode); -} - #endif #endif diff --git a/include/linux/adfs_fs_i.h b/include/linux/adfs_fs_i.h deleted file mode 100644 index cb54303..0000000 --- a/include/linux/adfs_fs_i.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * linux/include/linux/adfs_fs_i.h - * - * Copyright (C) 1997 Russell King - */ - -#ifndef _ADFS_FS_I -#define _ADFS_FS_I - -/* - * adfs file system inode data in memory - */ -struct adfs_inode_info { - loff_t mmu_private; - unsigned long parent_id; /* object id of parent */ - __u32 loadaddr; /* RISC OS load address */ - __u32 execaddr; /* RISC OS exec address */ - unsigned int filetype; /* RISC OS file type */ - unsigned int attr; /* RISC OS permissions */ - unsigned int stamped:1; /* RISC OS file has date/time */ - struct inode vfs_inode; -}; - -#endif diff --git a/include/linux/adfs_fs_sb.h b/include/linux/adfs_fs_sb.h deleted file mode 100644 index d9bf05c..0000000 --- a/include/linux/adfs_fs_sb.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * linux/include/linux/adfs_fs_sb.h - * - * Copyright (C) 1997-1999 Russell King - */ - -#ifndef _ADFS_FS_SB -#define _ADFS_FS_SB - -/* - * Forward-declare this - */ -struct adfs_discmap; -struct adfs_dir_ops; - -/* - * ADFS file system superblock data in memory - */ -struct adfs_sb_info { - struct adfs_discmap *s_map; /* bh list containing map */ - struct adfs_dir_ops *s_dir; /* directory operations */ - - uid_t s_uid; /* owner uid */ - gid_t s_gid; /* owner gid */ - umode_t s_owner_mask; /* ADFS owner perm -> unix perm */ - umode_t s_other_mask; /* ADFS other perm -> unix perm */ - - __u32 s_ids_per_zone; /* max. no ids in one zone */ - __u32 s_idlen; /* length of ID in map */ - __u32 s_map_size; /* sector size of a map */ - unsigned long s_size; /* total size (in blocks) of this fs */ - signed int s_map2blk; /* shift left by this for map->sector */ - unsigned int s_log2sharesize;/* log2 share size */ - __le32 s_version; /* disc format version */ - unsigned int s_namelen; /* maximum number of characters in name */ -}; - -#endif -- cgit v0.10.2 From 536c94901eb8f2eb6fccf81ae6be814899a9f6e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 23:24:50 -0400 Subject: befs ->pust_super() doesn't need BKL Signed-off-by: Al Viro diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 9367b62..02c0613 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -737,8 +737,6 @@ parse_options(char *options, befs_mount_options * opts) static void befs_put_super(struct super_block *sb) { - lock_kernel(); - kfree(BEFS_SB(sb)->mount_opts.iocharset); BEFS_SB(sb)->mount_opts.iocharset = NULL; @@ -749,8 +747,6 @@ befs_put_super(struct super_block *sb) kfree(sb->s_fs_info); sb->s_fs_info = NULL; - - unlock_kernel(); } /* Allocate private field of the superblock, fill it. -- cgit v0.10.2 From e7ec952f6aa6ac1649ac49eb5e4de5b92c829d1e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 23:35:46 -0400 Subject: get rid of BKL in fs/efs Only readdir() really needed it, and that's easily fixable by switch to generic_file_llseek() Signed-off-by: Al Viro diff --git a/fs/efs/dir.c b/fs/efs/dir.c index 49308a2..7ee6f7e3 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c @@ -5,12 +5,12 @@ */ #include -#include #include "efs.h" static int efs_readdir(struct file *, void *, filldir_t); const struct file_operations efs_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = efs_readdir, }; @@ -33,8 +33,6 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { if (inode->i_size & (EFS_DIRBSIZE-1)) printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n"); - lock_kernel(); - /* work out where this entry can be found */ block = filp->f_pos >> EFS_DIRBSIZE_BITS; @@ -107,7 +105,6 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; out: - unlock_kernel(); return 0; } diff --git a/fs/efs/namei.c b/fs/efs/namei.c index c3fb5f9..1511bf9 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -8,7 +8,6 @@ #include #include -#include #include #include "efs.h" @@ -63,16 +62,12 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei efs_ino_t inodenum; struct inode * inode = NULL; - lock_kernel(); inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len); if (inodenum) { inode = efs_iget(dir->i_sb, inodenum); - if (IS_ERR(inode)) { - unlock_kernel(); + if (IS_ERR(inode)) return ERR_CAST(inode); - } } - unlock_kernel(); return d_splice_alias(inode, dentry); } @@ -115,11 +110,9 @@ struct dentry *efs_get_parent(struct dentry *child) struct dentry *parent = ERR_PTR(-ENOENT); efs_ino_t ino; - lock_kernel(); ino = efs_find_entry(child->d_inode, "..", 2); if (ino) parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino)); - unlock_kernel(); return parent; } diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c index 41911ec..75117d0 100644 --- a/fs/efs/symlink.c +++ b/fs/efs/symlink.c @@ -9,7 +9,6 @@ #include #include #include -#include #include "efs.h" static int efs_symlink_readpage(struct file *file, struct page *page) @@ -22,9 +21,8 @@ static int efs_symlink_readpage(struct file *file, struct page *page) err = -ENAMETOOLONG; if (size > 2 * EFS_BLOCKSIZE) - goto fail_notlocked; + goto fail; - lock_kernel(); /* read first 512 bytes of link target */ err = -EIO; bh = sb_bread(inode->i_sb, efs_bmap(inode, 0)); @@ -40,14 +38,11 @@ static int efs_symlink_readpage(struct file *file, struct page *page) brelse(bh); } link[size] = '\0'; - unlock_kernel(); SetPageUptodate(page); kunmap(page); unlock_page(page); return 0; fail: - unlock_kernel(); -fail_notlocked: SetPageError(page); kunmap(page); unlock_page(page); -- cgit v0.10.2 From cc46759a8c0ac4c6f13aa4b0f470305c05f600e1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 23:47:45 -0400 Subject: get rid of BKL in fs/minix Signed-off-by: Al Viro diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c index 3aebe32..6ac693f 100644 --- a/fs/minix/bitmap.c +++ b/fs/minix/bitmap.c @@ -12,13 +12,14 @@ /* bitmap.c contains the code that handles the inode and block bitmaps */ #include "minix.h" -#include #include #include #include static const int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 }; +static DEFINE_SPINLOCK(bitmap_lock); + static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, __u32 numbits) { unsigned i, j, sum = 0; @@ -69,11 +70,11 @@ void minix_free_block(struct inode *inode, unsigned long block) return; } bh = sbi->s_zmap[zone]; - lock_kernel(); + spin_lock(&bitmap_lock); if (!minix_test_and_clear_bit(bit, bh->b_data)) printk("minix_free_block (%s:%lu): bit already cleared\n", sb->s_id, block); - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); return; } @@ -88,18 +89,18 @@ int minix_new_block(struct inode * inode) struct buffer_head *bh = sbi->s_zmap[i]; int j; - lock_kernel(); + spin_lock(&bitmap_lock); j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); if (j < bits_per_zone) { minix_set_bit(j, bh->b_data); - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); j += i * bits_per_zone + sbi->s_firstdatazone-1; if (j < sbi->s_firstdatazone || j >= sbi->s_nzones) break; return j; } - unlock_kernel(); + spin_unlock(&bitmap_lock); } return 0; } @@ -211,10 +212,10 @@ void minix_free_inode(struct inode * inode) minix_clear_inode(inode); /* clear on-disk copy */ bh = sbi->s_imap[ino]; - lock_kernel(); + spin_lock(&bitmap_lock); if (!minix_test_and_clear_bit(bit, bh->b_data)) printk("minix_free_inode: bit %lu already cleared\n", bit); - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); out: clear_inode(inode); /* clear in-memory copy */ @@ -237,7 +238,7 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) j = bits_per_zone; bh = NULL; *error = -ENOSPC; - lock_kernel(); + spin_lock(&bitmap_lock); for (i = 0; i < sbi->s_imap_blocks; i++) { bh = sbi->s_imap[i]; j = minix_find_first_zero_bit(bh->b_data, bits_per_zone); @@ -245,17 +246,17 @@ struct inode * minix_new_inode(const struct inode * dir, int * error) break; } if (!bh || j >= bits_per_zone) { - unlock_kernel(); + spin_unlock(&bitmap_lock); iput(inode); return NULL; } if (minix_test_and_set_bit(j, bh->b_data)) { /* shouldn't happen */ - unlock_kernel(); + spin_unlock(&bitmap_lock); printk("minix_new_inode: bit already set\n"); iput(inode); return NULL; } - unlock_kernel(); + spin_unlock(&bitmap_lock); mark_buffer_dirty(bh); j += i * bits_per_zone; if (!j || j > sbi->s_ninodes) { diff --git a/fs/minix/dir.c b/fs/minix/dir.c index e5f2064..d407e7a 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -11,7 +11,6 @@ #include "minix.h" #include #include -#include #include typedef struct minix_dir_entry minix_dirent; @@ -20,6 +19,7 @@ typedef struct minix3_dir_entry minix3_dirent; static int minix_readdir(struct file *, void *, filldir_t); const struct file_operations minix_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = minix_readdir, .fsync = simple_fsync, @@ -102,8 +102,6 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) char *name; __u32 inumber; - lock_kernel(); - pos = (pos + chunk_size-1) & ~(chunk_size-1); if (pos >= inode->i_size) goto done; @@ -146,7 +144,6 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) done: filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; - unlock_kernel(); return 0; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index f91a236..74ea82d 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -35,8 +35,6 @@ static void minix_put_super(struct super_block *sb) int i; struct minix_sb_info *sbi = minix_sb(sb); - lock_kernel(); - if (!(sb->s_flags & MS_RDONLY)) { if (sbi->s_version != MINIX_V3) /* s_state is now out from V3 sb */ sbi->s_ms->s_state = sbi->s_mount_state; @@ -50,8 +48,6 @@ static void minix_put_super(struct super_block *sb) kfree(sbi->s_imap); sb->s_fs_info = NULL; kfree(sbi); - - unlock_kernel(); } static struct kmem_cache * minix_inode_cachep; -- cgit v0.10.2 From 5ac3455a843d2ca77333c954eea83aa4514c8199 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 16 Jun 2009 23:59:37 -0400 Subject: get rid of BKL in fs/sysv Signed-off-by: Al Viro diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index c779807..4e50286 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -15,13 +15,13 @@ #include #include -#include #include #include "sysv.h" static int sysv_readdir(struct file *, void *, filldir_t); const struct file_operations sysv_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = sysv_readdir, .fsync = simple_fsync, @@ -74,8 +74,6 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) unsigned long n = pos >> PAGE_CACHE_SHIFT; unsigned long npages = dir_pages(inode); - lock_kernel(); - pos = (pos + SYSV_DIRSIZE-1) & ~(SYSV_DIRSIZE-1); if (pos >= inode->i_size) goto done; @@ -113,7 +111,6 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) done: filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset; - unlock_kernel(); return 0; } diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 4799234..9824743 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -21,7 +21,6 @@ * the superblock. */ -#include #include #include #include @@ -37,7 +36,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) unsigned long time = get_seconds(), old_time; lock_super(sb); - lock_kernel(); /* * If we are going to write out the super block, @@ -52,7 +50,6 @@ static int sysv_sync_fs(struct super_block *sb, int wait) mark_buffer_dirty(sbi->s_bh2); } - unlock_kernel(); unlock_super(sb); return 0; @@ -82,8 +79,6 @@ static void sysv_put_super(struct super_block *sb) { struct sysv_sb_info *sbi = SYSV_SB(sb); - lock_kernel(); - if (sb->s_dirt) sysv_write_super(sb); @@ -99,8 +94,6 @@ static void sysv_put_super(struct super_block *sb) brelse(sbi->s_bh2); kfree(sbi); - - unlock_kernel(); } static int sysv_statfs(struct dentry *dentry, struct kstatfs *buf) @@ -275,7 +268,6 @@ int sysv_write_inode(struct inode *inode, int wait) return -EIO; } - lock_kernel(); raw_inode->i_mode = cpu_to_fs16(sbi, inode->i_mode); raw_inode->i_uid = cpu_to_fs16(sbi, fs_high2lowuid(inode->i_uid)); raw_inode->i_gid = cpu_to_fs16(sbi, fs_high2lowgid(inode->i_gid)); @@ -291,7 +283,6 @@ int sysv_write_inode(struct inode *inode, int wait) for (block = 0; block < 10+1+1+1; block++) write3byte(sbi, (u8 *)&si->i_data[block], &raw_inode->i_data[3*block]); - unlock_kernel(); mark_buffer_dirty(bh); if (wait) { sync_dirty_buffer(bh); @@ -315,9 +306,7 @@ static void sysv_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); inode->i_size = 0; sysv_truncate(inode); - lock_kernel(); sysv_free_inode(inode); - unlock_kernel(); } static struct kmem_cache *sysv_inode_cachep; -- cgit v0.10.2