From 4688a066ecf60086ea82f68edb3b036b567d2c08 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 22 Jan 2011 20:05:05 +0100 Subject: adfs: remove the big kernel lock According to Russell King, adfs was written to not require the big kernel lock, and all inode updates are done under adfs_dir_lock. All other metadata in adfs is read-only and does not require locking. The use of the BKL is the result of various pushdowns from the VFS operations. Signed-off-by: Arnd Bergmann Acked-by: Russell King Cc: Stuart Swales diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig index 1dd5f34..e55182a 100644 --- a/fs/adfs/Kconfig +++ b/fs/adfs/Kconfig @@ -1,7 +1,6 @@ config ADFS_FS tristate "ADFS file system support (EXPERIMENTAL)" depends on BLOCK && EXPERIMENTAL - depends on BKL # need to fix help The Acorn Disc Filing System is the standard file system of the RiscOS operating system which runs on Acorn's ARM-based Risc PC diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 3b4a764..3d83075a 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c @@ -9,7 +9,6 @@ * * Common directory handling for ADFS */ -#include #include "adfs.h" /* @@ -27,8 +26,6 @@ adfs_readdir(struct file *filp, void *dirent, filldir_t filldir) struct adfs_dir dir; int ret = 0; - lock_kernel(); - if (filp->f_pos >> 32) goto out; @@ -70,7 +67,6 @@ free_out: ops->free(&dir); out: - unlock_kernel(); return ret; } @@ -276,7 +272,6 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct object_info obj; int error; - lock_kernel(); error = adfs_dir_lookup_byname(dir, &dentry->d_name, &obj); if (error == 0) { error = -EACCES; @@ -288,7 +283,6 @@ adfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (inode) error = 0; } - unlock_kernel(); d_add(dentry, inode); return ERR_PTR(error); } diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 65794b8..09fe401 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -7,7 +7,6 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include #include #include #include "adfs.h" @@ -316,8 +315,6 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr) unsigned int ia_valid = attr->ia_valid; int error; - lock_kernel(); - error = inode_change_ok(inode, attr); /* @@ -359,7 +356,6 @@ adfs_notify_change(struct dentry *dentry, struct iattr *attr) if (ia_valid & (ATTR_SIZE | ATTR_MTIME | ATTR_MODE)) mark_inode_dirty(inode); out: - unlock_kernel(); return error; } @@ -374,7 +370,6 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc) struct object_info obj; int ret; - lock_kernel(); obj.file_id = inode->i_ino; obj.name_len = 0; obj.parent_id = ADFS_I(inode)->parent_id; @@ -384,6 +379,5 @@ int adfs_write_inode(struct inode *inode, struct writeback_control *wbc) obj.size = inode->i_size; ret = adfs_dir_update(sb, &obj, wbc->sync_mode == WB_SYNC_ALL); - unlock_kernel(); return ret; } diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 2d79540..06d7388 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include "adfs.h" #include "dir_f.h" @@ -120,15 +119,11 @@ static void adfs_put_super(struct super_block *sb) int i; struct adfs_sb_info *asb = ADFS_SB(sb); - lock_kernel(); - for (i = 0; i < asb->s_map_size; i++) brelse(asb->s_map[i].dm_bh); kfree(asb->s_map); kfree(asb); sb->s_fs_info = NULL; - - unlock_kernel(); } static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) @@ -359,15 +354,11 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) struct adfs_sb_info *asb; struct inode *root; - lock_kernel(); - sb->s_flags |= MS_NODIRATIME; asb = kzalloc(sizeof(*asb), GFP_KERNEL); - if (!asb) { - unlock_kernel(); + if (!asb) return -ENOMEM; - } sb->s_fs_info = asb; /* set default options */ @@ -485,7 +476,6 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) adfs_error(sb, "get root inode failed\n"); goto error; } - unlock_kernel(); return 0; error_free_bh: @@ -493,7 +483,6 @@ error_free_bh: error: sb->s_fs_info = NULL; kfree(asb); - unlock_kernel(); return -EINVAL; } -- cgit v0.10.2 From f51b452bed4ae5c20e1f8a790e4ed8663d909a40 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Jan 2011 21:54:50 +0100 Subject: tracing: don't trace the BKL No reason to trace it when the last user is gone. Signed-off-by: Arnd Bergmann Acked-by: Frederic Weisbecker diff --git a/include/trace/events/bkl.h b/include/trace/events/bkl.h deleted file mode 100644 index 1af72dc..0000000 --- a/include/trace/events/bkl.h +++ /dev/null @@ -1,61 +0,0 @@ -#undef TRACE_SYSTEM -#define TRACE_SYSTEM bkl - -#if !defined(_TRACE_BKL_H) || defined(TRACE_HEADER_MULTI_READ) -#define _TRACE_BKL_H - -#include - -TRACE_EVENT(lock_kernel, - - TP_PROTO(const char *func, const char *file, int line), - - TP_ARGS(func, file, line), - - TP_STRUCT__entry( - __field( int, depth ) - __field_ext( const char *, func, FILTER_PTR_STRING ) - __field_ext( const char *, file, FILTER_PTR_STRING ) - __field( int, line ) - ), - - TP_fast_assign( - /* We want to record the lock_depth after lock is acquired */ - __entry->depth = current->lock_depth + 1; - __entry->func = func; - __entry->file = file; - __entry->line = line; - ), - - TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, - __entry->file, __entry->line, __entry->func) -); - -TRACE_EVENT(unlock_kernel, - - TP_PROTO(const char *func, const char *file, int line), - - TP_ARGS(func, file, line), - - TP_STRUCT__entry( - __field(int, depth ) - __field(const char *, func ) - __field(const char *, file ) - __field(int, line ) - ), - - TP_fast_assign( - __entry->depth = current->lock_depth; - __entry->func = func; - __entry->file = file; - __entry->line = line; - ), - - TP_printk("depth=%d file:line=%s:%d func=%s()", __entry->depth, - __entry->file, __entry->line, __entry->func) -); - -#endif /* _TRACE_BKL_H */ - -/* This part must be outside protection */ -#include diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c index b135d04a..d80e122 100644 --- a/lib/kernel_lock.c +++ b/lib/kernel_lock.c @@ -10,9 +10,6 @@ #include #include -#define CREATE_TRACE_POINTS -#include - /* * The 'big kernel lock' * @@ -120,8 +117,6 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line) { int depth = current->lock_depth + 1; - trace_lock_kernel(func, file, line); - if (likely(!depth)) { might_sleep(); __lock_kernel(); @@ -134,8 +129,6 @@ void __lockfunc _unlock_kernel(const char *func, const char *file, int line) BUG_ON(current->lock_depth < 0); if (likely(--current->lock_depth < 0)) __unlock_kernel(); - - trace_unlock_kernel(func, file, line); } EXPORT_SYMBOL(_lock_kernel); -- cgit v0.10.2 From 5edc341313a188d94cde7ef87ac31647cea8601a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Jan 2011 22:08:05 +0100 Subject: drivers: remove extraneous includes of smp_lock.h These were missed the last time I cleaned this up globally, because of code moving around or new code getting merged. Signed-off-by: Arnd Bergmann diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c index 53fa96a..8fe3a45 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fp.c +++ b/drivers/scsi/megaraid/megaraid_sas_fp.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index c1e09d5..d6e2a66 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/staging/easycap/easycap_ioctl.c b/drivers/staging/easycap/easycap_ioctl.c index 447953a..c1a0c9c 100644 --- a/drivers/staging/easycap/easycap_ioctl.c +++ b/drivers/staging/easycap/easycap_ioctl.c @@ -25,7 +25,6 @@ */ /*****************************************************************************/ -#include #include "easycap.h" #include "easycap_debug.h" #include "easycap_standard.h" diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 5da051a..350ed40 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 2628564..a3c695a 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 0aaca88..190ca8a 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c index 4bbe820..73f7d6d 100644 --- a/drivers/target/target_core_hba.c +++ b/drivers/target/target_core_hba.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 67f0c09..3df570d 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c index f2a0847..5a9d2ba 100644 --- a/drivers/target/target_core_pscsi.c +++ b/drivers/target/target_core_pscsi.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index 979aebf..8dc6d74 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index c26f674..5ec745f 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 236e22d..121445f 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 52fc0c9..cea5603 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -97,7 +97,6 @@ #include #include #include -#include #include /* used in new tty drivers */ #include /* used in new tty drivers */ #include diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 88dda0c..5c6c314 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -57,7 +57,6 @@ #include #include #include -#include #include #include #include /* used in new tty drivers */ diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index 923a485..2310cb7 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 0065da4..cf73d5b 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -90,7 +90,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c index 4214d58..70808ad 100644 --- a/drivers/tty/tty_ldisc.c +++ b/drivers/tty/tty_ldisc.c @@ -34,8 +34,6 @@ #include #include -#include /* For the moment */ - #include #include diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index c956ed6..adf0ad2 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -26,7 +26,6 @@ #include #include #include -#include /* Don't take this from : 011-015 on the screen aren't spaces */ #define isspace(c) ((c) == ' ') diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index a672ed1..95e05df 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 147ede34..2178efc 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -89,7 +89,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 1235ebd..fd4c937 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include -- cgit v0.10.2 From 9a311b96c3065f362e3348cb5d7af1a57ca6bff9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 22 Jan 2011 20:26:12 +0100 Subject: hpfs: remove the BKL This removes the BKL in hpfs in a rather awful way, by making the code only work on uniprocessor systems without kernel preemption, as suggested by Andi Kleen. The HPFS code probably has close to zero remaining users on current kernels, all archeological uses of the file system can probably be done with the significant restrictions. The hpfs_lock/hpfs_unlock functions are left in the code, sincen Mikulas has indicated that he is still interested in fixing it in a better way. Signed-off-by: Arnd Bergmann Acked-by: Andi Kleen Cc: Mikulas Patocka Cc: linux-fsdevel@vger.kernel.org diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig index 63b6f56..0c39dc3 100644 --- a/fs/hpfs/Kconfig +++ b/fs/hpfs/Kconfig @@ -1,7 +1,7 @@ config HPFS_FS tristate "OS/2 HPFS file system support" depends on BLOCK - depends on BKL # nontrivial to fix + depends on BROKEN || !PREEMPT help OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS is the file system used for organizing files on OS/2 hard disk diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index d32f63a..b3d7c0d 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -6,16 +6,15 @@ * directory VFS functions */ -#include #include #include "hpfs_fn.h" static int hpfs_dir_release(struct inode *inode, struct file *filp) { - lock_kernel(); + hpfs_lock(inode->i_sb); hpfs_del_pos(inode, &filp->f_pos); /*hpfs_write_if_changed(inode);*/ - unlock_kernel(); + hpfs_unlock(inode->i_sb); return 0; } @@ -30,7 +29,7 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) struct hpfs_inode_info *hpfs_inode = hpfs_i(i); struct super_block *s = i->i_sb; - lock_kernel(); + hpfs_lock(s); /*printk("dir lseek\n");*/ if (new_off == 0 || new_off == 1 || new_off == 11 || new_off == 12 || new_off == 13) goto ok; @@ -43,12 +42,12 @@ static loff_t hpfs_dir_lseek(struct file *filp, loff_t off, int whence) } mutex_unlock(&i->i_mutex); ok: - unlock_kernel(); + hpfs_unlock(s); return filp->f_pos = new_off; fail: mutex_unlock(&i->i_mutex); /*printk("illegal lseek: %016llx\n", new_off);*/ - unlock_kernel(); + hpfs_unlock(s); return -ESPIPE; } @@ -64,7 +63,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) int c1, c2 = 0; int ret = 0; - lock_kernel(); + hpfs_lock(inode->i_sb); if (hpfs_sb(inode->i_sb)->sb_chk) { if (hpfs_chk_sectors(inode->i_sb, inode->i_ino, 1, "dir_fnode")) { @@ -167,7 +166,7 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) hpfs_brelse4(&qbh); } out: - unlock_kernel(); + hpfs_unlock(inode->i_sb); return ret; } @@ -197,10 +196,10 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name struct inode *result = NULL; struct hpfs_inode_info *hpfs_result; - lock_kernel(); + hpfs_lock(dir->i_sb); if ((err = hpfs_chk_name(name, &len))) { if (err == -ENAMETOOLONG) { - unlock_kernel(); + hpfs_unlock(dir->i_sb); return ERR_PTR(-ENAMETOOLONG); } goto end_add; @@ -298,7 +297,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name end: end_add: - unlock_kernel(); + hpfs_unlock(dir->i_sb); d_add(dentry, result); return NULL; @@ -311,7 +310,7 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, struct name /*bail:*/ - unlock_kernel(); + hpfs_unlock(dir->i_sb); return ERR_PTR(-ENOENT); } diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index c034088..2dbae20 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -6,16 +6,15 @@ * file VFS functions */ -#include #include "hpfs_fn.h" #define BLOCKS(size) (((size) + 511) >> 9) static int hpfs_file_release(struct inode *inode, struct file *file) { - lock_kernel(); + hpfs_lock(inode->i_sb); hpfs_write_if_changed(inode); - unlock_kernel(); + hpfs_unlock(inode->i_sb); return 0; } @@ -49,14 +48,14 @@ static secno hpfs_bmap(struct inode *inode, unsigned file_secno) static void hpfs_truncate(struct inode *i) { if (IS_IMMUTABLE(i)) return /*-EPERM*/; - lock_kernel(); + hpfs_lock(i->i_sb); hpfs_i(i)->i_n_secs = 0; i->i_blocks = 1 + ((i->i_size + 511) >> 9); hpfs_i(i)->mmu_private = i->i_size; hpfs_truncate_btree(i->i_sb, i->i_ino, 1, ((i->i_size + 511) >> 9)); hpfs_write_inode(i); hpfs_i(i)->i_n_secs = 0; - unlock_kernel(); + hpfs_unlock(i->i_sb); } static int hpfs_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 1c43dbe..c15adbc 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -342,3 +342,25 @@ static inline time32_t gmt_to_local(struct super_block *s, time_t t) extern struct timezone sys_tz; return t - sys_tz.tz_minuteswest * 60 - hpfs_sb(s)->sb_timeshift; } + +/* + * Locking: + * + * hpfs_lock() is a leftover from the big kernel lock. + * Right now, these functions are empty and only left + * for documentation purposes. The file system no longer + * works on SMP systems, so the lock is not needed + * any more. + * + * If someone is interested in making it work again, this + * would be the place to start by adding a per-superblock + * mutex and fixing all the bugs and performance issues + * caused by that. + */ +static inline void hpfs_lock(struct super_block *s) +{ +} + +static inline void hpfs_unlock(struct super_block *s) +{ +} diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 1ae35ba..87f1f78 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -6,7 +6,6 @@ * inode VFS functions */ -#include #include #include "hpfs_fn.h" @@ -267,7 +266,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) struct inode *inode = dentry->d_inode; int error = -EINVAL; - lock_kernel(); + hpfs_lock(inode->i_sb); if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) goto out_unlock; if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) @@ -290,7 +289,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr) hpfs_write_inode(inode); out_unlock: - unlock_kernel(); + hpfs_unlock(inode->i_sb); return error; } @@ -307,8 +306,8 @@ void hpfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); end_writeback(inode); if (!inode->i_nlink) { - lock_kernel(); + hpfs_lock(inode->i_sb); hpfs_remove_fnode(inode->i_sb, inode->i_ino); - unlock_kernel(); + hpfs_unlock(inode->i_sb); } } diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index f4ad9e3..d5f8c8a 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -6,7 +6,6 @@ * adding & removing files & directories */ #include -#include #include "hpfs_fn.h" static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) @@ -25,7 +24,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) struct hpfs_dirent dee; int err; if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err; - lock_kernel(); + hpfs_lock(dir->i_sb); err = -ENOSPC; fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); if (!fnode) @@ -103,7 +102,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) } d_instantiate(dentry, result); mutex_unlock(&hpfs_i(dir)->i_mutex); - unlock_kernel(); + hpfs_unlock(dir->i_sb); return 0; bail3: mutex_unlock(&hpfs_i(dir)->i_mutex); @@ -115,7 +114,7 @@ bail1: brelse(bh); hpfs_free_sectors(dir->i_sb, fno, 1); bail: - unlock_kernel(); + hpfs_unlock(dir->i_sb); return err; } @@ -132,7 +131,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc int err; if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err; - lock_kernel(); + hpfs_lock(dir->i_sb); err = -ENOSPC; fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); if (!fnode) @@ -195,7 +194,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, int mode, struc } d_instantiate(dentry, result); mutex_unlock(&hpfs_i(dir)->i_mutex); - unlock_kernel(); + hpfs_unlock(dir->i_sb); return 0; bail2: @@ -205,7 +204,7 @@ bail1: brelse(bh); hpfs_free_sectors(dir->i_sb, fno, 1); bail: - unlock_kernel(); + hpfs_unlock(dir->i_sb); return err; } @@ -224,7 +223,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t if (hpfs_sb(dir->i_sb)->sb_eas < 2) return -EPERM; if (!new_valid_dev(rdev)) return -EINVAL; - lock_kernel(); + hpfs_lock(dir->i_sb); err = -ENOSPC; fnode = hpfs_alloc_fnode(dir->i_sb, hpfs_i(dir)->i_dno, &fno, &bh); if (!fnode) @@ -274,7 +273,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t d_instantiate(dentry, result); mutex_unlock(&hpfs_i(dir)->i_mutex); brelse(bh); - unlock_kernel(); + hpfs_unlock(dir->i_sb); return 0; bail2: mutex_unlock(&hpfs_i(dir)->i_mutex); @@ -283,7 +282,7 @@ bail1: brelse(bh); hpfs_free_sectors(dir->i_sb, fno, 1); bail: - unlock_kernel(); + hpfs_unlock(dir->i_sb); return err; } @@ -299,9 +298,9 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy struct inode *result; int err; if ((err = hpfs_chk_name(name, &len))) return err==-ENOENT ? -EINVAL : err; - lock_kernel(); + hpfs_lock(dir->i_sb); if (hpfs_sb(dir->i_sb)->sb_eas < 2) { - unlock_kernel(); + hpfs_unlock(dir->i_sb); return -EPERM; } err = -ENOSPC; @@ -354,7 +353,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy hpfs_write_inode_nolock(result); d_instantiate(dentry, result); mutex_unlock(&hpfs_i(dir)->i_mutex); - unlock_kernel(); + hpfs_unlock(dir->i_sb); return 0; bail2: mutex_unlock(&hpfs_i(dir)->i_mutex); @@ -363,7 +362,7 @@ bail1: brelse(bh); hpfs_free_sectors(dir->i_sb, fno, 1); bail: - unlock_kernel(); + hpfs_unlock(dir->i_sb); return err; } @@ -380,7 +379,7 @@ static int hpfs_unlink(struct inode *dir, struct dentry *dentry) int rep = 0; int err; - lock_kernel(); + hpfs_lock(dir->i_sb); hpfs_adjust_length(name, &len); again: mutex_lock(&hpfs_i(inode)->i_parent_mutex); @@ -416,7 +415,7 @@ again: dentry_unhash(dentry); if (!d_unhashed(dentry)) { dput(dentry); - unlock_kernel(); + hpfs_unlock(dir->i_sb); return -ENOSPC; } if (generic_permission(inode, MAY_WRITE, 0, NULL) || @@ -435,7 +434,7 @@ again: if (!err) goto again; } - unlock_kernel(); + hpfs_unlock(dir->i_sb); return -ENOSPC; default: drop_nlink(inode); @@ -448,7 +447,7 @@ out1: out: mutex_unlock(&hpfs_i(dir)->i_mutex); mutex_unlock(&hpfs_i(inode)->i_parent_mutex); - unlock_kernel(); + hpfs_unlock(dir->i_sb); return err; } @@ -466,7 +465,7 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry) int r; hpfs_adjust_length(name, &len); - lock_kernel(); + hpfs_lock(dir->i_sb); mutex_lock(&hpfs_i(inode)->i_parent_mutex); mutex_lock(&hpfs_i(dir)->i_mutex); err = -ENOENT; @@ -508,7 +507,7 @@ out1: out: mutex_unlock(&hpfs_i(dir)->i_mutex); mutex_unlock(&hpfs_i(inode)->i_parent_mutex); - unlock_kernel(); + hpfs_unlock(dir->i_sb); return err; } @@ -521,21 +520,21 @@ static int hpfs_symlink_readpage(struct file *file, struct page *page) int err; err = -EIO; - lock_kernel(); + hpfs_lock(i->i_sb); if (!(fnode = hpfs_map_fnode(i->i_sb, i->i_ino, &bh))) goto fail; err = hpfs_read_ea(i->i_sb, fnode, "SYMLINK", link, PAGE_SIZE); brelse(bh); if (err) goto fail; - unlock_kernel(); + hpfs_unlock(i->i_sb); SetPageUptodate(page); kunmap(page); unlock_page(page); return 0; fail: - unlock_kernel(); + hpfs_unlock(i->i_sb); SetPageError(page); kunmap(page); unlock_page(page); @@ -567,7 +566,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry, err = 0; hpfs_adjust_length(old_name, &old_len); - lock_kernel(); + hpfs_lock(i->i_sb); /* order doesn't matter, due to VFS exclusion */ mutex_lock(&hpfs_i(i)->i_parent_mutex); if (new_inode) @@ -659,7 +658,7 @@ end1: mutex_unlock(&hpfs_i(i)->i_parent_mutex); if (new_inode) mutex_unlock(&hpfs_i(new_inode)->i_parent_mutex); - unlock_kernel(); + hpfs_unlock(i->i_sb); return err; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index b30426b..c89b408 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -103,15 +102,11 @@ static void hpfs_put_super(struct super_block *s) { struct hpfs_sb_info *sbi = hpfs_sb(s); - lock_kernel(); - kfree(sbi->sb_cp_table); kfree(sbi->sb_bmp_dir); unmark_dirty(s); s->s_fs_info = NULL; kfree(sbi); - - unlock_kernel(); } unsigned hpfs_count_one_bitmap(struct super_block *s, secno secno) @@ -143,7 +138,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct super_block *s = dentry->d_sb; struct hpfs_sb_info *sbi = hpfs_sb(s); u64 id = huge_encode_dev(s->s_bdev->bd_dev); - lock_kernel(); + hpfs_lock(s); /*if (sbi->sb_n_free == -1) {*/ sbi->sb_n_free = count_bitmaps(s); @@ -160,7 +155,7 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_fsid.val[1] = (u32)(id >> 32); buf->f_namelen = 254; - unlock_kernel(); + hpfs_unlock(s); return 0; } @@ -406,7 +401,7 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) *flags |= MS_NOATIME; - lock_kernel(); + hpfs_lock(s); lock_super(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; @@ -441,12 +436,12 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) replace_mount_options(s, new_opts); unlock_super(s); - unlock_kernel(); + hpfs_unlock(s); return 0; out_err: unlock_super(s); - unlock_kernel(); + hpfs_unlock(s); kfree(new_opts); return -EINVAL; } @@ -484,13 +479,15 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) int o; - lock_kernel(); + if (num_possible_cpus() > 1) { + printk(KERN_ERR "HPFS is not SMP safe\n"); + return -EINVAL; + } save_mount_options(s, options); sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) { - unlock_kernel(); return -ENOMEM; } s->s_fs_info = sbi; @@ -677,7 +674,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) root->i_blocks = 5; hpfs_brelse4(&qbh); } - unlock_kernel(); return 0; bail4: brelse(bh2); @@ -689,7 +685,6 @@ bail0: kfree(sbi->sb_cp_table); s->s_fs_info = NULL; kfree(sbi); - unlock_kernel(); return -EINVAL; } -- cgit v0.10.2 From 788257d6101d986ac8f2741aaa35974af47f574c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Jan 2011 10:14:12 +0100 Subject: ufs: remove the BKL This introduces a new per-superblock mutex in UFS to replace the big kernel lock. I have been careful to avoid nested calls to lock_ufs and to get the lock order right with respect to other mutexes, in particular lock_super. I did not make any attempt to prove that the big kernel lock is not needed in a particular place in the code, which is very possible. The mutex has a significant performance impact, so it is only used on SMP or PREEMPT configurations. As Nick Piggin noticed, any allocation inside of the lock may end up deadlocking when we get to ufs_getfrag_block in the reclaim task, so we now use GFP_NOFS. Signed-off-by: Arnd Bergmann Tested-by: Nick Bowler Cc: Evgeniy Dushistov Cc: Nick Piggin diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig index 30c8f22..e4f10a4 100644 --- a/fs/ufs/Kconfig +++ b/fs/ufs/Kconfig @@ -1,7 +1,6 @@ config UFS_FS tristate "UFS file system support (read only)" depends on BLOCK - depends on BKL # probably fixable help BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, OpenBSD and NeXTstep) use a file system called UFS. Some System V diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 2b251f2..03c255f 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include @@ -43,7 +42,7 @@ #include "swab.h" #include "util.h" -static u64 ufs_frag_map(struct inode *inode, sector_t frag); +static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock); static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t offsets[4]) { @@ -82,7 +81,7 @@ static int ufs_block_to_path(struct inode *inode, sector_t i_block, sector_t off * the begining of the filesystem. */ -static u64 ufs_frag_map(struct inode *inode, sector_t frag) +static u64 ufs_frag_map(struct inode *inode, sector_t frag, bool needs_lock) { struct ufs_inode_info *ufsi = UFS_I(inode); struct super_block *sb = inode->i_sb; @@ -107,7 +106,8 @@ static u64 ufs_frag_map(struct inode *inode, sector_t frag) p = offsets; - lock_kernel(); + if (needs_lock) + lock_ufs(sb); if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) goto ufs2; @@ -152,7 +152,8 @@ ufs2: ret = temp + (u64) (frag & uspi->s_fpbmask); out: - unlock_kernel(); + if (needs_lock) + unlock_ufs(sb); return ret; } @@ -415,14 +416,16 @@ out: int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create) { struct super_block * sb = inode->i_sb; - struct ufs_sb_private_info * uspi = UFS_SB(sb)->s_uspi; + struct ufs_sb_info * sbi = UFS_SB(sb); + struct ufs_sb_private_info * uspi = sbi->s_uspi; struct buffer_head * bh; int ret, err, new; unsigned long ptr,phys; u64 phys64 = 0; + bool needs_lock = (sbi->mutex_owner != current); if (!create) { - phys64 = ufs_frag_map(inode, fragment); + phys64 = ufs_frag_map(inode, fragment, needs_lock); UFSD("phys64 = %llu\n", (unsigned long long)phys64); if (phys64) map_bh(bh_result, sb, phys64); @@ -436,7 +439,8 @@ int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buffer_head ret = 0; bh = NULL; - lock_kernel(); + if (needs_lock) + lock_ufs(sb); UFSD("ENTER, ino %lu, fragment %llu\n", inode->i_ino, (unsigned long long)fragment); if (fragment > @@ -498,7 +502,9 @@ out: set_buffer_new(bh_result); map_bh(bh_result, sb, phys); abort: - unlock_kernel(); + if (needs_lock) + unlock_ufs(sb); + return err; abort_too_big: @@ -506,48 +512,6 @@ abort_too_big: goto abort; } -static struct buffer_head *ufs_getfrag(struct inode *inode, - unsigned int fragment, - int create, int *err) -{ - struct buffer_head dummy; - int error; - - dummy.b_state = 0; - dummy.b_blocknr = -1000; - error = ufs_getfrag_block(inode, fragment, &dummy, create); - *err = error; - if (!error && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); - if (buffer_new(&dummy)) { - memset(bh->b_data, 0, inode->i_sb->s_blocksize); - set_buffer_uptodate(bh); - mark_buffer_dirty(bh); - } - return bh; - } - return NULL; -} - -struct buffer_head * ufs_bread (struct inode * inode, unsigned fragment, - int create, int * err) -{ - struct buffer_head * bh; - - UFSD("ENTER, ino %lu, fragment %u\n", inode->i_ino, fragment); - bh = ufs_getfrag (inode, fragment, create, err); - if (!bh || buffer_uptodate(bh)) - return bh; - ll_rw_block (READ, 1, &bh); - wait_on_buffer (bh); - if (buffer_uptodate(bh)) - return bh; - brelse (bh); - *err = -EIO; - return NULL; -} - static int ufs_writepage(struct page *page, struct writeback_control *wbc) { return block_write_full_page(page,ufs_getfrag_block,wbc); @@ -900,9 +864,9 @@ static int ufs_update_inode(struct inode * inode, int do_sync) int ufs_write_inode(struct inode *inode, struct writeback_control *wbc) { int ret; - lock_kernel(); + lock_ufs(inode->i_sb); ret = ufs_update_inode(inode, wbc->sync_mode == WB_SYNC_ALL); - unlock_kernel(); + unlock_ufs(inode->i_sb); return ret; } @@ -922,22 +886,22 @@ void ufs_evict_inode(struct inode * inode) if (want_delete) { loff_t old_i_size; /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ - lock_kernel(); + lock_ufs(inode->i_sb); mark_inode_dirty(inode); ufs_update_inode(inode, IS_SYNC(inode)); old_i_size = inode->i_size; inode->i_size = 0; if (inode->i_blocks && ufs_truncate(inode, old_i_size)) ufs_warning(inode->i_sb, __func__, "ufs_truncate failed\n"); - unlock_kernel(); + unlock_ufs(inode->i_sb); } invalidate_inode_buffers(inode); end_writeback(inode); if (want_delete) { - lock_kernel(); + lock_ufs(inode->i_sb); ufs_free_inode (inode); - unlock_kernel(); + unlock_ufs(inode->i_sb); } } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index 12f39b9..205030a 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -29,7 +29,6 @@ #include #include -#include #include "ufs_fs.h" #include "ufs.h" @@ -55,16 +54,16 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, stru if (dentry->d_name.len > UFS_MAXNAMLEN) return ERR_PTR(-ENAMETOOLONG); - lock_kernel(); + lock_ufs(dir->i_sb); ino = ufs_inode_by_name(dir, &dentry->d_name); if (ino) { inode = ufs_iget(dir->i_sb, ino); if (IS_ERR(inode)) { - unlock_kernel(); + unlock_ufs(dir->i_sb); return ERR_CAST(inode); } } - unlock_kernel(); + unlock_ufs(dir->i_sb); d_add(dentry, inode); return NULL; } @@ -93,9 +92,9 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, int mode, inode->i_fop = &ufs_file_operations; inode->i_mapping->a_ops = &ufs_aops; mark_inode_dirty(inode); - lock_kernel(); + lock_ufs(dir->i_sb); err = ufs_add_nondir(dentry, inode); - unlock_kernel(); + unlock_ufs(dir->i_sb); } UFSD("END: err=%d\n", err); return err; @@ -115,9 +114,9 @@ static int ufs_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t init_special_inode(inode, mode, rdev); ufs_set_inode_dev(inode->i_sb, UFS_I(inode), rdev); mark_inode_dirty(inode); - lock_kernel(); + lock_ufs(dir->i_sb); err = ufs_add_nondir(dentry, inode); - unlock_kernel(); + unlock_ufs(dir->i_sb); } return err; } @@ -133,7 +132,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, if (l > sb->s_blocksize) goto out_notlocked; - lock_kernel(); + lock_ufs(dir->i_sb); inode = ufs_new_inode(dir, S_IFLNK | S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) @@ -156,7 +155,7 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, err = ufs_add_nondir(dentry, inode); out: - unlock_kernel(); + unlock_ufs(dir->i_sb); out_notlocked: return err; @@ -172,9 +171,9 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, struct inode *inode = old_dentry->d_inode; int error; - lock_kernel(); + lock_ufs(dir->i_sb); if (inode->i_nlink >= UFS_LINK_MAX) { - unlock_kernel(); + unlock_ufs(dir->i_sb); return -EMLINK; } @@ -183,7 +182,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, ihold(inode); error = ufs_add_nondir(dentry, inode); - unlock_kernel(); + unlock_ufs(dir->i_sb); return error; } @@ -195,7 +194,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) if (dir->i_nlink >= UFS_LINK_MAX) goto out; - lock_kernel(); + lock_ufs(dir->i_sb); inode_inc_link_count(dir); inode = ufs_new_inode(dir, S_IFDIR|mode); @@ -216,7 +215,7 @@ static int ufs_mkdir(struct inode * dir, struct dentry * dentry, int mode) err = ufs_add_link(dentry, inode); if (err) goto out_fail; - unlock_kernel(); + unlock_ufs(dir->i_sb); d_instantiate(dentry, inode); out: @@ -228,7 +227,7 @@ out_fail: iput (inode); out_dir: inode_dec_link_count(dir); - unlock_kernel(); + unlock_ufs(dir->i_sb); goto out; } @@ -259,7 +258,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) struct inode * inode = dentry->d_inode; int err= -ENOTEMPTY; - lock_kernel(); + lock_ufs(dir->i_sb); if (ufs_empty_dir (inode)) { err = ufs_unlink(dir, dentry); if (!err) { @@ -268,7 +267,7 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry) inode_dec_link_count(dir); } } - unlock_kernel(); + unlock_ufs(dir->i_sb); return err; } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 2c61ac5..7693d62 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -84,7 +84,6 @@ #include #include #include -#include #include #include #include @@ -96,6 +95,26 @@ #include "swab.h" #include "util.h" +void lock_ufs(struct super_block *sb) +{ +#if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT) + struct ufs_sb_info *sbi = UFS_SB(sb); + + mutex_lock(&sbi->mutex); + sbi->mutex_owner = current; +#endif +} + +void unlock_ufs(struct super_block *sb) +{ +#if defined(CONFIG_SMP) || defined (CONFIG_PREEMPT) + struct ufs_sb_info *sbi = UFS_SB(sb); + + sbi->mutex_owner = NULL; + mutex_unlock(&sbi->mutex); +#endif +} + static struct inode *ufs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) { struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; @@ -313,7 +332,6 @@ void ufs_panic (struct super_block * sb, const char * function, struct ufs_super_block_first * usb1; va_list args; - lock_kernel(); uspi = UFS_SB(sb)->s_uspi; usb1 = ubh_get_usb_first(uspi); @@ -521,7 +539,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb) */ size = uspi->s_cssize; blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; - base = space = kmalloc(size, GFP_KERNEL); + base = space = kmalloc(size, GFP_NOFS); if (!base) goto failed; sbi->s_csp = (struct ufs_csum *)space; @@ -546,7 +564,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb) * Read cylinder group (we read only first fragment from block * at this time) and prepare internal data structures for cg caching. */ - if (!(sbi->s_ucg = kmalloc (sizeof(struct buffer_head *) * uspi->s_ncg, GFP_KERNEL))) + if (!(sbi->s_ucg = kmalloc (sizeof(struct buffer_head *) * uspi->s_ncg, GFP_NOFS))) goto failed; for (i = 0; i < uspi->s_ncg; i++) sbi->s_ucg[i] = NULL; @@ -564,7 +582,7 @@ static int ufs_read_cylinder_structures(struct super_block *sb) ufs_print_cylinder_stuff(sb, (struct ufs_cylinder_group *) sbi->s_ucg[i]->b_data); } for (i = 0; i < UFS_MAX_GROUP_LOADED; i++) { - if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_KERNEL))) + if (!(sbi->s_ucpi[i] = kmalloc (sizeof(struct ufs_cg_private_info), GFP_NOFS))) goto failed; sbi->s_cgno[i] = UFS_CGNO_EMPTY; } @@ -646,8 +664,6 @@ static void ufs_put_super_internal(struct super_block *sb) UFSD("ENTER\n"); - lock_kernel(); - ufs_put_cstotal(sb); size = uspi->s_cssize; blks = (size + uspi->s_fsize - 1) >> uspi->s_fshift; @@ -676,8 +692,6 @@ static void ufs_put_super_internal(struct super_block *sb) kfree (sbi->s_ucg); kfree (base); - unlock_kernel(); - UFSD("EXIT\n"); } @@ -696,8 +710,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) unsigned maxsymlen; int ret = -EINVAL; - lock_kernel(); - uspi = NULL; ubh = NULL; flags = 0; @@ -718,6 +730,7 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) goto failed; } #endif + mutex_init(&sbi->mutex); /* * Set default mount options * Parse mount options @@ -1165,7 +1178,6 @@ magic_found: goto failed; UFSD("EXIT\n"); - unlock_kernel(); return 0; dalloc_failed: @@ -1177,12 +1189,10 @@ failed: kfree(sbi); sb->s_fs_info = NULL; UFSD("EXIT (FAILED)\n"); - unlock_kernel(); return ret; failed_nomem: UFSD("EXIT (NOMEM)\n"); - unlock_kernel(); return -ENOMEM; } @@ -1193,8 +1203,8 @@ static int ufs_sync_fs(struct super_block *sb, int wait) struct ufs_super_block_third * usb3; unsigned flags; + lock_ufs(sb); lock_super(sb); - lock_kernel(); UFSD("ENTER\n"); @@ -1213,8 +1223,8 @@ static int ufs_sync_fs(struct super_block *sb, int wait) sb->s_dirt = 0; UFSD("EXIT\n"); - unlock_kernel(); unlock_super(sb); + unlock_ufs(sb); return 0; } @@ -1256,7 +1266,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) unsigned new_mount_opt, ufstype; unsigned flags; - lock_kernel(); + lock_ufs(sb); lock_super(sb); uspi = UFS_SB(sb)->s_uspi; flags = UFS_SB(sb)->s_flags; @@ -1272,7 +1282,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufs_set_opt (new_mount_opt, ONERROR_LOCK); if (!ufs_parse_options (data, &new_mount_opt)) { unlock_super(sb); - unlock_kernel(); + unlock_ufs(sb); return -EINVAL; } if (!(new_mount_opt & UFS_MOUNT_UFSTYPE)) { @@ -1280,14 +1290,14 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) } else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) { printk("ufstype can't be changed during remount\n"); unlock_super(sb); - unlock_kernel(); + unlock_ufs(sb); return -EINVAL; } if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { UFS_SB(sb)->s_mount_opt = new_mount_opt; unlock_super(sb); - unlock_kernel(); + unlock_ufs(sb); return 0; } @@ -1313,7 +1323,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) printk("ufs was compiled with read-only support, " "can't be mounted as read-write\n"); unlock_super(sb); - unlock_kernel(); + unlock_ufs(sb); return -EINVAL; #else if (ufstype != UFS_MOUNT_UFSTYPE_SUN && @@ -1323,13 +1333,13 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) ufstype != UFS_MOUNT_UFSTYPE_UFS2) { printk("this ufstype is read-only supported\n"); unlock_super(sb); - unlock_kernel(); + unlock_ufs(sb); return -EINVAL; } if (!ufs_read_cylinder_structures(sb)) { printk("failed during remounting\n"); unlock_super(sb); - unlock_kernel(); + unlock_ufs(sb); return -EPERM; } sb->s_flags &= ~MS_RDONLY; @@ -1337,7 +1347,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) } UFS_SB(sb)->s_mount_opt = new_mount_opt; unlock_super(sb); - unlock_kernel(); + unlock_ufs(sb); return 0; } @@ -1371,7 +1381,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) struct ufs_super_block_third *usb3; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); - lock_kernel(); + lock_ufs(sb); usb1 = ubh_get_usb_first(uspi); usb2 = ubh_get_usb_second(uspi); @@ -1395,7 +1405,7 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_fsid.val[0] = (u32)id; buf->f_fsid.val[1] = (u32)(id >> 32); - unlock_kernel(); + unlock_ufs(sb); return 0; } @@ -1405,7 +1415,7 @@ static struct kmem_cache * ufs_inode_cachep; static struct inode *ufs_alloc_inode(struct super_block *sb) { struct ufs_inode_info *ei; - ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_KERNEL); + ei = (struct ufs_inode_info *)kmem_cache_alloc(ufs_inode_cachep, GFP_NOFS); if (!ei) return NULL; ei->vfs_inode.i_version = 1; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index a58f915..e56a4f5 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -467,7 +466,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) block_truncate_page(inode->i_mapping, inode->i_size, ufs_getfrag_block); - lock_kernel(); while (1) { retry = ufs_trunc_direct(inode); retry |= ufs_trunc_indirect(inode, UFS_IND_BLOCK, @@ -487,7 +485,6 @@ int ufs_truncate(struct inode *inode, loff_t old_i_size) inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; ufsi->i_lastfrag = DIRECT_FRAGMENT; - unlock_kernel(); mark_inode_dirty(inode); out: UFSD("EXIT: err %d\n", err); @@ -510,7 +507,9 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr) /* XXX(truncate): truncate_setsize should be called last */ truncate_setsize(inode, attr->ia_size); + lock_ufs(inode->i_sb); error = ufs_truncate(inode, old_i_size); + unlock_ufs(inode->i_sb); if (error) return error; } diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h index c08782e..5be2755 100644 --- a/fs/ufs/ufs.h +++ b/fs/ufs/ufs.h @@ -18,6 +18,8 @@ struct ufs_sb_info { unsigned s_cgno[UFS_MAX_GROUP_LOADED]; unsigned short s_cg_loaded; unsigned s_mount_opt; + struct mutex mutex; + struct task_struct *mutex_owner; }; struct ufs_inode_info { @@ -109,7 +111,6 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long); extern int ufs_write_inode (struct inode *, struct writeback_control *); extern int ufs_sync_inode (struct inode *); extern void ufs_evict_inode (struct inode *); -extern struct buffer_head * ufs_bread (struct inode *, unsigned, int, int *); extern int ufs_getfrag_block (struct inode *inode, sector_t fragment, struct buffer_head *bh_result, int create); /* namei.c */ @@ -154,4 +155,7 @@ static inline u32 ufs_dtogd(struct ufs_sb_private_info * uspi, u64 b) return do_div(b, uspi->s_fpg); } +extern void lock_ufs(struct super_block *sb); +extern void unlock_ufs(struct super_block *sb); + #endif /* _UFS_UFS_H */ diff --git a/fs/ufs/util.c b/fs/ufs/util.c index d2c36d5..95425b5 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -27,7 +27,7 @@ struct ufs_buffer_head * _ubh_bread_ (struct ufs_sb_private_info * uspi, if (count > UFS_MAXFRAG) return NULL; ubh = (struct ufs_buffer_head *) - kmalloc (sizeof (struct ufs_buffer_head), GFP_KERNEL); + kmalloc (sizeof (struct ufs_buffer_head), GFP_NOFS); if (!ubh) return NULL; ubh->fragment = fragment; -- cgit v0.10.2 From 77b2283604bdd7053494a97b0e2fee97148206c6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 22 Jan 2011 23:44:59 +0100 Subject: x25: remove the BKL This replaces all instances of lock_kernel in x25 with lock_sock, taking care to release the socket lock around sleeping functions (sock_alloc_send_skb and skb_recv_datagram). It is not clear whether this is a correct solution, but it seem to be what other protocols do in the same situation. Includes a fix suggested by Eric Dumazet. Signed-off-by: Arnd Bergmann Acked-by: David S. Miller Tested-by: Andrew Hendry Cc: linux-x25@vger.kernel.org Cc: netdev@vger.kernel.org Cc: Eric Dumazet diff --git a/net/x25/Kconfig b/net/x25/Kconfig index 2196e55..e6759c9 100644 --- a/net/x25/Kconfig +++ b/net/x25/Kconfig @@ -5,7 +5,6 @@ config X25 tristate "CCITT X.25 Packet Layer (EXPERIMENTAL)" depends on EXPERIMENTAL - depends on BKL # should be fixable ---help--- X.25 is a set of standardized network protocols, similar in scope to frame relay; the one physical line from your box to the X.25 network diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index ad96ee9..4680b1e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include @@ -432,15 +431,6 @@ void x25_destroy_socket_from_timer(struct sock *sk) sock_put(sk); } -static void x25_destroy_socket(struct sock *sk) -{ - sock_hold(sk); - lock_sock(sk); - __x25_destroy_socket(sk); - release_sock(sk); - sock_put(sk); -} - /* * Handling for system calls applied via the various interfaces to a * X.25 socket object. @@ -647,18 +637,19 @@ static int x25_release(struct socket *sock) struct sock *sk = sock->sk; struct x25_sock *x25; - lock_kernel(); if (!sk) - goto out; + return 0; x25 = x25_sk(sk); + sock_hold(sk); + lock_sock(sk); switch (x25->state) { case X25_STATE_0: case X25_STATE_2: x25_disconnect(sk, 0, 0, 0); - x25_destroy_socket(sk); + __x25_destroy_socket(sk); goto out; case X25_STATE_1: @@ -678,7 +669,8 @@ static int x25_release(struct socket *sock) sock_orphan(sk); out: - unlock_kernel(); + release_sock(sk); + sock_put(sk); return 0; } @@ -1085,7 +1077,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, size_t size; int qbit = 0, rc = -EINVAL; - lock_kernel(); + lock_sock(sk); if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_OOB|MSG_EOR|MSG_CMSG_COMPAT)) goto out; @@ -1148,7 +1140,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, size = len + X25_MAX_L2_LEN + X25_EXT_MIN_LEN; + release_sock(sk); skb = sock_alloc_send_skb(sk, size, noblock, &rc); + lock_sock(sk); if (!skb) goto out; X25_SKB_CB(skb)->flags = msg->msg_flags; @@ -1231,26 +1225,10 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, len++; } - /* - * lock_sock() is currently only used to serialize this x25_kick() - * against input-driven x25_kick() calls. It currently only blocks - * incoming packets for this socket and does not protect against - * any other socket state changes and is not called from anywhere - * else. As x25_kick() cannot block and as long as all socket - * operations are BKL-wrapped, we don't need take to care about - * purging the backlog queue in x25_release(). - * - * Using lock_sock() to protect all socket operations entirely - * (and making the whole x25 stack SMP aware) unfortunately would - * require major changes to {send,recv}msg and skb allocation methods. - * -> 2.5 ;) - */ - lock_sock(sk); x25_kick(sk); - release_sock(sk); rc = len; out: - unlock_kernel(); + release_sock(sk); return rc; out_kfree_skb: kfree_skb(skb); @@ -1271,7 +1249,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int rc = -ENOTCONN; - lock_kernel(); + lock_sock(sk); /* * This works for seqpacket too. The receiver has ordered the queue for * us! We do one quick check first though @@ -1300,8 +1278,10 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_flags |= MSG_OOB; } else { /* Now we can treat all alike */ + release_sock(sk); skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &rc); + lock_sock(sk); if (!skb) goto out; @@ -1338,14 +1318,12 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = sizeof(struct sockaddr_x25); - lock_sock(sk); x25_check_rbuf(sk); - release_sock(sk); rc = copied; out_free_dgram: skb_free_datagram(sk, skb); out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1581,18 +1559,18 @@ out_cud_release: case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; - lock_kernel(); + lock_sock(sk); if (sk->sk_state != TCP_CLOSE) break; clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); - unlock_kernel(); + release_sock(sk); rc = 0; break; } case SIOCX25SENDCALLACCPT: { rc = -EINVAL; - lock_kernel(); + lock_sock(sk); if (sk->sk_state != TCP_ESTABLISHED) break; /* must call accptapprv above */ @@ -1600,7 +1578,7 @@ out_cud_release: break; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; - unlock_kernel(); + release_sock(sk); rc = 0; break; } diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c index d00649f..0144271 100644 --- a/net/x25/x25_out.c +++ b/net/x25/x25_out.c @@ -68,8 +68,11 @@ int x25_output(struct sock *sk, struct sk_buff *skb) frontlen = skb_headroom(skb); while (skb->len > 0) { - if ((skbn = sock_alloc_send_skb(sk, frontlen + max_len, - noblock, &err)) == NULL){ + release_sock(sk); + skbn = sock_alloc_send_skb(sk, frontlen + max_len, + noblock, &err); + lock_sock(sk); + if (!skbn) { if (err == -EWOULDBLOCK && noblock){ kfree_skb(skb); return sent; -- cgit v0.10.2 From 60d9f461a20ba59219fdcdc30cbf8e3a4ad3f625 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 23 Jan 2011 00:21:11 +0100 Subject: appletalk: remove the BKL This changes appletalk to use lock_sock instead of lock_kernel for serialization. I tried to make sure that we don't hold the socket lock during sleeping functions, but I did not try to prove whether the locks are necessary in the first place. Compile-tested only. Signed-off-by: Arnd Bergmann Acked-by: David S. Miller Cc: Arnaldo Carvalho de Melo Cc: David Miller Cc: netdev@vger.kernel.org diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig index 0b376a9..f5a8916 100644 --- a/drivers/net/appletalk/Kconfig +++ b/drivers/net/appletalk/Kconfig @@ -3,7 +3,6 @@ # config ATALK tristate "Appletalk protocol support" - depends on BKL # waiting to be removed from net/appletalk/ddp.c select LLC ---help--- AppleTalk is the protocol that Apple computers can use to communicate diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index c410b93..3d4f4b0 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -54,7 +54,6 @@ #include #include #include -#include #include /* For TIOCOUTQ/INQ */ #include #include @@ -1052,13 +1051,13 @@ static int atalk_release(struct socket *sock) { struct sock *sk = sock->sk; - lock_kernel(); + lock_sock(sk); if (sk) { sock_orphan(sk); sock->sk = NULL; atalk_destroy_socket(sk); } - unlock_kernel(); + release_sock(sk); return 0; } @@ -1143,7 +1142,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr->sat_family != AF_APPLETALK) return -EAFNOSUPPORT; - lock_kernel(); + lock_sock(sk); if (addr->sat_addr.s_net == htons(ATADDR_ANYNET)) { struct atalk_addr *ap = atalk_find_primary(); @@ -1179,7 +1178,7 @@ static int atalk_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sock_reset_flag(sk, SOCK_ZAPPED); err = 0; out: - unlock_kernel(); + release_sock(sk); return err; } @@ -1215,7 +1214,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, #endif } - lock_kernel(); + lock_sock(sk); err = -EBUSY; if (sock_flag(sk, SOCK_ZAPPED)) if (atalk_autobind(sk) < 0) @@ -1233,7 +1232,7 @@ static int atalk_connect(struct socket *sock, struct sockaddr *uaddr, sk->sk_state = TCP_ESTABLISHED; err = 0; out: - unlock_kernel(); + release_sock(sk); return err; } @@ -1249,7 +1248,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, struct atalk_sock *at = at_sk(sk); int err; - lock_kernel(); + lock_sock(sk); err = -ENOBUFS; if (sock_flag(sk, SOCK_ZAPPED)) if (atalk_autobind(sk) < 0) @@ -1277,17 +1276,7 @@ static int atalk_getname(struct socket *sock, struct sockaddr *uaddr, memcpy(uaddr, &sat, sizeof(sat)); out: - unlock_kernel(); - return err; -} - -static unsigned int atalk_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - int err; - lock_kernel(); - err = datagram_poll(file, sock, wait); - unlock_kernel(); + release_sock(sk); return err; } @@ -1596,7 +1585,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr if (len > DDP_MAXSZ) return -EMSGSIZE; - lock_kernel(); + lock_sock(sk); if (usat) { err = -EBUSY; if (sock_flag(sk, SOCK_ZAPPED)) @@ -1651,7 +1640,9 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr sk, size, dev->name); size += dev->hard_header_len; + release_sock(sk); skb = sock_alloc_send_skb(sk, size, (flags & MSG_DONTWAIT), &err); + lock_sock(sk); if (!skb) goto out; @@ -1738,7 +1729,7 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr SOCK_DEBUG(sk, "SK %p: Done write (%Zd).\n", sk, len); out: - unlock_kernel(); + release_sock(sk); return err ? : len; } @@ -1753,9 +1744,10 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr int err = 0; struct sk_buff *skb; - lock_kernel(); skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); + lock_sock(sk); + if (!skb) goto out; @@ -1787,7 +1779,7 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr skb_free_datagram(sk, skb); /* Free the datagram. */ out: - unlock_kernel(); + release_sock(sk); return err ? : copied; } @@ -1887,7 +1879,7 @@ static const struct proto_ops atalk_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = atalk_getname, - .poll = atalk_poll, + .poll = datagram_poll, .ioctl = atalk_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = atalk_compat_ioctl, -- cgit v0.10.2 From b0d0d915d1d1a0fe486849f3e41333c66df620c4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Jan 2011 21:49:56 +0100 Subject: ipx: remove the BKL This replaces all instances of lock_kernel in the IPX code with lock_sock. As far as I can tell, this is safe to do, because there is no global state that needs to be locked in IPX, and the code does not recursively take the lock or sleep indefinitely while holding it. Compile-tested only. Signed-off-by: Arnd Bergmann Acked-by: David S. Miller Cc: Arnaldo Carvalho de Melo Cc: netdev@vger.kernel.org diff --git a/net/ipx/Kconfig b/net/ipx/Kconfig index 02549cb..e9ad006 100644 --- a/net/ipx/Kconfig +++ b/net/ipx/Kconfig @@ -3,7 +3,6 @@ # config IPX tristate "The IPX protocol" - depends on BKL # should be fixable select LLC ---help--- This is support for the Novell networking protocol, IPX, commonly diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index da3d21c..2731b51 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include @@ -1299,7 +1298,7 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname, int opt; int rc = -EINVAL; - lock_kernel(); + lock_sock(sk); if (optlen != sizeof(int)) goto out; @@ -1314,7 +1313,7 @@ static int ipx_setsockopt(struct socket *sock, int level, int optname, ipx_sk(sk)->type = opt; rc = 0; out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1326,7 +1325,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname, int len; int rc = -ENOPROTOOPT; - lock_kernel(); + lock_sock(sk); if (!(level == SOL_IPX && optname == IPX_TYPE)) goto out; @@ -1347,7 +1346,7 @@ static int ipx_getsockopt(struct socket *sock, int level, int optname, rc = 0; out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1396,7 +1395,7 @@ static int ipx_release(struct socket *sock) if (!sk) goto out; - lock_kernel(); + lock_sock(sk); if (!sock_flag(sk, SOCK_DEAD)) sk->sk_state_change(sk); @@ -1404,7 +1403,7 @@ static int ipx_release(struct socket *sock) sock->sk = NULL; sk_refcnt_debug_release(sk); ipx_destroy_socket(sk); - unlock_kernel(); + release_sock(sk); out: return 0; } @@ -1530,11 +1529,12 @@ out: static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { + struct sock *sk = sock->sk; int rc; - lock_kernel(); + lock_sock(sk); rc = __ipx_bind(sock, uaddr, addr_len); - unlock_kernel(); + release_sock(sk); return rc; } @@ -1551,7 +1551,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; - lock_kernel(); + lock_sock(sk); if (addr_len != sizeof(*addr)) goto out; addr = (struct sockaddr_ipx *)uaddr; @@ -1598,7 +1598,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, ipxrtr_put(rt); rc = 0; out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1614,7 +1614,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, *uaddr_len = sizeof(struct sockaddr_ipx); - lock_kernel(); + lock_sock(sk); if (peer) { rc = -ENOTCONN; if (sk->sk_state != TCP_ESTABLISHED) @@ -1649,19 +1649,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, rc = 0; out: - unlock_kernel(); - return rc; -} - -static unsigned int ipx_datagram_poll(struct file *file, struct socket *sock, - poll_table *wait) -{ - int rc; - - lock_kernel(); - rc = datagram_poll(file, sock, wait); - unlock_kernel(); - + release_sock(sk); return rc; } @@ -1736,7 +1724,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, int rc = -EINVAL; int flags = msg->msg_flags; - lock_kernel(); + lock_sock(sk); /* Socket gets bound below anyway */ /* if (sk->sk_zapped) return -EIO; */ /* Socket not bound */ @@ -1788,7 +1776,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, if (rc >= 0) rc = len; out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1803,7 +1791,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int copied, rc; - lock_kernel(); + lock_sock(sk); /* put the autobinding in */ if (!ipxs->port) { struct sockaddr_ipx uaddr; @@ -1862,7 +1850,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, out_free: skb_free_datagram(sk, skb); out: - unlock_kernel(); + release_sock(sk); return rc; } @@ -1874,7 +1862,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) struct sock *sk = sock->sk; void __user *argp = (void __user *)arg; - lock_kernel(); + lock_sock(sk); switch (cmd) { case TIOCOUTQ: amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); @@ -1937,7 +1925,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -ENOIOCTLCMD; break; } - unlock_kernel(); + release_sock(sk); return rc; } @@ -1984,7 +1972,7 @@ static const struct proto_ops ipx_dgram_ops = { .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = ipx_getname, - .poll = ipx_datagram_poll, + .poll = datagram_poll, .ioctl = ipx_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ipx_compat_ioctl, -- cgit v0.10.2 From ae7eb8979ccfa5e9e888101b9c940f20bd0f4115 Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Sun, 27 Feb 2011 13:58:00 +0000 Subject: fs/locks.c: Remove stale FIXME left over from BKL conversion The comment is no longer true as (now that the BKL conversion is finished) a spinlock _is_ now used to protect file_lock_list, blocked_list and inode->i_flock. Signed-off-by: Matt Fleming Signed-off-by: Arnd Bergmann diff --git a/fs/locks.c b/fs/locks.c index 0f39982..822c3d1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -145,7 +145,6 @@ static DEFINE_SPINLOCK(file_lock_lock); /* * Protects the two list heads above, plus the inode->i_flock list - * FIXME: should use a spinlock, once lockd and ceph are ready. */ void lock_flocks(void) { -- cgit v0.10.2 From 4ba8216cd90560bc402f52076f64d8546e8aefcb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 25 Jan 2011 22:52:22 +0100 Subject: BKL: That's all, folks This removes the implementation of the big kernel lock, at last. A lot of people have worked on this in the past, I so the credit for this patch should be with everyone who participated in the hunt. The names on the Cc list are the people that were the most active in this, according to the recorded git history, in alphabetical order. Signed-off-by: Arnd Bergmann Acked-by: Alan Cox Cc: Alessio Igor Bogani Cc: Al Viro Cc: Andrew Hendry Cc: Andrew Morton Cc: Christoph Hellwig Cc: Eric W. Biederman Cc: Frederic Weisbecker Cc: Hans Verkuil Acked-by: Ingo Molnar Cc: Jan Blunck Cc: John Kacur Cc: Jonathan Corbet Cc: Linus Torvalds Cc: Matthew Wilcox Cc: Oliver Neukum Cc: Paul Menage Acked-by: Thomas Gleixner Cc: Trond Myklebust diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 32f9fd6..ba36217 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -93,13 +93,6 @@ */ #define in_nmi() (preempt_count() & NMI_MASK) -#if defined(CONFIG_PREEMPT) && defined(CONFIG_BKL) -# include -# define PREEMPT_INATOMIC_BASE (current->lock_depth >= 0) -#else -# define PREEMPT_INATOMIC_BASE 0 -#endif - #if defined(CONFIG_PREEMPT) # define PREEMPT_CHECK_OFFSET 1 #else @@ -113,7 +106,7 @@ * used in the general case to determine whether sleeping is possible. * Do not use in_atomic() in driver code. */ -#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE) +#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) /* * Check whether we were atomic before we did preempt_disable(): diff --git a/include/linux/smp_lock.h b/include/linux/smp_lock.h deleted file mode 100644 index 3a19882..0000000 --- a/include/linux/smp_lock.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef __LINUX_SMPLOCK_H -#define __LINUX_SMPLOCK_H - -#ifdef CONFIG_LOCK_KERNEL -#include - -extern int __lockfunc __reacquire_kernel_lock(void); -extern void __lockfunc __release_kernel_lock(void); - -/* - * Release/re-acquire global kernel lock for the scheduler - */ -#define release_kernel_lock(tsk) do { \ - if (unlikely((tsk)->lock_depth >= 0)) \ - __release_kernel_lock(); \ -} while (0) - -static inline int reacquire_kernel_lock(struct task_struct *task) -{ - if (unlikely(task->lock_depth >= 0)) - return __reacquire_kernel_lock(); - return 0; -} - -extern void __lockfunc -_lock_kernel(const char *func, const char *file, int line) -__acquires(kernel_lock); - -extern void __lockfunc -_unlock_kernel(const char *func, const char *file, int line) -__releases(kernel_lock); - -#define lock_kernel() do { \ - _lock_kernel(__func__, __FILE__, __LINE__); \ -} while (0) - -#define unlock_kernel() do { \ - _unlock_kernel(__func__, __FILE__, __LINE__); \ -} while (0) - -/* - * Various legacy drivers don't really need the BKL in a specific - * function, but they *do* need to know that the BKL became available. - * This function just avoids wrapping a bunch of lock/unlock pairs - * around code which doesn't really need it. - */ -static inline void cycle_kernel_lock(void) -{ - lock_kernel(); - unlock_kernel(); -} - -#else - -#ifdef CONFIG_BKL /* provoke build bug if not set */ -#define lock_kernel() -#define unlock_kernel() -#define cycle_kernel_lock() do { } while(0) -#endif /* CONFIG_BKL */ - -#define release_kernel_lock(task) do { } while(0) -#define reacquire_kernel_lock(task) 0 - -#endif /* CONFIG_LOCK_KERNEL */ -#endif /* __LINUX_SMPLOCK_H */ diff --git a/init/Kconfig b/init/Kconfig index be788c0..a88d1c9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -69,11 +69,6 @@ config BROKEN_ON_SMP depends on BROKEN || !SMP default y -config LOCK_KERNEL - bool - depends on (SMP || PREEMPT) && BKL - default y - config INIT_ENV_ARG_LIMIT int default 32 if !UML diff --git a/kernel/sched.c b/kernel/sched.c index 18d38e4..827c170 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -3945,9 +3944,6 @@ need_resched: rcu_note_context_switch(cpu); prev = rq->curr; - release_kernel_lock(prev); -need_resched_nonpreemptible: - schedule_debug(prev); if (sched_feat(HRTICK)) @@ -4010,9 +4006,6 @@ need_resched_nonpreemptible: post_schedule(rq); - if (unlikely(reacquire_kernel_lock(prev))) - goto need_resched_nonpreemptible; - preempt_enable_no_resched(); if (need_resched()) goto need_resched; @@ -8074,7 +8067,7 @@ static inline int preempt_count_equals(int preempt_offset) { int nested = (preempt_count() & ~PREEMPT_ACTIVE) + rcu_preempt_depth(); - return (nested == PREEMPT_INATOMIC_BASE + preempt_offset); + return (nested == preempt_offset); } void __might_sleep(const char *file, int line, int preempt_offset) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2b97418..6f440d8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -470,15 +470,6 @@ config DEBUG_MUTEXES This feature allows mutex semantics violations to be detected and reported. -config BKL - bool "Big Kernel Lock" if (SMP || PREEMPT) - default y - help - This is the traditional lock that is used in old code instead - of proper locking. All drivers that use the BKL should depend - on this symbol. - Say Y here unless you are working on removing the BKL. - config DEBUG_LOCK_ALLOC bool "Lock debugging: detect incorrect freeing of live locks" depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT diff --git a/lib/Makefile b/lib/Makefile index cbb774f..de6c609 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -43,7 +43,6 @@ obj-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o -obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o obj-$(CONFIG_BTREE) += btree.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c deleted file mode 100644 index d80e122..0000000 --- a/lib/kernel_lock.c +++ /dev/null @@ -1,136 +0,0 @@ -/* - * lib/kernel_lock.c - * - * This is the traditional BKL - big kernel lock. Largely - * relegated to obsolescence, but used by various less - * important (or lazy) subsystems. - */ -#include -#include -#include -#include - -/* - * The 'big kernel lock' - * - * This spinlock is taken and released recursively by lock_kernel() - * and unlock_kernel(). It is transparently dropped and reacquired - * over schedule(). It is used to protect legacy code that hasn't - * been migrated to a proper locking design yet. - * - * Don't use in new code. - */ -static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag); - - -/* - * Acquire/release the underlying lock from the scheduler. - * - * This is called with preemption disabled, and should - * return an error value if it cannot get the lock and - * TIF_NEED_RESCHED gets set. - * - * If it successfully gets the lock, it should increment - * the preemption count like any spinlock does. - * - * (This works on UP too - do_raw_spin_trylock will never - * return false in that case) - */ -int __lockfunc __reacquire_kernel_lock(void) -{ - while (!do_raw_spin_trylock(&kernel_flag)) { - if (need_resched()) - return -EAGAIN; - cpu_relax(); - } - preempt_disable(); - return 0; -} - -void __lockfunc __release_kernel_lock(void) -{ - do_raw_spin_unlock(&kernel_flag); - preempt_enable_no_resched(); -} - -/* - * These are the BKL spinlocks - we try to be polite about preemption. - * If SMP is not on (ie UP preemption), this all goes away because the - * do_raw_spin_trylock() will always succeed. - */ -#ifdef CONFIG_PREEMPT -static inline void __lock_kernel(void) -{ - preempt_disable(); - if (unlikely(!do_raw_spin_trylock(&kernel_flag))) { - /* - * If preemption was disabled even before this - * was called, there's nothing we can be polite - * about - just spin. - */ - if (preempt_count() > 1) { - do_raw_spin_lock(&kernel_flag); - return; - } - - /* - * Otherwise, let's wait for the kernel lock - * with preemption enabled.. - */ - do { - preempt_enable(); - while (raw_spin_is_locked(&kernel_flag)) - cpu_relax(); - preempt_disable(); - } while (!do_raw_spin_trylock(&kernel_flag)); - } -} - -#else - -/* - * Non-preemption case - just get the spinlock - */ -static inline void __lock_kernel(void) -{ - do_raw_spin_lock(&kernel_flag); -} -#endif - -static inline void __unlock_kernel(void) -{ - /* - * the BKL is not covered by lockdep, so we open-code the - * unlocking sequence (and thus avoid the dep-chain ops): - */ - do_raw_spin_unlock(&kernel_flag); - preempt_enable(); -} - -/* - * Getting the big kernel lock. - * - * This cannot happen asynchronously, so we only need to - * worry about other CPU's. - */ -void __lockfunc _lock_kernel(const char *func, const char *file, int line) -{ - int depth = current->lock_depth + 1; - - if (likely(!depth)) { - might_sleep(); - __lock_kernel(); - } - current->lock_depth = depth; -} - -void __lockfunc _unlock_kernel(const char *func, const char *file, int line) -{ - BUG_ON(current->lock_depth < 0); - if (likely(--current->lock_depth < 0)) - __unlock_kernel(); -} - -EXPORT_SYMBOL(_lock_kernel); -EXPORT_SYMBOL(_unlock_kernel); - -- cgit v0.10.2