From 278f6679f454bf185a07d9a4ca355b153482d17a Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 8 Aug 2013 17:34:46 -0400 Subject: reiserfs: locking, handle nested locks properly The reiserfs write lock replaced the BKL and uses similar semantics. Frederic's locking code makes a distinction between when the lock is nested and when it's being acquired/released, but I don't think that's the right distinction to make. The right distinction is between the lock being released at end-of-use and the lock being released for a schedule. The unlock should return the depth and the lock should restore it, rather than the other way around as it is now. This patch implements that and adds a number of places where the lock should be dropped. Signed-off-by: Jeff Mahoney diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index a98b774..881f8ea 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -1340,10 +1340,11 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, "reading failed", __func__, block); else { if (buffer_locked(bh)) { + int depth; PROC_INFO_INC(sb, scan_bitmap.wait); - reiserfs_write_unlock(sb); + depth = reiserfs_write_unlock_nested(sb); __wait_on_buffer(bh); - reiserfs_write_lock(sb); + reiserfs_write_lock_nested(sb, depth); } BUG_ON(!buffer_uptodate(bh)); BUG_ON(atomic_read(&bh->b_count) == 0); diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 03e4ca5..1fd2051 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c @@ -71,6 +71,7 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) char small_buf[32]; /* avoid kmalloc if we can */ struct reiserfs_dir_entry de; int ret = 0; + int depth; reiserfs_write_lock(inode->i_sb); @@ -181,17 +182,17 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) * Since filldir might sleep, we can release * the write lock here for other waiters */ - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(inode->i_sb); if 
(!dir_emit (ctx, local_buf, d_reclen, d_ino, DT_UNKNOWN)) { - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); if (local_buf != small_buf) { kfree(local_buf); } goto end; } - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); if (local_buf != small_buf) { kfree(local_buf); } diff --git a/fs/reiserfs/fix_node.c b/fs/reiserfs/fix_node.c index 430e065..dc4d415 100644 --- a/fs/reiserfs/fix_node.c +++ b/fs/reiserfs/fix_node.c @@ -1022,9 +1022,9 @@ static int get_far_parent(struct tree_balance *tb, if (buffer_locked(*pcom_father)) { /* Release the write lock while the buffer is busy */ - reiserfs_write_unlock(tb->tb_sb); + int depth = reiserfs_write_unlock_nested(tb->tb_sb); __wait_on_buffer(*pcom_father); - reiserfs_write_lock(tb->tb_sb); + reiserfs_write_lock_nested(tb->tb_sb, depth); if (FILESYSTEM_CHANGED_TB(tb)) { brelse(*pcom_father); return REPEAT_SEARCH; @@ -1929,9 +1929,9 @@ static int get_direct_parent(struct tree_balance *tb, int h) return REPEAT_SEARCH; if (buffer_locked(bh)) { - reiserfs_write_unlock(tb->tb_sb); + int depth = reiserfs_write_unlock_nested(tb->tb_sb); __wait_on_buffer(bh); - reiserfs_write_lock(tb->tb_sb); + reiserfs_write_lock_nested(tb->tb_sb, depth); if (FILESYSTEM_CHANGED_TB(tb)) return REPEAT_SEARCH; } @@ -1952,6 +1952,7 @@ static int get_neighbors(struct tree_balance *tb, int h) unsigned long son_number; struct super_block *sb = tb->tb_sb; struct buffer_head *bh; + int depth; PROC_INFO_INC(sb, get_neighbors[h]); @@ -1969,9 +1970,9 @@ static int get_neighbors(struct tree_balance *tb, int h) tb->FL[h]) ? 
tb->lkey[h] : B_NR_ITEMS(tb-> FL[h]); son_number = B_N_CHILD_NUM(tb->FL[h], child_position); - reiserfs_write_unlock(sb); + depth = reiserfs_write_unlock_nested(tb->tb_sb); bh = sb_bread(sb, son_number); - reiserfs_write_lock(sb); + reiserfs_write_lock_nested(tb->tb_sb, depth); if (!bh) return IO_ERROR; if (FILESYSTEM_CHANGED_TB(tb)) { @@ -2009,9 +2010,9 @@ static int get_neighbors(struct tree_balance *tb, int h) child_position = (bh == tb->FR[h]) ? tb->rkey[h] + 1 : 0; son_number = B_N_CHILD_NUM(tb->FR[h], child_position); - reiserfs_write_unlock(sb); + depth = reiserfs_write_unlock_nested(tb->tb_sb); bh = sb_bread(sb, son_number); - reiserfs_write_lock(sb); + reiserfs_write_lock_nested(tb->tb_sb, depth); if (!bh) return IO_ERROR; if (FILESYSTEM_CHANGED_TB(tb)) { @@ -2272,6 +2273,7 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) } if (locked) { + int depth; #ifdef CONFIG_REISERFS_CHECK repeat_counter++; if ((repeat_counter % 10000) == 0) { @@ -2286,9 +2288,9 @@ static int wait_tb_buffers_until_unlocked(struct tree_balance *tb) REPEAT_SEARCH : CARRY_ON; } #endif - reiserfs_write_unlock(tb->tb_sb); + depth = reiserfs_write_unlock_nested(tb->tb_sb); __wait_on_buffer(locked); - reiserfs_write_lock(tb->tb_sb); + reiserfs_write_lock_nested(tb->tb_sb, depth); if (FILESYSTEM_CHANGED_TB(tb)) return REPEAT_SEARCH; } @@ -2359,9 +2361,9 @@ int fix_nodes(int op_mode, struct tree_balance *tb, /* if it possible in indirect_to_direct conversion */ if (buffer_locked(tbS0)) { - reiserfs_write_unlock(tb->tb_sb); + int depth = reiserfs_write_unlock_nested(tb->tb_sb); __wait_on_buffer(tbS0); - reiserfs_write_lock(tb->tb_sb); + reiserfs_write_lock_nested(tb->tb_sb, depth); if (FILESYSTEM_CHANGED_TB(tb)) return REPEAT_SEARCH; } diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index bf1331a..4a3a57c 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -41,11 +41,10 @@ void reiserfs_evict_inode(struct inode *inode) /* The = 0 happens when we abort 
creating a new inode for some reason like lack of space.. */ if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */ - int depth; reiserfs_delete_xattrs(inode); - depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); if (journal_begin(&th, inode->i_sb, jbegin_count)) goto out; @@ -74,7 +73,7 @@ void reiserfs_evict_inode(struct inode *inode) remove_save_link(inode, 0 /* not truncate */ ); /* we can't do anything * about an error here */ out: - reiserfs_write_unlock_once(inode->i_sb, depth); + reiserfs_write_unlock(inode->i_sb); } else { /* no object items are in the tree */ ; @@ -611,7 +610,6 @@ int reiserfs_get_block(struct inode *inode, sector_t block, __le32 *item; int done; int fs_gen; - int lock_depth; struct reiserfs_transaction_handle *th = NULL; /* space reserved in transaction batch: . 3 balancings in direct->indirect conversion @@ -627,11 +625,11 @@ int reiserfs_get_block(struct inode *inode, sector_t block, loff_t new_offset = (((loff_t) block) << inode->i_sb->s_blocksize_bits) + 1; - lock_depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); version = get_inode_item_key_version(inode); if (!file_capable(inode, block)) { - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); return -EFBIG; } @@ -643,7 +641,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, /* find number of block-th logical block of the file */ ret = _get_block_create_0(inode, block, bh_result, create | GET_BLOCK_READ_DIRECT); - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); return ret; } /* @@ -761,7 +759,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, if (!dangle && th) retval = reiserfs_end_persistent_transaction(th); - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); /* the item was found, so new blocks were not added to the 
file ** there is no need to make sure the inode is updated with this @@ -1012,11 +1010,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, * long time. reschedule if needed and also release the write * lock for others. */ - if (need_resched()) { - reiserfs_write_unlock_once(inode->i_sb, lock_depth); - schedule(); - lock_depth = reiserfs_write_lock_once(inode->i_sb); - } + reiserfs_cond_resched(inode->i_sb); retval = search_for_position_by_key(inode->i_sb, &key, &path); if (retval == IO_ERROR) { @@ -1051,7 +1045,7 @@ int reiserfs_get_block(struct inode *inode, sector_t block, retval = err; } - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); reiserfs_check_path(&path); return retval; } @@ -1510,14 +1504,15 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key) { struct inode *inode; struct reiserfs_iget_args args; + int depth; args.objectid = key->on_disk_key.k_objectid; args.dirid = key->on_disk_key.k_dir_id; - reiserfs_write_unlock(s); + depth = reiserfs_write_unlock_nested(s); inode = iget5_locked(s, key->on_disk_key.k_objectid, reiserfs_find_actor, reiserfs_init_locked_inode, (void *)(&args)); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); if (!inode) return ERR_PTR(-ENOMEM); @@ -1781,6 +1776,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, struct stat_data sd; int retval; int err; + int depth; BUG_ON(!th->t_trans_id); @@ -1813,10 +1809,10 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th, memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(inode->i_sb); err = insert_inode_locked4(inode, args.objectid, reiserfs_find_actor, &args); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); if (err) { err = -EINVAL; goto out_bad_inode; @@ -2108,9 +2104,8 @@ int 
reiserfs_truncate_file(struct inode *inode, int update_timestamps) int error; struct buffer_head *bh = NULL; int err2; - int lock_depth; - lock_depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); if (inode->i_size > 0) { error = grab_tail_page(inode, &page, &bh); @@ -2179,7 +2174,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) page_cache_release(page); } - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); return 0; out: @@ -2188,7 +2183,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps) page_cache_release(page); } - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); return error; } @@ -2653,10 +2648,11 @@ int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len) struct inode *inode = page->mapping->host; int ret; int old_ref = 0; + int depth; - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(inode->i_sb); reiserfs_wait_on_write_block(inode->i_sb); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); fix_tail_page_for_writing(page); if (reiserfs_transaction_running(inode->i_sb)) { @@ -2713,7 +2709,6 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, int update_sd = 0; struct reiserfs_transaction_handle *th; unsigned start; - int lock_depth = 0; bool locked = false; if ((unsigned long)fsdata & AOP_FLAG_CONT_EXPAND) @@ -2742,7 +2737,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, */ if (pos + copied > inode->i_size) { struct reiserfs_transaction_handle myth; - lock_depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); locked = true; /* If the file have grown beyond the border where it can have a tail, unmark it as needing a tail @@ -2773,7 +2768,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, } if 
(th) { if (!locked) { - lock_depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); locked = true; } if (!update_sd) @@ -2785,7 +2780,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, out: if (locked) - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); unlock_page(page); page_cache_release(page); @@ -2795,7 +2790,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping, return ret == 0 ? copied : ret; journal_error: - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); locked = false; if (th) { if (!update_sd) @@ -2813,10 +2808,11 @@ int reiserfs_commit_write(struct file *f, struct page *page, int ret = 0; int update_sd = 0; struct reiserfs_transaction_handle *th = NULL; + int depth; - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(inode->i_sb); reiserfs_wait_on_write_block(inode->i_sb); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); if (reiserfs_transaction_running(inode->i_sb)) { th = current->journal_info; @@ -3115,7 +3111,6 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; unsigned int ia_valid; - int depth; int error; error = inode_change_ok(inode, attr); @@ -3127,14 +3122,14 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) if (is_quota_modification(inode, attr)) dquot_initialize(inode); - depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); if (attr->ia_valid & ATTR_SIZE) { /* version 2 items will be caught by the s_maxbytes check ** done for us in vmtruncate */ if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5 && attr->ia_size > MAX_NON_LFS) { - reiserfs_write_unlock_once(inode->i_sb, depth); + reiserfs_write_unlock(inode->i_sb); error = -EFBIG; goto out; } @@ -3157,7 +3152,7 @@ int reiserfs_setattr(struct dentry 
*dentry, struct iattr *attr) error = err; } if (error) { - reiserfs_write_unlock_once(inode->i_sb, depth); + reiserfs_write_unlock(inode->i_sb); goto out; } /* @@ -3167,7 +3162,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid |= (ATTR_MTIME | ATTR_CTIME); } } - reiserfs_write_unlock_once(inode->i_sb, depth); + reiserfs_write_unlock(inode->i_sb); if ((((attr->ia_valid & ATTR_UID) && (from_kuid(&init_user_ns, attr->ia_uid) & ~0xffff)) || ((attr->ia_valid & ATTR_GID) && (from_kgid(&init_user_ns, attr->ia_gid) & ~0xffff))) && @@ -3192,16 +3187,16 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) return error; /* (user+group)*(old+new) structure - we count quota info and , inode write (sb, inode) */ - depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); error = journal_begin(&th, inode->i_sb, jbegin_count); - reiserfs_write_unlock_once(inode->i_sb, depth); + reiserfs_write_unlock(inode->i_sb); if (error) goto out; error = dquot_transfer(inode, attr); - depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); if (error) { journal_end(&th, inode->i_sb, jbegin_count); - reiserfs_write_unlock_once(inode->i_sb, depth); + reiserfs_write_unlock(inode->i_sb); goto out; } @@ -3213,7 +3208,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr) inode->i_gid = attr->ia_gid; mark_inode_dirty(inode); error = journal_end(&th, inode->i_sb, jbegin_count); - reiserfs_write_unlock_once(inode->i_sb, depth); + reiserfs_write_unlock(inode->i_sb); if (error) goto out; } diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 15cb5fe..946ccbf 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -167,7 +167,6 @@ int reiserfs_commit_write(struct file *f, struct page *page, int reiserfs_unpack(struct inode *inode, struct file *filp) { int retval = 0; - int depth; int index; struct page *page; struct address_space *mapping; @@ -183,11 +182,11 @@ int 
reiserfs_unpack(struct inode *inode, struct file *filp) return 0; } - depth = reiserfs_write_lock_once(inode->i_sb); - /* we need to make sure nobody is changing the file size beneath us */ reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb); + reiserfs_write_lock(inode->i_sb); + write_from = inode->i_size & (blocksize - 1); /* if we are on a block boundary, we are already unpacked. */ if (write_from == 0) { @@ -221,6 +220,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) out: mutex_unlock(&inode->i_mutex); - reiserfs_write_unlock_once(inode->i_sb, depth); + reiserfs_write_unlock(inode->i_sb); return retval; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 742fdd4..73feacc4 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -947,9 +947,11 @@ static int reiserfs_async_progress_wait(struct super_block *s) struct reiserfs_journal *j = SB_JOURNAL(s); if (atomic_read(&j->j_async_throttle)) { - reiserfs_write_unlock(s); + int depth; + + depth = reiserfs_write_unlock_nested(s); congestion_wait(BLK_RW_ASYNC, HZ / 10); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); } return 0; @@ -972,6 +974,7 @@ static int flush_commit_list(struct super_block *s, struct reiserfs_journal *journal = SB_JOURNAL(s); int retval = 0; int write_len; + int depth; reiserfs_check_lock_depth(s, "flush_commit_list"); @@ -1018,12 +1021,12 @@ static int flush_commit_list(struct super_block *s, * We might sleep in numerous places inside * write_ordered_buffers. Relax the write lock. 
*/ - reiserfs_write_unlock(s); + depth = reiserfs_write_unlock_nested(s); ret = write_ordered_buffers(&journal->j_dirty_buffers_lock, journal, jl, &jl->j_bh_list); if (ret < 0 && retval == 0) retval = ret; - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); } BUG_ON(!list_empty(&jl->j_bh_list)); /* @@ -1043,9 +1046,9 @@ static int flush_commit_list(struct super_block *s, tbh = journal_find_get_block(s, bn); if (tbh) { if (buffer_dirty(tbh)) { - reiserfs_write_unlock(s); + depth = reiserfs_write_unlock_nested(s); ll_rw_block(WRITE, 1, &tbh); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); } put_bh(tbh) ; } @@ -1057,17 +1060,17 @@ static int flush_commit_list(struct super_block *s, (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); tbh = journal_find_get_block(s, bn); - reiserfs_write_unlock(s); - wait_on_buffer(tbh); - reiserfs_write_lock(s); + depth = reiserfs_write_unlock_nested(s); + __wait_on_buffer(tbh); + reiserfs_write_lock_nested(s, depth); // since we're using ll_rw_blk above, it might have skipped over // a locked buffer. 
Double check here // /* redundant, sync_dirty_buffer() checks */ if (buffer_dirty(tbh)) { - reiserfs_write_unlock(s); + depth = reiserfs_write_unlock_nested(s); sync_dirty_buffer(tbh); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); } if (unlikely(!buffer_uptodate(tbh))) { #ifdef CONFIG_REISERFS_CHECK @@ -1091,12 +1094,12 @@ static int flush_commit_list(struct super_block *s, if (buffer_dirty(jl->j_commit_bh)) BUG(); mark_buffer_dirty(jl->j_commit_bh) ; - reiserfs_write_unlock(s); + depth = reiserfs_write_unlock_nested(s); if (reiserfs_barrier_flush(s)) __sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA); else sync_dirty_buffer(jl->j_commit_bh); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); } /* If there was a write error in the journal - we can't commit this @@ -1228,15 +1231,16 @@ static int _update_journal_header_block(struct super_block *sb, { struct reiserfs_journal_header *jh; struct reiserfs_journal *journal = SB_JOURNAL(sb); + int depth; if (reiserfs_is_journal_aborted(journal)) return -EIO; if (trans_id >= journal->j_last_flush_trans_id) { if (buffer_locked((journal->j_header_bh))) { - reiserfs_write_unlock(sb); - wait_on_buffer((journal->j_header_bh)); - reiserfs_write_lock(sb); + depth = reiserfs_write_unlock_nested(sb); + __wait_on_buffer(journal->j_header_bh); + reiserfs_write_lock_nested(sb, depth); if (unlikely(!buffer_uptodate(journal->j_header_bh))) { #ifdef CONFIG_REISERFS_CHECK reiserfs_warning(sb, "journal-699", @@ -1254,14 +1258,14 @@ static int _update_journal_header_block(struct super_block *sb, jh->j_mount_id = cpu_to_le32(journal->j_mount_id); set_buffer_dirty(journal->j_header_bh); - reiserfs_write_unlock(sb); + depth = reiserfs_write_unlock_nested(sb); if (reiserfs_barrier_flush(sb)) __sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA); else sync_dirty_buffer(journal->j_header_bh); - reiserfs_write_lock(sb); + reiserfs_write_lock_nested(sb, depth); if (!buffer_uptodate(journal->j_header_bh)) { 
reiserfs_warning(sb, "journal-837", "IO error during journal replay"); @@ -1341,6 +1345,7 @@ static int flush_journal_list(struct super_block *s, unsigned long j_len_saved = jl->j_len; struct reiserfs_journal *journal = SB_JOURNAL(s); int err = 0; + int depth; BUG_ON(j_len_saved <= 0); @@ -1495,9 +1500,9 @@ static int flush_journal_list(struct super_block *s, "cn->bh is NULL"); } - reiserfs_write_unlock(s); - wait_on_buffer(cn->bh); - reiserfs_write_lock(s); + depth = reiserfs_write_unlock_nested(s); + __wait_on_buffer(cn->bh); + reiserfs_write_lock_nested(s, depth); if (!cn->bh) { reiserfs_panic(s, "journal-1012", @@ -1974,6 +1979,7 @@ static int journal_compare_desc_commit(struct super_block *sb, /* returns 0 if it did not find a description block ** returns -1 if it found a corrupt commit block ** returns 1 if both desc and commit were valid +** NOTE: only called during fs mount */ static int journal_transaction_is_valid(struct super_block *sb, struct buffer_head *d_bh, @@ -2073,8 +2079,9 @@ static void brelse_array(struct buffer_head **heads, int num) /* ** given the start, and values for the oldest acceptable transactions, -** this either reads in a replays a transaction, or returns because the transaction -** is invalid, or too old. +** this either reads in and replays a transaction, or returns because the +** transaction is invalid, or too old. +** NOTE: only called during fs mount */ static int journal_read_transaction(struct super_block *sb, unsigned long cur_dblock, @@ -2208,10 +2215,7 @@ static int journal_read_transaction(struct super_block *sb, ll_rw_block(READ, get_desc_trans_len(desc), log_blocks); for (i = 0; i < get_desc_trans_len(desc); i++) { - reiserfs_write_unlock(sb); wait_on_buffer(log_blocks[i]); - reiserfs_write_lock(sb); - if (!buffer_uptodate(log_blocks[i])) { reiserfs_warning(sb, "journal-1212", "REPLAY FAILURE fsck required! 
" @@ -2318,12 +2322,13 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev, /* ** read and replay the log -** on a clean unmount, the journal header's next unflushed pointer will be to an invalid -** transaction. This tests that before finding all the transactions in the log, which makes normal mount times fast. -** -** After a crash, this starts with the next unflushed transaction, and replays until it finds one too old, or invalid. -** +** on a clean unmount, the journal header's next unflushed pointer will +** be to an invalid transaction. This tests that before finding all the +** transactions in the log, which makes normal mount times fast. +** After a crash, this starts with the next unflushed transaction, and +** replays until it finds one too old, or invalid. ** On exit, it sets things up so the first transaction will work correctly. +** NOTE: only called during fs mount */ static int journal_read(struct super_block *sb) { @@ -2501,14 +2506,18 @@ static int journal_read(struct super_block *sb) "replayed %d transactions in %lu seconds\n", replay_count, get_seconds() - start); } + /* needed to satisfy the locking in _update_journal_header_block */ + reiserfs_write_lock(sb); if (!bdev_read_only(sb->s_bdev) && _update_journal_header_block(sb, journal->j_start, journal->j_last_flush_trans_id)) { + reiserfs_write_unlock(sb); /* replay failed, caller must call free_journal_ram and abort ** the mount */ return -1; } + reiserfs_write_unlock(sb); return 0; } @@ -2828,13 +2837,7 @@ int journal_init(struct super_block *sb, const char *j_dev_name, goto free_and_return; } - /* - * Journal_read needs to be inspected in order to push down - * the lock further inside (or even remove it). 
- */ - reiserfs_write_lock(sb); ret = journal_read(sb); - reiserfs_write_unlock(sb); if (ret < 0) { reiserfs_warning(sb, "reiserfs-2006", "Replay Failure, unable to mount"); @@ -2923,9 +2926,9 @@ static void queue_log_writer(struct super_block *s) add_wait_queue(&journal->j_join_wait, &wait); set_current_state(TASK_UNINTERRUPTIBLE); if (test_bit(J_WRITERS_QUEUED, &journal->j_state)) { - reiserfs_write_unlock(s); + int depth = reiserfs_write_unlock_nested(s); schedule(); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); } __set_current_state(TASK_RUNNING); remove_wait_queue(&journal->j_join_wait, &wait); @@ -2943,9 +2946,12 @@ static void let_transaction_grow(struct super_block *sb, unsigned int trans_id) struct reiserfs_journal *journal = SB_JOURNAL(sb); unsigned long bcount = journal->j_bcount; while (1) { - reiserfs_write_unlock(sb); + int depth; + + depth = reiserfs_write_unlock_nested(sb); schedule_timeout_uninterruptible(1); - reiserfs_write_lock(sb); + reiserfs_write_lock_nested(sb, depth); + journal->j_current_jl->j_state |= LIST_COMMIT_PENDING; while ((atomic_read(&journal->j_wcount) > 0 || atomic_read(&journal->j_jlock)) && @@ -2976,6 +2982,7 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, struct reiserfs_transaction_handle myth; int sched_count = 0; int retval; + int depth; reiserfs_check_lock_depth(sb, "journal_begin"); BUG_ON(nblocks > journal->j_trans_max); @@ -2996,9 +3003,9 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, if (test_bit(J_WRITERS_BLOCKED, &journal->j_state)) { unlock_journal(sb); - reiserfs_write_unlock(sb); + depth = reiserfs_write_unlock_nested(sb); reiserfs_wait_on_write_block(sb); - reiserfs_write_lock(sb); + reiserfs_write_lock_nested(sb, depth); PROC_INFO_INC(sb, journal.journal_relock_writers); goto relock; } @@ -3821,6 +3828,7 @@ void reiserfs_restore_prepared_buffer(struct super_block *sb, if (test_clear_buffer_journal_restore_dirty(bh) && 
buffer_journal_dirty(bh)) { struct reiserfs_journal_cnode *cn; + reiserfs_write_lock(sb); cn = get_journal_hash_dev(sb, journal->j_list_hash_table, bh->b_blocknr); @@ -3828,6 +3836,7 @@ void reiserfs_restore_prepared_buffer(struct super_block *sb, set_buffer_journal_test(bh); mark_buffer_dirty(bh); } + reiserfs_write_unlock(sb); } clear_buffer_journal_prepared(bh); } @@ -3911,6 +3920,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, unsigned long jindex; unsigned int commit_trans_id; int trans_half; + int depth; BUG_ON(th->t_refcount > 1); BUG_ON(!th->t_trans_id); @@ -4116,9 +4126,7 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, next = cn->next; free_cnode(sb, cn); cn = next; - reiserfs_write_unlock(sb); - cond_resched(); - reiserfs_write_lock(sb); + reiserfs_cond_resched(sb); } /* we are done with both the c_bh and d_bh, but @@ -4165,10 +4173,10 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, * is lost. */ if (!list_empty(&jl->j_tail_bh_list)) { - reiserfs_write_unlock(sb); + depth = reiserfs_write_unlock_nested(sb); write_ordered_buffers(&journal->j_dirty_buffers_lock, journal, jl, &jl->j_tail_bh_list); - reiserfs_write_lock(sb); + reiserfs_write_lock_nested(sb, depth); } BUG_ON(!list_empty(&jl->j_tail_bh_list)); mutex_unlock(&jl->j_commit_mutex); diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c index d735bc8..045b83e 100644 --- a/fs/reiserfs/lock.c +++ b/fs/reiserfs/lock.c @@ -48,30 +48,35 @@ void reiserfs_write_unlock(struct super_block *s) } } -/* - * If we already own the lock, just exit and don't increase the depth. - * Useful when we don't want to lock more than once. - * - * We always return the lock_depth we had before calling - * this function. 
- */ -int reiserfs_write_lock_once(struct super_block *s) +int __must_check reiserfs_write_unlock_nested(struct super_block *s) { struct reiserfs_sb_info *sb_i = REISERFS_SB(s); + int depth; - if (sb_i->lock_owner != current) { - mutex_lock(&sb_i->lock); - sb_i->lock_owner = current; - return sb_i->lock_depth++; - } + /* this can happen when the lock isn't always held */ + if (sb_i->lock_owner != current) + return -1; + + depth = sb_i->lock_depth; + + sb_i->lock_depth = -1; + sb_i->lock_owner = NULL; + mutex_unlock(&sb_i->lock); - return sb_i->lock_depth; + return depth; } -void reiserfs_write_unlock_once(struct super_block *s, int lock_depth) +void reiserfs_write_lock_nested(struct super_block *s, int depth) { - if (lock_depth == -1) - reiserfs_write_unlock(s); + struct reiserfs_sb_info *sb_i = REISERFS_SB(s); + + /* this can happen when the lock isn't always held */ + if (depth == -1) + return; + + mutex_lock(&sb_i->lock); + sb_i->lock_owner = current; + sb_i->lock_depth = depth; } /* @@ -82,9 +87,7 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller) { struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); - if (sb_i->lock_depth < 0) - reiserfs_panic(sb, "%s called without kernel lock held %d", - caller); + WARN_ON(sb_i->lock_depth < 0); } #ifdef CONFIG_REISERFS_CHECK diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 8567fb8..dc5236f 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -325,7 +325,6 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { int retval; - int lock_depth; struct inode *inode = NULL; struct reiserfs_dir_entry de; INITIALIZE_PATH(path_to_entry); @@ -333,12 +332,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, if (REISERFS_MAX_NAME(dir->i_sb->s_blocksize) < dentry->d_name.len) return ERR_PTR(-ENAMETOOLONG); - /* - * Might be called with or without the write lock, must be careful - * to not recursively hold it in case we 
want to release the lock - * before rescheduling. - */ - lock_depth = reiserfs_write_lock_once(dir->i_sb); + reiserfs_write_lock(dir->i_sb); de.de_gen_number_bit_string = NULL; retval = @@ -349,7 +343,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); if (!inode || IS_ERR(inode)) { - reiserfs_write_unlock_once(dir->i_sb, lock_depth); + reiserfs_write_unlock(dir->i_sb); return ERR_PTR(-EACCES); } @@ -358,7 +352,7 @@ static struct dentry *reiserfs_lookup(struct inode *dir, struct dentry *dentry, if (IS_PRIVATE(dir)) inode->i_flags |= S_PRIVATE; } - reiserfs_write_unlock_once(dir->i_sb, lock_depth); + reiserfs_write_unlock(dir->i_sb); if (retval == IO_ERROR) { return ERR_PTR(-EIO); } @@ -727,7 +721,6 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode struct inode *inode; struct reiserfs_transaction_handle th; struct reiserfs_security_handle security; - int lock_depth; /* We need blocks for transaction + (user+group)*(quotas for new inode + update of quota for directory owner) */ int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + @@ -753,7 +746,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode return retval; } jbegin_count += retval; - lock_depth = reiserfs_write_lock_once(dir->i_sb); + reiserfs_write_lock(dir->i_sb); retval = journal_begin(&th, dir->i_sb, jbegin_count); if (retval) { @@ -804,7 +797,7 @@ static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); out_failed: - reiserfs_write_unlock_once(dir->i_sb, lock_depth); + reiserfs_write_unlock(dir->i_sb); return retval; } @@ -920,7 +913,6 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) struct reiserfs_transaction_handle th; int jbegin_count; unsigned long savelink; - int depth; dquot_initialize(dir); @@ -934,7 +926,7 @@ 
static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) JOURNAL_PER_BALANCE_CNT * 2 + 2 + 4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb); - depth = reiserfs_write_lock_once(dir->i_sb); + reiserfs_write_lock(dir->i_sb); retval = journal_begin(&th, dir->i_sb, jbegin_count); if (retval) goto out_unlink; @@ -995,7 +987,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) retval = journal_end(&th, dir->i_sb, jbegin_count); reiserfs_check_path(&path); - reiserfs_write_unlock_once(dir->i_sb, depth); + reiserfs_write_unlock(dir->i_sb); return retval; end_unlink: @@ -1005,7 +997,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry) if (err) retval = err; out_unlink: - reiserfs_write_unlock_once(dir->i_sb, depth); + reiserfs_write_unlock(dir->i_sb); return retval; } diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index c0b1112..54944d5 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -358,12 +358,13 @@ void __reiserfs_panic(struct super_block *sb, const char *id, dump_stack(); #endif if (sb) - panic(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n", + printk(KERN_WARNING "REISERFS panic (device %s): %s%s%s: %s\n", sb->s_id, id ? id : "", id ? " " : "", function, error_buf); else - panic(KERN_WARNING "REISERFS panic: %s%s%s: %s\n", + printk(KERN_WARNING "REISERFS panic: %s%s%s: %s\n", id ? id : "", id ? 
" " : "", function, error_buf); + BUG(); } void __reiserfs_error(struct super_block *sb, const char *id, diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 3df5ce6..f8adaee 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -630,8 +630,8 @@ static inline int __reiserfs_is_journal_aborted(struct reiserfs_journal */ void reiserfs_write_lock(struct super_block *s); void reiserfs_write_unlock(struct super_block *s); -int reiserfs_write_lock_once(struct super_block *s); -void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); +int __must_check reiserfs_write_unlock_nested(struct super_block *s); +void reiserfs_write_lock_nested(struct super_block *s, int depth); #ifdef CONFIG_REISERFS_CHECK void reiserfs_lock_check_recursive(struct super_block *s); @@ -667,31 +667,33 @@ static inline void reiserfs_lock_check_recursive(struct super_block *s) { } * - The inode mutex */ static inline void reiserfs_mutex_lock_safe(struct mutex *m, - struct super_block *s) + struct super_block *s) { - reiserfs_lock_check_recursive(s); - reiserfs_write_unlock(s); + int depth; + + depth = reiserfs_write_unlock_nested(s); mutex_lock(m); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); } static inline void reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, - struct super_block *s) + struct super_block *s) { - reiserfs_lock_check_recursive(s); - reiserfs_write_unlock(s); + int depth; + + depth = reiserfs_write_unlock_nested(s); mutex_lock_nested(m, subclass); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); } static inline void reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) { - reiserfs_lock_check_recursive(s); - reiserfs_write_unlock(s); - down_read(sem); - reiserfs_write_lock(s); + int depth; + depth = reiserfs_write_unlock_nested(s); + down_read(sem); + reiserfs_write_lock_nested(s, depth); } /* @@ -701,9 +703,11 @@ reiserfs_down_read_safe(struct rw_semaphore *sem, 
struct super_block *s) static inline void reiserfs_cond_resched(struct super_block *s) { if (need_resched()) { - reiserfs_write_unlock(s); + int depth; + + depth = reiserfs_write_unlock_nested(s); schedule(); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); } } diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 3ce02cf..a4ef5cd 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -34,6 +34,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) unsigned long int block_count, free_blocks; int i; int copy_size; + int depth; sb = SB_DISK_SUPER_BLOCK(s); @@ -43,7 +44,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) } /* check the device size */ + depth = reiserfs_write_unlock_nested(s); bh = sb_bread(s, block_count_new - 1); + reiserfs_write_lock_nested(s, depth); if (!bh) { printk("reiserfs_resize: can\'t read last block\n"); return -EINVAL; @@ -125,9 +128,12 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) * transaction begins, and the new bitmaps don't matter if the * transaction fails. 
*/ for (i = bmap_nr; i < bmap_nr_new; i++) { + int depth; /* don't use read_bitmap_block since it will cache * the uninitialized bitmap */ + depth = reiserfs_write_unlock_nested(s); bh = sb_bread(s, i * s->s_blocksize * 8); + reiserfs_write_lock_nested(s, depth); if (!bh) { vfree(bitmap); return -EIO; @@ -138,9 +144,9 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) set_buffer_uptodate(bh); mark_buffer_dirty(bh); - reiserfs_write_unlock(s); + depth = reiserfs_write_unlock_nested(s); sync_dirty_buffer(bh); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(s, depth); // update bitmap_info stuff bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; brelse(bh); diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 2f40a4c..4d7d476 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -524,14 +524,14 @@ static int is_tree_node(struct buffer_head *bh, int level) * the caller (search_by_key) will perform other schedule-unsafe * operations just after calling this function. * - * @return true if we have unlocked + * @return depth of lock to be restored after read completes */ -static bool search_by_key_reada(struct super_block *s, +static int search_by_key_reada(struct super_block *s, struct buffer_head **bh, b_blocknr_t *b, int num) { int i, j; - bool unlocked = false; + int depth = -1; for (i = 0; i < num; i++) { bh[i] = sb_getblk(s, b[i]); @@ -549,15 +549,13 @@ static bool search_by_key_reada(struct super_block *s, * you have to make sure the prepared bit isn't set on this buffer */ if (!buffer_uptodate(bh[j])) { - if (!unlocked) { - reiserfs_write_unlock(s); - unlocked = true; - } + if (depth == -1) + depth = reiserfs_write_unlock_nested(s); ll_rw_block(READA, 1, bh + j); } brelse(bh[j]); } - return unlocked; + return depth; } /************************************************************************** @@ -645,26 +643,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s have a pointer to it. 
*/ if ((bh = last_element->pe_buffer = sb_getblk(sb, block_number))) { - bool unlocked = false; - if (!buffer_uptodate(bh) && reada_count > 1) - /* may unlock the write lock */ - unlocked = search_by_key_reada(sb, reada_bh, - reada_blocks, reada_count); /* - * If we haven't already unlocked the write lock, - * then we need to do that here before reading - * the current block + * We'll need to drop the lock if we encounter any + * buffers that need to be read. If all of them are + * already up to date, we don't need to drop the lock. */ - if (!buffer_uptodate(bh) && !unlocked) { - reiserfs_write_unlock(sb); - unlocked = true; - } + int depth = -1; + + if (!buffer_uptodate(bh) && reada_count > 1) + depth = search_by_key_reada(sb, reada_bh, + reada_blocks, reada_count); + + if (!buffer_uptodate(bh) && depth == -1) + depth = reiserfs_write_unlock_nested(sb); + ll_rw_block(READ, 1, &bh); wait_on_buffer(bh); - if (unlocked) - reiserfs_write_lock(sb); + if (depth != -1) + reiserfs_write_lock_nested(sb, depth); if (!buffer_uptodate(bh)) goto io_error; } else { @@ -1059,9 +1057,7 @@ static char prepare_for_delete_or_cut(struct reiserfs_transaction_handle *th, st reiserfs_free_block(th, inode, block, 1); } - reiserfs_write_unlock(sb); - cond_resched(); - reiserfs_write_lock(sb); + reiserfs_cond_resched(sb); if (item_moved (&s_ih, path)) { need_re_search = 1; diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 7e81d97..60d0932 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -624,7 +624,6 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags) struct reiserfs_transaction_handle th; int err = 0; - int lock_depth; if (inode->i_sb->s_flags & MS_RDONLY) { reiserfs_warning(inode->i_sb, "clm-6006", @@ -632,7 +631,7 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags) inode->i_ino); return; } - lock_depth = reiserfs_write_lock_once(inode->i_sb); + reiserfs_write_lock(inode->i_sb); /* this is really only used for atime updates, so they 
don't have ** to be included in O_SYNC or fsync @@ -645,7 +644,7 @@ static void reiserfs_dirty_inode(struct inode *inode, int flags) journal_end(&th, inode->i_sb, 1); out: - reiserfs_write_unlock_once(inode->i_sb, lock_depth); + reiserfs_write_unlock(inode->i_sb); } static int reiserfs_show_options(struct seq_file *seq, struct dentry *root) -- cgit v0.10.2