From 63894ab9f63a688f6b0b8cdd01ac0a9f36d507b8 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Tue, 1 Nov 2011 10:06:19 +0800 Subject: ext3: call ext3_mark_recovery_complete() when recovery is really needed Call ext3_mark_recovery_complete() in ext3_fill_super() only if needs_recovery is non-zero. Besides that, print out "recovery complete" message after calling ext3_mark_recovery_complete(). Cc: Jan Kara Signed-off-by: Eryu Guan Signed-off-by: Jan Kara diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 922d289..767fa3a 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2060,9 +2060,10 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; ext3_orphan_cleanup(sb, es); EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; - if (needs_recovery) + if (needs_recovery) { + ext3_mark_recovery_complete(sb, es); ext3_msg(sb, KERN_INFO, "recovery complete"); - ext3_mark_recovery_complete(sb, es); + } ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode", test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": -- cgit v0.10.2 From 8c111b3f56332a216b18cd57950bdf04ac8f2a98 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Sat, 19 Nov 2011 17:34:29 +0800 Subject: jbd: clear revoked flag on buffers before a new transaction started Currently, we clear revoked flag only when a block is reused. However, this can tigger a false journal error. Consider a situation when a block is used as a meta block and is deleted(revoked) in ordered mode, then the block is allocated as a data block to a file. At this moment, user changes the file's journal mode from ordered to journaled and truncates the file. The block will be considered re-revoked by journal because it has revoked flag still pending from the last transaction and an assertion triggers. We fix the problem by keeping the revoked status more uptodate - we clear revoked flag when switching revoke tables to reflect there is no revoked buffers in current transaction any more. Signed-off-by: Yongqiang Yang Signed-off-by: Jan Kara diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 8799207..f2b9a57 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -392,6 +392,12 @@ void journal_commit_transaction(journal_t *journal) jbd_debug (3, "JBD: commit phase 1\n"); /* + * Clear revoked flag to reflect there is no revoked buffers + * in the next transaction which is going to be started. + */ + journal_clear_buffer_revoked_flags(journal); + + /* * Switch to a new revoke table. */ journal_switch_revoke_table(journal); diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index 305a907..25c713e 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -47,6 +47,10 @@ * overwriting the new data. We don't even need to clear the revoke * bit here. * + * We cache revoke status of a buffer in the current transaction in b_states + * bits. As the name says, revokevalid flag indicates that the cached revoke + * status of a buffer is valid and we can rely on the cached status. + * * Revoke information on buffers is a tri-state value: * * RevokeValid clear: no cached revoke status, need to look it up @@ -479,6 +483,36 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) return did_revoke; } +/* + * journal_clear_revoked_flags clears revoked flag of buffers in + * revoke table to reflect there is no revoked buffer in the next + * transaction which is going to be started. + */ +void journal_clear_buffer_revoked_flags(journal_t *journal) +{ + struct jbd_revoke_table_s *revoke = journal->j_revoke; + int i = 0; + + for (i = 0; i < revoke->hash_size; i++) { + struct list_head *hash_list; + struct list_head *list_entry; + hash_list = &revoke->hash_table[i]; + + list_for_each(list_entry, hash_list) { + struct jbd_revoke_record_s *record; + struct buffer_head *bh; + record = (struct jbd_revoke_record_s *)list_entry; + bh = __find_get_block(journal->j_fs_dev, + record->blocknr, + journal->j_blocksize); + if (bh) { + clear_buffer_revoked(bh); + __brelse(bh); + } + } + } +} + /* journal_switch_revoke table select j_revoke for next transaction * we do not want to suspend any processing until all revokes are * written -bzzz diff --git a/include/linux/jbd.h b/include/linux/jbd.h index c7acdde..492cc12 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -913,6 +913,7 @@ extern int journal_set_revoke(journal_t *, unsigned int, tid_t); extern int journal_test_revoke(journal_t *, unsigned int, tid_t); extern void journal_clear_revoke(journal_t *); extern void journal_switch_revoke_table(journal_t *journal); +extern void journal_clear_buffer_revoked_flags(journal_t *journal); /* * The log thread user interface: -- cgit v0.10.2 From bcdd0c1600903e9222abfcde28947406020ccb5d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 22 Nov 2011 11:00:20 +0300 Subject: ext3: NULL dereference in ext3_evict_inode() This is an fsfuzzer bug. ->s_journal is set at the end of ext3_load_journal() but we try to use it in the error handling from ext3_get_journal() while it's still NULL. [ 337.039041] BUG: unable to handle kernel NULL pointer dereference at 0000000000000024 [ 337.040380] IP: [] _raw_spin_lock+0x9/0x30 [ 337.041687] PGD 0 [ 337.043118] Oops: 0002 [#1] SMP [ 337.044483] CPU 3 [ 337.044495] Modules linked in: ecb md4 cifs fuse kvm_intel kvm brcmsmac brcmutil crc8 cordic r8169 [last unloaded: scsi_wait_scan] [ 337.047633] [ 337.049259] Pid: 8308, comm: mount Not tainted 3.2.0-rc2-next-20111121+ #24 SAMSUNG ELECTRONICS CO., LTD. RV411/RV511/E3511/S3511 /RV411/RV511/E3511/S3511 [ 337.051064] RIP: 0010:[] [] _raw_spin_lock+0x9/0x30 [ 337.052879] RSP: 0018:ffff8800b1d11ae8 EFLAGS: 00010282 [ 337.054668] RAX: 0000000000000100 RBX: 0000000000000000 RCX: ffff8800b77c2000 [ 337.056400] RDX: ffff8800a97b5c00 RSI: 0000000000000000 RDI: 0000000000000024 [ 337.058099] RBP: ffff8800b1d11ae8 R08: 6000000000000000 R09: e018000000000000 [ 337.059841] R10: ff67366cc2607c03 R11: 00000000110688e6 R12: 0000000000000000 [ 337.061607] R13: 0000000000000000 R14: 0000000000000000 R15: ffff8800a78f06e8 [ 337.063385] FS: 00007f9d95652800(0000) GS:ffff8800b7180000(0000) knlGS:0000000000000000 [ 337.065110] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 337.066801] CR2: 0000000000000024 CR3: 00000000aef2c000 CR4: 00000000000006e0 [ 337.068581] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 337.070321] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 337.072105] Process mount (pid: 8308, threadinfo ffff8800b1d10000, task ffff8800b1d02be0) [ 337.073800] Stack: [ 337.075487] ffff8800b1d11b08 ffffffff811f48cf ffff88007ac9b158 0000000000000000 [ 337.077255] ffff8800b1d11b38 ffffffff8119405d ffff88007ac9b158 ffff88007ac9b250 [ 337.078851] ffffffff8181bda0 ffffffff8181bda0 ffff8800b1d11b68 ffffffff81131e31 [ 337.080284] Call Trace: [ 337.081706] [] log_start_commit+0x1f/0x40 [ 337.083107] [] ext3_evict_inode+0x1fd/0x2a0 [ 337.084490] [] evict+0xa1/0x1a0 [ 337.085857] [] iput+0x101/0x210 [ 337.087220] [] iget_failed+0x21/0x30 [ 337.088581] [] ext3_iget+0x15c/0x450 [ 337.089936] [] ? ext3_rsv_window_add+0x81/0x100 [ 337.091284] [] ext3_get_journal+0x15/0xde [ 337.092641] [] ext3_fill_super+0xf2b/0x1c30 [ 337.093991] [] ? register_shrinker+0x4d/0x60 [ 337.095332] [] mount_bdev+0x1a2/0x1e0 [ 337.096680] [] ? ext3_setup_super+0x210/0x210 [ 337.098026] [] ext3_mount+0x10/0x20 [ 337.099362] [] mount_fs+0x3e/0x1b0 [ 337.100759] [] ? __alloc_percpu+0xb/0x10 [ 337.102330] [] vfs_kern_mount+0x65/0xc0 [ 337.103889] [] do_kern_mount+0x4f/0x100 [ 337.105442] [] do_mount+0x19c/0x890 [ 337.106989] [] ? memdup_user+0x46/0x90 [ 337.108572] [] ? strndup_user+0x53/0x70 [ 337.110114] [] sys_mount+0x8b/0xe0 [ 337.111617] [] system_call_fastpath+0x16/0x1b [ 337.113133] Code: 38 c2 74 0f 66 0f 1f 44 00 00 f3 90 0f b6 03 38 c2 75 f7 48 83 c4 08 5b 5d c3 0f 1f 84 00 00 00 00 00 55 b8 00 01 00 00 48 89 e5 66 0f c1 07 0f b6 d4 38 c2 74 0c 0f 1f 00 f3 90 0f b6 07 38 [ 337.116588] RIP [] _raw_spin_lock+0x9/0x30 [ 337.118260] RSP [ 337.119998] CR2: 0000000000000024 [ 337.188701] ---[ end trace c36d790becac1615 ]--- Signed-off-by: Dan Carpenter Signed-off-by: Jan Kara diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 85fe655..9da9125 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -223,8 +223,12 @@ void ext3_evict_inode (struct inode *inode) * * Note that directories do not have this problem because they don't * use page cache. + * + * The s_journal check handles the case when ext3_get_journal() fails + * and puts the journal inode. */ if (inode->i_nlink && ext3_should_journal_data(inode) && + EXT3_SB(inode->i_sb)->s_journal && (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { tid_t commit_tid = atomic_read(&ei->i_datasync_tid); journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; -- cgit v0.10.2 From d03e1292c46721f60830c5d2e334966472002ed0 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 5 Dec 2011 15:55:11 +0800 Subject: ext3: replace ll_rw_block with other functions ll_rw_block() is deprecated. Thus we replace it with other functions. CC: Jan Kara Signed-off-by: Zheng Liu Signed-off-by: Jan Kara diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 9da9125..a8d3217 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1136,9 +1136,11 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, bh = ext3_getblk(handle, inode, block, create, err); if (!bh) return bh; - if (buffer_uptodate(bh)) + if (bh_uptodate_or_lock(bh)) return bh; - ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -2068,12 +2070,10 @@ static int ext3_block_truncate_page(struct inode *inode, loff_t from) if (PageUptodate(page)) set_buffer_uptodate(bh); - if (!buffer_uptodate(bh)) { - err = -EIO; - ll_rw_block(READ, 1, &bh); - wait_on_buffer(bh); + if (!bh_uptodate_or_lock(bh)) { + err = bh_submit_read(bh); /* Uhhuh. Read error. Complain and punt. */ - if (!buffer_uptodate(bh)) + if (err) goto unlock; } diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 642dc6d..337c3f3 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -921,9 +921,12 @@ restart: num++; bh = ext3_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; - if (bh) - ll_rw_block(READ | REQ_META | REQ_PRIO, - 1, &bh); + if (bh && !bh_uptodate_or_lock(bh)) { + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ | REQ_META | REQ_PRIO, + bh); + } } } if ((bh = bh_use[ra_ptr++]) == NULL) diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 767fa3a..662cef4 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2231,11 +2231,11 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, goto out_bdev; } journal->j_private = sb; - ll_rw_block(READ, 1, &journal->j_sb_buffer); - wait_on_buffer(journal->j_sb_buffer); - if (!buffer_uptodate(journal->j_sb_buffer)) { - ext3_msg(sb, KERN_ERR, "I/O error on journal device"); - goto out_journal; + if (!bh_uptodate_or_lock(journal->j_sb_buffer)) { + if (bh_submit_read(journal->j_sb_buffer)) { + ext3_msg(sb, KERN_ERR, "I/O error on journal device"); + goto out_journal; + } } if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { ext3_msg(sb, KERN_ERR, -- cgit v0.10.2 From 1415dd8705394399d59a3df1ab48d149e1e41e77 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 8 Dec 2011 21:13:46 +0100 Subject: ext3: Fix error handling on inode bitmap corruption When insert_inode_locked() fails in ext3_new_inode() it most likely means inode bitmap got corrupted and we allocated again inode which is already in use. Also doing unlock_new_inode() during error recovery is wrong since inode does not have I_NEW set. Fix the problem by jumping to fail: (instead of fail_drop:) which declares filesystem error and does not call unlock_new_inode(). Reviewed-by: Eric Sandeen Signed-off-by: Jan Kara diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 5c866e0..adae962 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -525,8 +525,12 @@ got: if (IS_DIRSYNC(inode)) handle->h_sync = 1; if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + /* + * Likely a bitmap corruption causing inode to be allocated + * twice. + */ + err = -EIO; + goto fail; } spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; -- cgit v0.10.2 From ef6919c283257155def420bd247140e9fd2e9843 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 9 Dec 2011 00:08:58 +0100 Subject: ext2: Fix error handling on inode bitmap corruption When insert_inode_locked() fails in ext2_new_inode() it most likely means inode bitmap got corrupted and we allocated again inode which is already in use. Also doing unlock_new_inode() during error recovery is wrong since the inode does not have I_NEW set. Fix the problem by informing about filesystem error and jumping to fail: (instead of fail_drop:) which doesn't call unlock_new_inode(). Reviewed-by: Eric Sandeen Signed-off-by: Jan Kara diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index c4e81df..78502c1 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c @@ -573,8 +573,11 @@ got: inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); if (insert_inode_locked(inode) < 0) { - err = -EINVAL; - goto fail_drop; + ext2_error(sb, "ext2_new_inode", + "inode number already in use - inode=%lu", + (unsigned long) ino); + err = -EIO; + goto fail; } dquot_initialize(inode); -- cgit v0.10.2 From 7b0b0933a3ff6052addf4d49ea99f75ab27df2d0 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 10 Dec 2011 01:43:33 +0100 Subject: udf: Cleanup calling convention of inode_getblk() inode_getblk() always returned NULL and passed results in its parameters. Make the function return something useful - found block number. Signed-off-by: Jan Kara diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 4fd1d80..1bd2c42 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -53,8 +53,7 @@ static int udf_update_inode(struct inode *, int); static void udf_fill_inode(struct inode *, struct buffer_head *); static int udf_sync_inode(struct inode *inode); static int udf_alloc_i_data(struct inode *inode, size_t size); -static struct buffer_head *inode_getblk(struct inode *, sector_t, int *, - sector_t *, int *); +static sector_t inode_getblk(struct inode *, sector_t, int *, int *); static int8_t udf_insert_aext(struct inode *, struct extent_position, struct kernel_lb_addr, uint32_t); static void udf_split_extents(struct inode *, int *, int, int, @@ -310,7 +309,6 @@ static int udf_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_result, int create) { int err, new; - struct buffer_head *bh; sector_t phys = 0; struct udf_inode_info *iinfo; @@ -323,7 +321,6 @@ static int udf_get_block(struct inode *inode, sector_t block, err = -EIO; new = 0; - bh = NULL; iinfo = UDF_I(inode); down_write(&iinfo->i_data_sem); @@ -332,13 +329,10 @@ static int udf_get_block(struct inode *inode, sector_t block, iinfo->i_next_alloc_goal++; } - err = 0; - bh = inode_getblk(inode, block, &err, &phys, &new); - BUG_ON(bh); - if (err) + phys = inode_getblk(inode, block, &err, &new); + if (!phys) goto abort; - BUG_ON(!phys); if (new) set_buffer_new(bh_result); @@ -547,11 +541,10 @@ out: return err; } -static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, - int *err, sector_t *phys, int *new) +static sector_t inode_getblk(struct inode *inode, sector_t block, + int *err, int *new) { static sector_t last_block; - struct buffer_head *result = NULL; struct kernel_long_ad laarr[EXTENT_MERGE_SIZE]; struct extent_position prev_epos, cur_epos, next_epos; int count = 0, startnum = 0, endnum = 0; @@ -566,6 +559,8 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, int goal = 0, pgoal = iinfo->i_location.logicalBlockNum; int lastblock = 0; + *err = 0; + *new = 0; prev_epos.offset = udf_file_entry_alloc_offset(inode); prev_epos.block = iinfo->i_location; prev_epos.bh = NULL; @@ -635,8 +630,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, brelse(cur_epos.bh); brelse(next_epos.bh); newblock = udf_get_lb_pblock(inode->i_sb, &eloc, offset); - *phys = newblock; - return NULL; + return newblock; } last_block = block; @@ -664,7 +658,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, brelse(cur_epos.bh); brelse(next_epos.bh); *err = ret; - return NULL; + return 0; } c = 0; offset = 0; @@ -729,7 +723,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, if (!newblocknum) { brelse(prev_epos.bh); *err = -ENOSPC; - return NULL; + return 0; } iinfo->i_lenExtents += inode->i_sb->s_blocksize; } @@ -761,10 +755,10 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, newblock = udf_get_pblock(inode->i_sb, newblocknum, iinfo->i_location.partitionReferenceNum, 0); - if (!newblock) - return NULL; - *phys = newblock; - *err = 0; + if (!newblock) { + *err = -EIO; + return 0; + } *new = 1; iinfo->i_next_alloc_block = block; iinfo->i_next_alloc_goal = newblocknum; @@ -775,7 +769,7 @@ static struct buffer_head *inode_getblk(struct inode *inode, sector_t block, else mark_inode_dirty(inode); - return result; + return newblock; } static void udf_split_extents(struct inode *inode, int *c, int offset, -- cgit v0.10.2 From d2eb8c359309ec45d6bf5b147303ab8e13be86ea Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Sat, 10 Dec 2011 02:30:48 +0100 Subject: udf: Fix deadlock when converting file from in-ICB one to normal one During BKL removal in 2.6.38, conversion of files from in-ICB format to normal format got broken. We call ->writepage with i_data_sem held but udf_get_block() also acquires i_data_sem thus creating A-A deadlock. We fix the problem by dropping i_data_sem before calling ->writepage() which is safe since i_mutex still protects us against any changes in the file. Also fix pagelock - i_data_sem lock inversion in udf_expand_file_adinicb() by dropping i_data_sem before calling find_or_create_page(). CC: stable@kernel.org Reported-by: Matthias Matiak Tested-by: Matthias Matiak Reviewed-by: Namjae Jeon Signed-off-by: Jan Kara diff --git a/fs/udf/file.c b/fs/udf/file.c index d8ffa7c..dca0c38 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -125,7 +125,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, err = udf_expand_file_adinicb(inode); if (err) { udf_debug("udf_expand_adinicb: err=%d\n", err); - up_write(&iinfo->i_data_sem); return err; } } else { @@ -133,9 +132,10 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, iinfo->i_lenAlloc = pos + count; else iinfo->i_lenAlloc = inode->i_size; + up_write(&iinfo->i_data_sem); } - } - up_write(&iinfo->i_data_sem); + } else + up_write(&iinfo->i_data_sem); retval = generic_file_aio_write(iocb, iov, nr_segs, ppos); if (retval > 0) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 1bd2c42..4f7b1ff 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -150,6 +150,12 @@ const struct address_space_operations udf_aops = { .bmap = udf_bmap, }; +/* + * Expand file stored in ICB to a normal one-block-file + * + * This function requires i_data_sem for writing and releases it. + * This function requires i_mutex held + */ int udf_expand_file_adinicb(struct inode *inode) { struct page *page; @@ -168,9 +174,15 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + up_write(&iinfo->i_data_sem); mark_inode_dirty(inode); return 0; } + /* + * Release i_data_sem so that we can lock a page - page lock ranks + * above i_data_sem. i_mutex still protects us against file changes. + */ + up_write(&iinfo->i_data_sem); page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS); if (!page) @@ -186,6 +198,7 @@ int udf_expand_file_adinicb(struct inode *inode) SetPageUptodate(page); kunmap(page); } + down_write(&iinfo->i_data_sem); memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00, iinfo->i_lenAlloc); iinfo->i_lenAlloc = 0; @@ -195,17 +208,20 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; /* from now on we have normal address_space methods */ inode->i_data.a_ops = &udf_aops; + up_write(&iinfo->i_data_sem); err = inode->i_data.a_ops->writepage(page, &udf_wbc); if (err) { /* Restore everything back so that we don't lose data... */ lock_page(page); kaddr = kmap(page); + down_write(&iinfo->i_data_sem); memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr, inode->i_size); kunmap(page); unlock_page(page); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; inode->i_data.a_ops = &udf_adinicb_aops; + up_write(&iinfo->i_data_sem); } page_cache_release(page); mark_inode_dirty(inode); @@ -1105,10 +1121,9 @@ int udf_setsize(struct inode *inode, loff_t newsize) if (bsize < (udf_file_entry_alloc_offset(inode) + newsize)) { err = udf_expand_file_adinicb(inode); - if (err) { - up_write(&iinfo->i_data_sem); + if (err) return err; - } + down_write(&iinfo->i_data_sem); } else iinfo->i_lenAlloc = newsize; } -- cgit v0.10.2 From fef2e9f3301934773e4f1b3cc5c7bffb119346b8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 12 Dec 2011 15:13:50 +0100 Subject: udf: Treat symlink component of type 2 as / MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we ignore symlink component of type 2. But mkisofs and other OS' seem to treat it as / so do the same for compatibility. Reported-by: "Gábor S." Signed-off-by: Jan Kara diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index b1d4488..d7c6dbe 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -41,10 +41,16 @@ static void udf_pc_to_char(struct super_block *sb, unsigned char *from, pc = (struct pathComponent *)(from + elen); switch (pc->componentType) { case 1: - if (pc->lengthComponentIdent == 0) { - p = to; - *p++ = '/'; - } + /* + * Symlink points to some place which should be agreed + * upon between originator and receiver of the media. Ignore. + */ + if (pc->lengthComponentIdent > 0) + break; + /* Fall through */ + case 2: + p = to; + *p++ = '/'; break; case 3: memcpy(p, "../", 3); -- cgit v0.10.2 From a06d789b424190e9f59da391681f908486db2554 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2011 17:35:34 +0100 Subject: reiserfs: Fix quota mount option parsing When jqfmt mount option is not specified on remount, we mistakenly clear s_jquota_fmt value stored in superblock. Fix the problem. CC: stable@kernel.org CC: reiserfs-devel@vger.kernel.org Signed-off-by: Jan Kara diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 14363b9..f9eaa4a 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1164,7 +1164,8 @@ static void handle_quota_files(struct super_block *s, char **qf_names, kfree(REISERFS_SB(s)->s_qf_names[i]); REISERFS_SB(s)->s_qf_names[i] = qf_names[i]; } - REISERFS_SB(s)->s_jquota_fmt = *qfmt; + if (*qfmt) + REISERFS_SB(s)->s_jquota_fmt = *qfmt; } #endif -- cgit v0.10.2 From a9e36da655e54545c3289b2a0700b5c443de0edd Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 21 Dec 2011 21:18:43 +0100 Subject: reiserfs: Force inode evictions before umount to avoid crash This patch fixes a crash in reiserfs_delete_xattrs during umount. When shrink_dcache_for_umount clears the dcache from generic_shutdown_super, delayed evictions are forced to disk. If an evicted inode has extended attributes associated with it, it will need to walk the xattr tree to locate and remove them. But since shrink_dcache_for_umount will BUG if it encounters active dentries, the xattr tree must be released before it's called or it will crash during every umount. This patch forces the evictions to occur before generic_shutdown_super by calling shrink_dcache_sb first. The additional evictions caused by the removal of each associated xattr file and dir will be automatically handled as they're added to the LRU list. CC: reiserfs-devel@vger.kernel.org CC: stable@kernel.org Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f9eaa4a..5e3527b 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -453,16 +453,20 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { - if (REISERFS_SB(s)->xattr_root) { - d_invalidate(REISERFS_SB(s)->xattr_root); - dput(REISERFS_SB(s)->xattr_root); - REISERFS_SB(s)->xattr_root = NULL; - } - if (REISERFS_SB(s)->priv_root) { - d_invalidate(REISERFS_SB(s)->priv_root); - dput(REISERFS_SB(s)->priv_root); - REISERFS_SB(s)->priv_root = NULL; - } + /* + * Force any pending inode evictions to occur now. Any + * inodes to be removed that have extended attributes + * associated with them need to clean them up before + * we can release the extended attribute root dentries. + * shrink_dcache_for_umount will BUG if we don't release + * those before it's called so ->put_super is too late. + */ + shrink_dcache_sb(s); + + dput(REISERFS_SB(s)->xattr_root); + REISERFS_SB(s)->xattr_root = NULL; + dput(REISERFS_SB(s)->priv_root); + REISERFS_SB(s)->priv_root = NULL; } kill_block_super(s); -- cgit v0.10.2 From 0048278552e9752fd578c3d8deee756987c10873 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Dec 2011 14:52:21 +0100 Subject: jbd: Remove j_barrier mutex j_barrier mutex is used for serializing different journal lock operations. The problem with it is that e.g. FIFREEZE ioctl results in process leaving kernel with j_barrier mutex held which makes lockdep freak out. Also hibernation code wants to freeze filesystem but it cannot do so because it then cannot hibernate the system because of mutex being locked. So we remove j_barrier mutex and use direct wait on j_barrier_count instead. Since locking journal is a rare operation we don't have to care about fairness or such things. CC: Andrew Morton Acked-by: Joel Becker Signed-off-by: Jan Kara diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index fea8dd6..1656dc2 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -721,7 +721,6 @@ static journal_t * journal_init_common (void) init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); - mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_list_lock); diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 7e59c6e..7fce94b 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -426,17 +426,34 @@ int journal_restart(handle_t *handle, int nblocks) * void journal_lock_updates () - establish a transaction barrier. * @journal: Journal to establish a barrier on. * - * This locks out any further updates from being started, and blocks - * until all existing updates have completed, returning only once the - * journal is in a quiescent state with no updates running. - * - * The journal lock should not be held on entry. + * This locks out any further updates from being started, and blocks until all + * existing updates have completed, returning only once the journal is in a + * quiescent state with no updates running. + * + * We do not use simple mutex for synchronization as there are syscalls which + * want to return with filesystem locked and that trips up lockdep. Also + * hibernate needs to lock filesystem but locked mutex then blocks hibernation. + * Since locking filesystem is rare operation, we use simple counter and + * waitqueue for locking. */ void journal_lock_updates(journal_t *journal) { DEFINE_WAIT(wait); +wait: + /* Wait for previous locked operation to finish */ + wait_event(journal->j_wait_transaction_locked, + journal->j_barrier_count == 0); + spin_lock(&journal->j_state_lock); + /* + * Check reliably under the lock whether we are the ones winning the race + * and locking the journal + */ + if (journal->j_barrier_count > 0) { + spin_unlock(&journal->j_state_lock); + goto wait; + } ++journal->j_barrier_count; /* Wait until there are no running updates */ @@ -460,14 +477,6 @@ void journal_lock_updates(journal_t *journal) spin_lock(&journal->j_state_lock); } spin_unlock(&journal->j_state_lock); - - /* - * We have now established a barrier against other normal updates, but - * we also need to barrier against other journal_lock_updates() calls - * to make sure that we serialise special journal-locked operations - * too. - */ - mutex_lock(&journal->j_barrier); } /** @@ -475,14 +484,11 @@ void journal_lock_updates(journal_t *journal) * @journal: Journal to release the barrier on. * * Release a transaction barrier obtained with journal_lock_updates(). - * - * Should be called without the journal lock held. */ void journal_unlock_updates (journal_t *journal) { J_ASSERT(journal->j_barrier_count != 0); - mutex_unlock(&journal->j_barrier); spin_lock(&journal->j_state_lock); --journal->j_barrier_count; spin_unlock(&journal->j_state_lock); diff --git a/include/linux/jbd.h b/include/linux/jbd.h index 492cc12..d211732 100644 --- a/include/linux/jbd.h +++ b/include/linux/jbd.h @@ -497,7 +497,6 @@ struct transaction_s * @j_format_version: Version of the superblock format * @j_state_lock: Protect the various scalars in the journal * @j_barrier_count: Number of processes waiting to create a barrier lock - * @j_barrier: The barrier lock itself * @j_running_transaction: The current running transaction.. * @j_committing_transaction: the transaction we are pushing to disk * @j_checkpoint_transactions: a linked circular list of all transactions @@ -580,9 +579,6 @@ struct journal_s */ int j_barrier_count; - /* The barrier lock itself */ - struct mutex j_barrier; - /* * Transactions: The current running transaction... * [j_state_lock] [caller holding open handle] -- cgit v0.10.2 From 33c104d415e92a51aaf638dc3d93920cfa601e5c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 22 Dec 2011 16:49:05 +0100 Subject: ext3: Don't warn from writepage when readonly inode is spotted after error WARN_ON_ONCE(IS_RDONLY(inode)) tends to trip when filesystem hits error and is remounted read-only. This unnecessarily scares users (well, they should be scared because of filesystem error, but the stack trace distracts them from the right source of their fear ;-). We could as well just remove the WARN_ON but it's not hard to fix it to not trip on filesystem with errors and not use more cycles in the common case so that's what we do. CC: stable@kernel.org Signed-off-by: Jan Kara diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a8d3217..fbf6599 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1623,7 +1623,13 @@ static int ext3_ordered_writepage(struct page *page, int err; J_ASSERT(PageLocked(page)); - WARN_ON_ONCE(IS_RDONLY(inode)); + /* + * We don't want to warn for emergency remount. The condition is + * ordered to avoid dereferencing inode->i_sb in non-error case to + * avoid slow-downs. + */ + WARN_ON_ONCE(IS_RDONLY(inode) && + !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); /* * We give up here if we're reentered, because it might be for a @@ -1698,7 +1704,13 @@ static int ext3_writeback_writepage(struct page *page, int err; J_ASSERT(PageLocked(page)); - WARN_ON_ONCE(IS_RDONLY(inode)); + /* + * We don't want to warn for emergency remount. The condition is + * ordered to avoid dereferencing inode->i_sb in non-error case to + * avoid slow-downs. + */ + WARN_ON_ONCE(IS_RDONLY(inode) && + !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); if (ext3_journal_current_handle()) goto out_fail; @@ -1741,7 +1753,13 @@ static int ext3_journalled_writepage(struct page *page, int err; J_ASSERT(PageLocked(page)); - WARN_ON_ONCE(IS_RDONLY(inode)); + /* + * We don't want to warn for emergency remount. The condition is + * ordered to avoid dereferencing inode->i_sb in non-error case to + * avoid slow-downs. + */ + WARN_ON_ONCE(IS_RDONLY(inode) && + !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS)); if (ext3_journal_current_handle()) goto no_write; -- cgit v0.10.2 From 853a0c25baf96b028de1654bea1e0c8857eadf3d Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 23 Dec 2011 11:53:07 +0100 Subject: udf: Mark LVID buffer as uptodate before marking it dirty When we hit EIO while writing LVID, the buffer uptodate bit is cleared. This then results in an anoying warning from mark_buffer_dirty() when we write the buffer again. So just set uptodate flag unconditionally. Reviewed-by: Namjae Jeon Signed-off-by: Jan Kara diff --git a/fs/udf/super.c b/fs/udf/super.c index e185253..87cb24a 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1799,6 +1799,12 @@ static void udf_close_lvid(struct super_block *sb) le16_to_cpu(lvid->descTag.descCRCLength))); lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag); + /* + * We set buffer uptodate unconditionally here to avoid spurious + * warnings from mark_buffer_dirty() when previous EIO has marked + * the buffer as !uptodate + */ + set_buffer_uptodate(bh); mark_buffer_dirty(bh); sbi->s_lvid_dirty = 0; mutex_unlock(&sbi->s_alloc_mutex); -- cgit v0.10.2 From 6c2155b9cc5a193e85194bbeaae2e2e4512dd597 Mon Sep 17 00:00:00 2001 From: Djalal Harouni Date: Tue, 3 Jan 2012 02:31:52 +0100 Subject: ext{3,4}: Fix potential race when setversion ioctl updates inode The EXT{3,4}_IOC_SETVERSION ioctl() updates i_ctime and i_generation without i_mutex. This can lead to a race with the other operations that update i_ctime. This is not a big issue but let's make the ioctl consistent with how we handle e.g. other timestamp updates and use i_mutex to protect inode changes. Signed-off-by: Djalal Harouni Signed-off-by: Jan Kara diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index ba1b54e..e7b2ed9 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -134,10 +134,11 @@ flags_out: goto setversion_out; } + mutex_lock(&inode->i_mutex); handle = ext3_journal_start(inode, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); - goto setversion_out; + goto unlock_out; } err = ext3_reserve_inode_write(handle, inode, &iloc); if (err == 0) { @@ -146,6 +147,9 @@ flags_out: err = ext3_mark_iloc_dirty(handle, inode, &iloc); } ext3_journal_stop(handle); + +unlock_out: + mutex_unlock(&inode->i_mutex); setversion_out: mnt_drop_write(filp->f_path.mnt); return err; diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a567968..46a8de6 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -158,10 +158,11 @@ flags_out: goto setversion_out; } + mutex_lock(&inode->i_mutex); handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) { err = PTR_ERR(handle); - goto setversion_out; + goto unlock_out; } err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { @@ -170,6 +171,9 @@ flags_out: err = ext4_mark_iloc_dirty(handle, inode, &iloc); } ext4_journal_stop(handle); + +unlock_out: + mutex_unlock(&inode->i_mutex); setversion_out: mnt_drop_write(filp->f_path.mnt); return err; -- cgit v0.10.2 From 302bf2f3259948c93361d501b04a5ed69c3bd4f8 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 4 Jan 2012 15:59:47 -0500 Subject: ext2/3/4: delete unneeded includes of module.h Delete any instances of include module.h that were not strictly required. In the case of ext2, the declaration of MODULE_LICENSE etc. were in inode.c but the module_init/exit were in super.c, so relocate the MODULE_LICENCE/AUTHOR block to super.c which makes it consistent with ext3 and ext4 at the same time. Signed-off-by: Paul Gortmaker Signed-off-by: Jan Kara diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 91a6945..740cad8 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -36,10 +35,6 @@ #include "acl.h" #include "xip.h" -MODULE_AUTHOR("Remy Card and others"); -MODULE_DESCRIPTION("Second Extended Filesystem"); -MODULE_LICENSE("GPL"); - static int __ext2_write_inode(struct inode *inode, int do_sync); /* diff --git a/fs/ext2/super.c b/fs/ext2/super.c index bd8ac16..9e7e203 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1521,5 +1521,8 @@ static void __exit exit_ext2_fs(void) exit_ext2_xattr(); } +MODULE_AUTHOR("Remy Card and others"); +MODULE_DESCRIPTION("Second Extended Filesystem"); +MODULE_LICENSE("GPL"); module_init(init_ext2_fs) module_exit(exit_ext2_fs) diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index d27b71f..6dcafc7 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -54,7 +54,6 @@ */ #include -#include #include #include #include diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index c922adc..be7a8d0 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c @@ -3,7 +3,6 @@ * Handler for storing security labels as extended attributes. */ -#include #include #include #include diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c index 667e46a..2989467 100644 --- a/fs/ext2/xattr_trusted.c +++ b/fs/ext2/xattr_trusted.c @@ -5,7 +5,6 @@ * Copyright (C) 2003 by Andreas Gruenbacher, */ -#include #include #include #include diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c index 099d20f..f470e44 100644 --- a/fs/ext2/xattr_user.c +++ b/fs/ext2/xattr_user.c @@ -6,7 +6,6 @@ */ #include -#include #include #include "ext2.h" #include "xattr.h" diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index fbf6599..261979f 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -22,7 +22,6 @@ * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 */ -#include #include #include #include diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 3c218b8..ea26f2a 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c @@ -3,7 +3,6 @@ * Handler for storing security labels as extended attributes. */ -#include #include #include #include diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c index dc8edda..2526a88 100644 --- a/fs/ext3/xattr_trusted.c +++ b/fs/ext3/xattr_trusted.c @@ -5,7 +5,6 @@ * Copyright (C) 2003 by Andreas Gruenbacher, */ -#include #include #include #include diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c index 7a32197..b32e473 100644 --- a/fs/ext3/xattr_user.c +++ b/fs/ext3/xattr_user.c @@ -5,7 +5,6 @@ * Copyright (C) 2001 by Andreas Gruenbacher, */ -#include #include #include #include diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c index 8efb2f0..3f11656 100644 --- a/fs/ext4/block_validity.c +++ b/fs/ext4/block_validity.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 61fa9e1..130ffe4 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -29,7 +29,6 @@ * - smart tree reduction */ -#include #include #include #include diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 3cfc73f..830e1b2 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -20,7 +20,6 @@ * (sct@redhat.com), 1993, 1998 */ -#include #include "ext4_jbd2.h" #include "truncate.h" diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 240f6e2..e538580 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -18,7 +18,6 @@ * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 */ -#include #include #include #include diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 16ac228..e7d6bb0 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -12,7 +12,6 @@ * */ -#include #include #include "ext4_jbd2.h" diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 7ce1d0b..b175853 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -6,7 +6,6 @@ * Written by Theodore Ts'o, 2010. */ -#include #include #include #include diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index 34e4350..b60f9f8 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -3,7 +3,6 @@ * Handler for storing security labels as extended attributes. */ -#include #include #include #include diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 37e6ebc..95f1f4a 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -5,7 +5,6 @@ * Copyright (C) 2003 by Andreas Gruenbacher, */ -#include #include #include #include diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index 98c3753..0edb7611 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -5,7 +5,6 @@ * Copyright (C) 2001 by Andreas Gruenbacher, */ -#include #include #include #include "ext4_jbd2.h" -- cgit v0.10.2