From 69d75671d985ccdb291e4d19ddfdecd12440e857 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 12 Sep 2013 22:00:15 +0200 Subject: udf: Fortify LVID loading A user has reported an oops in udf_statfs() that was caused by numOfPartitions entry in LVID structure being corrupted. Fix the problem by verifying whether numOfPartitions makes sense at least to the extent that LVID fits into a single block as it should. Reported-by: Juergen Weigert Signed-off-by: Jan Kara diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index 7e5aae4..6eaf5ed 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -30,18 +30,17 @@ void udf_free_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct udf_sb_info *sbi = UDF_SB(sb); + struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); - mutex_lock(&sbi->s_alloc_mutex); - if (sbi->s_lvid_bh) { - struct logicalVolIntegrityDescImpUse *lvidiu = - udf_sb_lvidiu(sbi); + if (lvidiu) { + mutex_lock(&sbi->s_alloc_mutex); if (S_ISDIR(inode->i_mode)) le32_add_cpu(&lvidiu->numDirs, -1); else le32_add_cpu(&lvidiu->numFiles, -1); udf_updated_lvid(sb); + mutex_unlock(&sbi->s_alloc_mutex); } - mutex_unlock(&sbi->s_alloc_mutex); udf_free_blocks(sb, NULL, &UDF_I(inode)->i_location, 0, 1); } @@ -55,6 +54,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) uint32_t start = UDF_I(dir)->i_location.logicalBlockNum; struct udf_inode_info *iinfo; struct udf_inode_info *dinfo = UDF_I(dir); + struct logicalVolIntegrityDescImpUse *lvidiu; inode = new_inode(sb); @@ -92,12 +92,10 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode, int *err) return NULL; } - if (sbi->s_lvid_bh) { - struct logicalVolIntegrityDescImpUse *lvidiu; - + lvidiu = udf_sb_lvidiu(sb); + if (lvidiu) { iinfo->i_unique = lvid_get_unique_id(sb); mutex_lock(&sbi->s_alloc_mutex); - lvidiu = udf_sb_lvidiu(sbi); if (S_ISDIR(mode)) le32_add_cpu(&lvidiu->numDirs, 1); else diff --git a/fs/udf/super.c b/fs/udf/super.c index 839a2ba..9121938 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -94,13 +94,25 @@ static unsigned int udf_count_free(struct super_block *); static int udf_statfs(struct dentry *, struct kstatfs *); static int udf_show_options(struct seq_file *, struct dentry *); -struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi) +struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb) { - struct logicalVolIntegrityDesc *lvid = - (struct logicalVolIntegrityDesc *)sbi->s_lvid_bh->b_data; - __u32 number_of_partitions = le32_to_cpu(lvid->numOfPartitions); - __u32 offset = number_of_partitions * 2 * - sizeof(uint32_t)/sizeof(uint8_t); + struct logicalVolIntegrityDesc *lvid; + unsigned int partnum; + unsigned int offset; + + if (!UDF_SB(sb)->s_lvid_bh) + return NULL; + lvid = (struct logicalVolIntegrityDesc *)UDF_SB(sb)->s_lvid_bh->b_data; + partnum = le32_to_cpu(lvid->numOfPartitions); + if ((sb->s_blocksize - sizeof(struct logicalVolIntegrityDescImpUse) - + offsetof(struct logicalVolIntegrityDesc, impUse)) / + (2 * sizeof(uint32_t)) < partnum) { + udf_err(sb, "Logical volume integrity descriptor corrupted " + "(numOfPartitions = %u)!\n", partnum); + return NULL; + } + /* The offset is to skip freeSpaceTable and sizeTable arrays */ + offset = partnum * 2 * sizeof(uint32_t); return (struct logicalVolIntegrityDescImpUse *)&(lvid->impUse[offset]); } @@ -629,9 +641,10 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) struct udf_options uopt; struct udf_sb_info *sbi = UDF_SB(sb); int error = 0; + struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); - if (sbi->s_lvid_bh) { - int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); + if (lvidiu) { + int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) return -EACCES; } @@ -1905,11 +1918,12 @@ static void udf_open_lvid(struct super_block *sb) if (!bh) return; - - mutex_lock(&sbi->s_alloc_mutex); lvid = (struct logicalVolIntegrityDesc *)bh->b_data; - lvidiu = udf_sb_lvidiu(sbi); + lvidiu = udf_sb_lvidiu(sb); + if (!lvidiu) + return; + mutex_lock(&sbi->s_alloc_mutex); lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; udf_time_to_disk_stamp(&lvid->recordingDateAndTime, @@ -1937,10 +1951,12 @@ static void udf_close_lvid(struct super_block *sb) if (!bh) return; + lvid = (struct logicalVolIntegrityDesc *)bh->b_data; + lvidiu = udf_sb_lvidiu(sb); + if (!lvidiu) + return; mutex_lock(&sbi->s_alloc_mutex); - lvid = (struct logicalVolIntegrityDesc *)bh->b_data; - lvidiu = udf_sb_lvidiu(sbi); lvidiu->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX; lvidiu->impIdent.identSuffix[1] = UDF_OS_ID_LINUX; udf_time_to_disk_stamp(&lvid->recordingDateAndTime, CURRENT_TIME); @@ -2093,15 +2109,19 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (sbi->s_lvid_bh) { struct logicalVolIntegrityDescImpUse *lvidiu = - udf_sb_lvidiu(sbi); - uint16_t minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev); - uint16_t minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev); - /* uint16_t maxUDFWriteRev = - le16_to_cpu(lvidiu->maxUDFWriteRev); */ + udf_sb_lvidiu(sb); + uint16_t minUDFReadRev; + uint16_t minUDFWriteRev; + if (!lvidiu) { + ret = -EINVAL; + goto error_out; + } + minUDFReadRev = le16_to_cpu(lvidiu->minUDFReadRev); + minUDFWriteRev = le16_to_cpu(lvidiu->minUDFWriteRev); if (minUDFReadRev > UDF_MAX_READ_VERSION) { udf_err(sb, "minUDFReadRev=%x (max is %x)\n", - le16_to_cpu(lvidiu->minUDFReadRev), + minUDFReadRev, UDF_MAX_READ_VERSION); ret = -EINVAL; goto error_out; @@ -2265,11 +2285,7 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf) struct logicalVolIntegrityDescImpUse *lvidiu; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); - if (sbi->s_lvid_bh != NULL) - lvidiu = udf_sb_lvidiu(sbi); - else - lvidiu = NULL; - + lvidiu = udf_sb_lvidiu(sb); buf->f_type = UDF_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = sbi->s_partmaps[sbi->s_partition].s_partition_len; diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index ed401e9..1f32c7b 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -162,7 +162,7 @@ static inline struct udf_sb_info *UDF_SB(struct super_block *sb) return sb->s_fs_info; } -struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi); +struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct super_block *sb); int udf_compute_nr_groups(struct super_block *sb, u32 partition); -- cgit v0.10.2 From 7bc9cc07ee5bbac58bab88e8a5d6e32785f8fd32 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 23 Sep 2013 16:50:28 -0400 Subject: reiserfs: remove useless flush_old_journal_lists Commit a3172027 introduced test_transaction as a requirement for flushing old lists -- but it can never return 1 unless the transaction has already been flushed. As a result, we have a routine that iterates the j_realblocks list but doesn't actually do anything. Since it's been this way since 2006 and the latency numbers were what Chris expected, let's just rip it out. Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 73feacc4..7a37473 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1163,21 +1163,6 @@ static struct reiserfs_journal_list *find_newer_jl_for_cn(struct return NULL; } -static int newer_jl_done(struct reiserfs_journal_cnode *cn) -{ - struct super_block *sb = cn->sb; - b_blocknr_t blocknr = cn->blocknr; - - cn = cn->hprev; - while (cn) { - if (cn->sb == sb && cn->blocknr == blocknr && cn->jlist && - atomic_read(&cn->jlist->j_commit_left) != 0) - return 0; - cn = cn->hprev; - } - return 1; -} - static void remove_journal_hash(struct super_block *, struct reiserfs_journal_cnode **, struct reiserfs_journal_list *, unsigned long, @@ -1593,31 +1578,6 @@ static int flush_journal_list(struct super_block *s, return err; } -static int test_transaction(struct super_block *s, - struct reiserfs_journal_list *jl) -{ - struct reiserfs_journal_cnode *cn; - - if (jl->j_len == 0 || atomic_read(&jl->j_nonzerolen) == 0) - return 1; - - cn = jl->j_realblock; - while (cn) { - /* if the blocknr == 0, this has been cleared from the hash, - ** skip it - */ - if (cn->blocknr == 0) { - goto next; - } - if (cn->bh && !newer_jl_done(cn)) - return 0; - next: - cn = cn->next; - cond_resched(); - } - return 0; -} - static int write_one_transaction(struct super_block *s, struct reiserfs_journal_list *jl, struct buffer_chunk *chunk) @@ -3868,27 +3828,6 @@ int reiserfs_prepare_for_journal(struct super_block *sb, return 1; } -static void flush_old_journal_lists(struct super_block *s) -{ - struct reiserfs_journal *journal = SB_JOURNAL(s); - struct reiserfs_journal_list *jl; - struct list_head *entry; - time_t now = get_seconds(); - - while (!list_empty(&journal->j_journal_list)) { - entry = journal->j_journal_list.next; - jl = JOURNAL_LIST_ENTRY(entry); - /* this check should always be run, to send old lists to disk */ - if (jl->j_timestamp < (now - (JOURNAL_MAX_TRANS_AGE * 4)) && - atomic_read(&jl->j_commit_left) == 0 && - test_transaction(s, jl)) { - flush_used_journal_lists(s, jl); - } else { - break; - } - } -} - /* ** long and ugly. If flush, will not return until all commit ** blocks and all real buffers in the trans are on disk. @@ -4232,7 +4171,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, } } } - flush_old_journal_lists(sb); journal->j_current_jl->j_list_bitmap = get_list_bitmap(sb, journal->j_current_jl); -- cgit v0.10.2 From 721a769c034204e8e5f6ca4ecbff249e0225333b Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Mon, 23 Sep 2013 16:50:42 -0400 Subject: reiserfs: fix race with flush_used_journal_lists and flush_journal_list There are two locks involved in managing the journal lists. The general reiserfs_write_lock and the journal->j_flush_mutex. While flush_journal_list is sleeping to acquire the j_flush_mutex or to submit a block for write, it will drop the write lock. This allows another thread to acquire the write lock and ultimately call flush_used_journal_lists to traverse the list of journal lists and select one for flushing. It can select the journal_list that has just had flush_journal_list called on it in the original thread and call it again with the same journal_list. The second thread then drops the write lock to acquire j_flush_mutex and the first thread reacquires it and continues execution and eventually clears and frees the journal list before dropping j_flush_mutex and returning. The second thread acquires j_flush_mutex and ends up operating on a journal_list that has already been released. If the memory hasn't been reused, we'll soon after hit a BUG_ON because the transaction id has already been cleared. If it's been reused, we'll crash in other fun ways. Since flush_journal_list will synchronize on j_flush_mutex, we can fix the race by taking a proper reference in flush_used_journal_lists and checking to see if it's still valid after the mutex is taken. It's safe to iterate the list of journal lists and pick a list with just the write lock as long as a reference is taken on the journal list before we drop the lock. We already have code to handle whether a transaction has been flushed already so we can use that to handle the race and get rid of the trans_id BUG_ON. Signed-off-by: Jeff Mahoney Signed-off-by: Jan Kara diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 7a37473..fd77703 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1338,7 +1338,6 @@ static int flush_journal_list(struct super_block *s, reiserfs_warning(s, "clm-2048", "called with wcount %d", atomic_read(&journal->j_wcount)); } - BUG_ON(jl->j_trans_id == 0); /* if flushall == 0, the lock is already held */ if (flushall) { @@ -1765,6 +1764,8 @@ static int flush_used_journal_lists(struct super_block *s, break; tjl = JOURNAL_LIST_ENTRY(tjl->j_list.next); } + get_journal_list(jl); + get_journal_list(flush_jl); /* try to find a group of blocks we can flush across all the ** transactions, but only bother if we've actually spanned ** across multiple lists @@ -1773,6 +1774,8 @@ static int flush_used_journal_lists(struct super_block *s, ret = kupdate_transactions(s, jl, &tjl, &trans_id, len, i); } flush_journal_list(s, flush_jl, 1); + put_journal_list(s, flush_jl); + put_journal_list(s, jl); return 0; } -- cgit v0.10.2