From 1bbfc848a915081237660d898bbcf50e4a8fc59f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 21 Mar 2011 16:26:42 +0200 Subject: UBIFS: make debugfs files non-seekable Turn the debufs files UBIFS maintains into non-seekable. Indeed, none of them is supposed to be seek'ed. Do this by making the '.lseek()' handler to be 'no_llseek()' and by using 'nonseekable_open()' in the '.open()' operation. This does mean an API break but this debugging API is only used by a couple of test scripts which do not rely in the 'llseek()' operation. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 004d374..da9ecdd 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2784,7 +2784,7 @@ void dbg_debugfs_exit(void) static int open_debugfs_file(struct inode *inode, struct file *file) { file->private_data = inode->i_private; - return 0; + return nonseekable_open(inode, file); } static ssize_t write_debugfs_file(struct file *file, const char __user *buf, @@ -2814,7 +2814,7 @@ static const struct file_operations dfs_fops = { .open = open_debugfs_file, .write = write_debugfs_file, .owner = THIS_MODULE, - .llseek = default_llseek, + .llseek = no_llseek, }; /** -- cgit v0.10.2 From c43615702f9c5957981693a4d966ed81d8fc1ecc Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 25 Mar 2011 15:27:40 +0200 Subject: UBIFS: fix minor stylistic issues Fix several minor stylistic issues: * lines longer than 80 characters * space before closing parenthesis ')' * spaces in the indentations Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index da9ecdd..fa65b29 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2421,7 +2421,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) hashb = key_block(c, &sb->key); if (hasha > hashb) { - ubifs_err("larger hash %u goes before %u", hasha, hashb); + ubifs_err("larger hash %u goes before %u", + hasha, hashb); goto error_dump; } } diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index b286db7..3f254e3 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -971,11 +971,11 @@ static int do_writepage(struct page *page, int len) * the page locked, and it locks @ui_mutex. However, write-back does take inode * @i_mutex, which means other VFS operations may be run on this inode at the * same time. And the problematic one is truncation to smaller size, from where - * we have to call 'truncate_setsize()', which first changes @inode->i_size, then - * drops the truncated pages. And while dropping the pages, it takes the page - * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with - * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This - * means that @inode->i_size is changed while @ui_mutex is unlocked. + * we have to call 'truncate_setsize()', which first changes @inode->i_size, + * then drops the truncated pages. And while dropping the pages, it takes the + * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' + * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. + * This means that @inode->i_size is changed while @ui_mutex is unlocked. * * XXX(truncate): with the new truncate sequence this is not true anymore, * and the calls to truncate_setsize can be move around freely. They should @@ -1432,10 +1432,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) } /* - * mmap()d file has taken write protection fault and is being made - * writable. UBIFS must ensure page is budgeted for. + * mmap()d file has taken write protection fault and is being made writable. + * UBIFS must ensure page is budgeted for. */ -static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) +static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) { struct page *page = vmf->page; struct inode *inode = vma->vm_file->f_path.dentry->d_inode; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index dfd168b..6cc0931 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -393,7 +393,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) ubifs_assert(wbuf->size % c->min_io_size == 0); ubifs_assert(!c->ro_media && !c->ro_mount); if (c->leb_size - wbuf->offs >= c->max_write_size) - ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); if (c->ro_error) return -EROFS; @@ -588,7 +588,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); ubifs_assert(!c->ro_media && !c->ro_mount); if (c->leb_size - wbuf->offs >= c->max_write_size) - ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); + ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { err = -ENOSPC; diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 0c9c69b..9fe376a 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -586,7 +586,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, if (nnode->nbranch[iip].lnum) break; } - } while (iip >= UBIFS_LPT_FANOUT); + } while (iip >= UBIFS_LPT_FANOUT); /* Go right */ nnode = ubifs_get_nnode(c, nnode, iip); diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 09df318..bd644bf 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c) sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) { if (PTR_ERR(sleb) == -EUCLEAN) - sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); + sleb = ubifs_recover_leb(c, lnum, 0, + c->sbuf, 0); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 04ad07f..a089cca 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1144,7 +1144,7 @@ static int check_free_space(struct ubifs_info *c) { ubifs_assert(c->dark_wm > 0); if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { - ubifs_err("insufficient free space to mount in read/write mode"); + ubifs_err("insufficient free space to mount in R/W mode"); dbg_dump_budg(c); dbg_dump_lprops(c); return -ENOSPC; @@ -1456,7 +1456,7 @@ static int mount_ubifs(struct ubifs_info *c) dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", - UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, + UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); dbg_msg("dead watermark: %d", c->dead_wm); dbg_msg("dark watermark: %d", c->dark_wm); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index de48597..8119b1f 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -2557,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, if (err) { /* Ensure the znode is dirtied */ if (znode->cnext || !ubifs_zn_dirty(znode)) { - znode = dirty_cow_bottom_up(c, znode); - if (IS_ERR(znode)) { - err = PTR_ERR(znode); - goto out_unlock; - } + znode = dirty_cow_bottom_up(c, znode); + if (IS_ERR(znode)) { + err = PTR_ERR(znode); + goto out_unlock; + } } err = tnc_delete(c, znode, n); } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 8c40ad3..d4c07a9 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -389,9 +389,9 @@ struct ubifs_gced_idx_leb { * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot * make sure @inode->i_size is always changed under @ui_mutex, because it - * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock - * with 'ubifs_writepage()' (see file.c). All the other inode fields are - * changed under @ui_mutex, so they do not need "shadow" fields. Note, one + * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would + * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields + * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one * could consider to rework locking and base it on "shadow" fields. */ struct ubifs_inode { -- cgit v0.10.2 From cc64f774b4acd4954abe54f5919f50d78aba1e5f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 25 Mar 2011 15:37:35 +0200 Subject: UBIFS: use __packed instead of __attribute__((packed)) There was an attempt to standartize various "__attribute__" and other macros in order to have potentially portable and more consistent code, see commit 82ddcb040570411fc2d421d96b3e69711c670328. Note, that commit refers Rober Love's blog post, but the URL is broken, the valid URL is: http://blog.rlove.org/2005/10/with-little-help-from-your-compiler.html Moreover, nowadays checkpatch.pl warns about using __attribute__((packed)): "WARNING: __packed is preferred over __attribute__((packed))" It is not a big deal for UBIFS to use __packed, so let's do it. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 191ca78..b922f03 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -434,7 +434,7 @@ struct ubifs_ch { __u8 node_type; __u8 group_type; __u8 padding[2]; -} __attribute__ ((packed)); +} __packed; /** * union ubifs_dev_desc - device node descriptor. @@ -448,7 +448,7 @@ struct ubifs_ch { union ubifs_dev_desc { __le32 new; __le64 huge; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_ino_node - inode node. @@ -509,7 +509,7 @@ struct ubifs_ino_node { __le16 compr_type; __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ __u8 data[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_dent_node - directory entry node. @@ -534,7 +534,7 @@ struct ubifs_dent_node { __le16 nlen; __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ __u8 name[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_data_node - data node. @@ -555,7 +555,7 @@ struct ubifs_data_node { __le16 compr_type; __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ __u8 data[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_trun_node - truncation node. @@ -575,7 +575,7 @@ struct ubifs_trun_node { __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ __le64 old_size; __le64 new_size; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_pad_node - padding node. @@ -586,7 +586,7 @@ struct ubifs_trun_node { struct ubifs_pad_node { struct ubifs_ch ch; __le32 pad_len; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_sb_node - superblock node. @@ -644,7 +644,7 @@ struct ubifs_sb_node { __u8 uuid[16]; __le32 ro_compat_version; __u8 padding2[3968]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_mst_node - master node. @@ -711,7 +711,7 @@ struct ubifs_mst_node { __le32 idx_lebs; __le32 leb_cnt; __u8 padding[344]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_ref_node - logical eraseblock reference node. @@ -727,7 +727,7 @@ struct ubifs_ref_node { __le32 offs; __le32 jhead; __u8 padding[28]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_branch - key/reference/length branch @@ -741,7 +741,7 @@ struct ubifs_branch { __le32 offs; __le32 len; __u8 key[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_idx_node - indexing node. @@ -755,7 +755,7 @@ struct ubifs_idx_node { __le16 child_cnt; __le16 level; __u8 branches[]; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_cs_node - commit start node. @@ -765,7 +765,7 @@ struct ubifs_idx_node { struct ubifs_cs_node { struct ubifs_ch ch; __le64 cmt_no; -} __attribute__ ((packed)); +} __packed; /** * struct ubifs_orph_node - orphan node. @@ -777,6 +777,6 @@ struct ubifs_orph_node { struct ubifs_ch ch; __le64 cmt_no; __le64 inos[]; -} __attribute__ ((packed)); +} __packed; #endif /* __UBIFS_MEDIA_H__ */ -- cgit v0.10.2 From b137545c44fc0c80fb778abb0c582bda5601e8f8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 29 Mar 2011 18:04:05 +0300 Subject: UBIFS: introduce a separate structure for budgeting info This patch separates out all the budgeting-related information from 'struct ubifs_info' to 'struct ubifs_budg_info'. This way the code looks a bit cleaner. However, the main driver for this is that we want to save budgeting information and print it later, so a separate data structure for this is helpful. This patch is a preparation for the further debugging output improvements. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 8b3a7da..315de66 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c) long long liab; spin_lock(&c->space_lock); - liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; + liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth; spin_unlock(&c->space_lock); return liab; } @@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) int idx_lebs; long long idx_size; - idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; + idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx; /* And make sure we have thrice the index size of space reserved */ idx_size += idx_size << 1; /* @@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c) * budgeted index space to the size of the current index, multiplies this by 3, * and makes sure this does not exceed the amount of free LEBs. * - * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: + * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables: * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might * be large, because UBIFS does not do any index consolidation as long as * there is free space. IOW, the index may take a lot of LEBs, but the LEBs * will contain a lot of dirt. - * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, - * the index may be consolidated to take up to @c->min_idx_lebs LEBs. + * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW, + * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs. * * This function returns zero in case of success, and %-ENOSPC in case of * failure. @@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c) c->lst.taken_empty_lebs; if (unlikely(rsvd_idx_lebs > lebs)) { dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " - "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, + "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, rsvd_idx_lebs); return -ENOSPC; } available = ubifs_calc_available(c, min_idx_lebs); - outstanding = c->budg_data_growth + c->budg_dd_growth; + outstanding = c->bi.data_growth + c->bi.dd_growth; if (unlikely(available < outstanding)) { dbg_budg("out of data space: available %lld, outstanding %lld", @@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c) if (available - outstanding <= c->rp_size && !can_use_rp(c)) return -ENOSPC; - c->min_idx_lebs = min_idx_lebs; + c->bi.min_idx_lebs = min_idx_lebs; return 0; } @@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c, { int data_growth; - data_growth = req->new_ino ? c->inode_budget : 0; + data_growth = req->new_ino ? c->bi.inode_budget : 0; if (req->new_page) - data_growth += c->page_budget; + data_growth += c->bi.page_budget; if (req->new_dent) - data_growth += c->dent_budget; + data_growth += c->bi.dent_budget; data_growth += req->new_ino_d; return data_growth; } @@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c, { int dd_growth; - dd_growth = req->dirtied_page ? c->page_budget : 0; + dd_growth = req->dirtied_page ? c->bi.page_budget : 0; if (req->dirtied_ino) - dd_growth += c->inode_budget << (req->dirtied_ino - 1); + dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); if (req->mod_dent) - dd_growth += c->dent_budget; + dd_growth += c->bi.dent_budget; dd_growth += req->dirtied_ino_d; return dd_growth; } @@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) again: spin_lock(&c->space_lock); - ubifs_assert(c->budg_idx_growth >= 0); - ubifs_assert(c->budg_data_growth >= 0); - ubifs_assert(c->budg_dd_growth >= 0); + ubifs_assert(c->bi.idx_growth >= 0); + ubifs_assert(c->bi.data_growth >= 0); + ubifs_assert(c->bi.dd_growth >= 0); - if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { + if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) { dbg_budg("no space"); spin_unlock(&c->space_lock); return -ENOSPC; } - c->budg_idx_growth += idx_growth; - c->budg_data_growth += data_growth; - c->budg_dd_growth += dd_growth; + c->bi.idx_growth += idx_growth; + c->bi.data_growth += data_growth; + c->bi.dd_growth += dd_growth; err = do_budget_space(c); if (likely(!err)) { @@ -484,9 +484,9 @@ again: } /* Restore the old values */ - c->budg_idx_growth -= idx_growth; - c->budg_data_growth -= data_growth; - c->budg_dd_growth -= dd_growth; + c->bi.idx_growth -= idx_growth; + c->bi.data_growth -= data_growth; + c->bi.dd_growth -= dd_growth; spin_unlock(&c->space_lock); if (req->fast) { @@ -506,9 +506,9 @@ again: goto again; } dbg_budg("FS is full, -ENOSPC"); - c->nospace = 1; + c->bi.nospace = 1; if (can_use_rp(c) || c->rp_size == 0) - c->nospace_rp = 1; + c->bi.nospace_rp = 1; smp_wmb(); } else ubifs_err("cannot budget space, error %d", err); @@ -523,8 +523,8 @@ again: * This function releases the space budgeted by 'ubifs_budget_space()'. Note, * since the index changes (which were budgeted for in @req->idx_growth) will * only be written to the media on commit, this function moves the index budget - * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be - * zeroed by the commit operation. + * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed + * by the commit operation. */ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) { @@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) if (!req->data_growth && !req->dd_growth) return; - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); spin_lock(&c->space_lock); - c->budg_idx_growth -= req->idx_growth; - c->budg_uncommitted_idx += req->idx_growth; - c->budg_data_growth -= req->data_growth; - c->budg_dd_growth -= req->dd_growth; - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); - - ubifs_assert(c->budg_idx_growth >= 0); - ubifs_assert(c->budg_data_growth >= 0); - ubifs_assert(c->budg_dd_growth >= 0); - ubifs_assert(c->min_idx_lebs < c->main_lebs); - ubifs_assert(!(c->budg_idx_growth & 7)); - ubifs_assert(!(c->budg_data_growth & 7)); - ubifs_assert(!(c->budg_dd_growth & 7)); + c->bi.idx_growth -= req->idx_growth; + c->bi.uncommitted_idx += req->idx_growth; + c->bi.data_growth -= req->data_growth; + c->bi.dd_growth -= req->dd_growth; + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + ubifs_assert(c->bi.idx_growth >= 0); + ubifs_assert(c->bi.data_growth >= 0); + ubifs_assert(c->bi.dd_growth >= 0); + ubifs_assert(c->bi.min_idx_lebs < c->main_lebs); + ubifs_assert(!(c->bi.idx_growth & 7)); + ubifs_assert(!(c->bi.data_growth & 7)); + ubifs_assert(!(c->bi.dd_growth & 7)); spin_unlock(&c->space_lock); } @@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c) { spin_lock(&c->space_lock); /* Release the index growth reservation */ - c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; + c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; /* Release the data growth reservation */ - c->budg_data_growth -= c->page_budget; + c->bi.data_growth -= c->bi.page_budget; /* Increase the dirty data growth reservation instead */ - c->budg_dd_growth += c->page_budget; + c->bi.dd_growth += c->bi.page_budget; /* And re-calculate the indexing space reservation */ - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); spin_unlock(&c->space_lock); } @@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, memset(&req, 0, sizeof(struct ubifs_budget_req)); /* The "no space" flags will be cleared because dd_growth is > 0 */ - req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); + req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8); ubifs_release_budget(c, &req); } @@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) int rsvd_idx_lebs, lebs; long long available, outstanding, free; - ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); - outstanding = c->budg_data_growth + c->budg_dd_growth; - available = ubifs_calc_available(c, c->min_idx_lebs); + ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); + outstanding = c->bi.data_growth + c->bi.dd_growth; + available = ubifs_calc_available(c, c->bi.min_idx_lebs); /* * When reporting free space to user-space, UBIFS guarantees that it is @@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) * Note, the calculations below are similar to what we have in * 'do_budget_space()', so refer there for comments. */ - if (c->min_idx_lebs > c->lst.idx_lebs) - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + if (c->bi.min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; else rsvd_idx_lebs = 0; lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 1bd01de..87cd0ea 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -182,7 +182,7 @@ static int do_commit(struct ubifs_info *c) c->mst_node->root_len = cpu_to_le32(zroot.len); c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); - c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); + c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz); c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index fa65b29..c58867c 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -614,14 +614,14 @@ void dbg_dump_budg(struct ubifs_info *c) spin_lock(&dbg_lock); printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, - c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); + c->bi.data_growth, c->bi.dd_growth, c->bi.idx_growth); printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " - "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, - c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, + "freeable_cnt %d\n", c->bi.data_growth + c->bi.dd_growth, + c->bi.data_growth + c->bi.dd_growth + c->bi.idx_growth, c->freeable_cnt); printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " - "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, - c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); + "calc_idx_sz %lld, idx_gc_cnt %d\n", c->bi.min_idx_lebs, + c->bi.old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), atomic_long_read(&c->dirty_zn_cnt), @@ -648,8 +648,8 @@ void dbg_dump_budg(struct ubifs_info *c) printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); /* Print budgeting predictions */ - available = ubifs_calc_available(c, c->min_idx_lebs); - outstanding = c->budg_data_growth + c->budg_dd_growth; + available = ubifs_calc_available(c, c->bi.min_idx_lebs); + outstanding = c->bi.data_growth + c->bi.dd_growth; free = ubifs_get_free_space_nolock(c); printk(KERN_DEBUG "Budgeting predictions:\n"); printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 7217d67..ef5abd3 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -603,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) ubifs_release_budget(c, &req); else { /* We've deleted something - clean the "no space" flags */ - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); } return 0; @@ -693,7 +693,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) ubifs_release_budget(c, &req); else { /* We've deleted something - clean the "no space" flags */ - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); } return 0; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 3f254e3..3594aae 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c) */ static void release_existing_page_budget(struct ubifs_info *c) { - struct ubifs_budget_req req = { .dd_growth = c->page_budget}; + struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget}; ubifs_release_budget(c, &req); } @@ -1189,7 +1189,7 @@ out_budg: if (budgeted) ubifs_release_budget(c, &req); else { - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); } return err; diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 1d54383..2559d17 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, * But if the index takes fewer LEBs than it is reserved for it, * this function must avoid picking those reserved LEBs. */ - if (c->min_idx_lebs >= c->lst.idx_lebs) { - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + if (c->bi.min_idx_lebs >= c->lst.idx_lebs) { + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; exclude_index = 1; } spin_unlock(&c->space_lock); @@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, pick_free = 0; } else { spin_lock(&c->space_lock); - exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); + exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs); spin_unlock(&c->space_lock); } @@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, /* Check if there are enough empty LEBs for commit */ spin_lock(&c->space_lock); - if (c->min_idx_lebs > c->lst.idx_lebs) - rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; + if (c->bi.min_idx_lebs > c->lst.idx_lebs) + rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; else rsvd_idx_lebs = 0; lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 21f47af..278c238 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c) } main_sz = (long long)c->main_lebs * c->leb_size; - if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { + if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) { err = 9; goto out; } @@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c) } if (c->lst.total_dead + c->lst.total_dark + - c->lst.total_used + c->old_idx_sz > main_sz) { + c->lst.total_used + c->bi.old_idx_sz > main_sz) { err = 21; goto out; } @@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c) c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); - c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); + c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size); c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); @@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c) c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); - c->calc_idx_sz = c->old_idx_sz; + c->calc_idx_sz = c->bi.old_idx_sz; if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) c->no_orphs = 1; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index d3d6d36..c29c468 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -1065,13 +1065,13 @@ int ubifs_replay_journal(struct ubifs_info *c) goto out; /* - * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable - * to roughly estimate index growth. Things like @c->min_idx_lebs + * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable + * to roughly estimate index growth. Things like @c->bi.min_idx_lebs * depend on it. This means we have to initialize it to make sure * budgeting works properly. */ - c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); - c->budg_uncommitted_idx *= c->max_idx_node_sz; + c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); + c->bi.uncommitted_idx *= c->max_idx_node_sz; ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a089cca..a0aa951 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -375,7 +375,7 @@ out: ubifs_release_dirty_inode_budget(c, ui); else { /* We've deleted something - clean the "no space" flags */ - c->nospace = c->nospace_rp = 0; + c->bi.nospace = c->bi.nospace_rp = 0; smp_wmb(); } done: @@ -694,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c) * be compressed and direntries are of the maximum size. * * Note, data, which may be stored in inodes is budgeted separately, so - * it is not included into 'c->inode_budget'. + * it is not included into 'c->bi.inode_budget'. */ - c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; - c->inode_budget = UBIFS_INO_NODE_SZ; - c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; + c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; + c->bi.inode_budget = UBIFS_INO_NODE_SZ; + c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ; /* * When the amount of flash space used by buds becomes @@ -742,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c) { long long tmp64; - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); c->report_rp_size = ubifs_reported_space(c, c->rp_size); /* @@ -1304,7 +1304,7 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_lpt; - err = dbg_check_idx_size(c, c->old_idx_sz); + err = dbg_check_idx_size(c, c->bi.old_idx_sz); if (err) goto out_lpt; @@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c) goto out_journal; /* Calculate 'min_idx_lebs' after journal replay */ - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); if (err) @@ -1442,7 +1442,8 @@ static int mount_ubifs(struct ubifs_info *c) c->main_lebs, c->main_first, c->leb_cnt - 1); dbg_msg("index LEBs: %d", c->lst.idx_lebs); dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", - c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); + c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, + c->bi.old_idx_sz >> 20); dbg_msg("key hash type: %d", c->key_hash_type); dbg_msg("tree fanout: %d", c->fanout); dbg_msg("reserved GC LEB: %d", c->gc_lnum); @@ -1766,10 +1767,9 @@ static void ubifs_put_super(struct super_block *sb) * to write them back because of I/O errors. */ if (!c->ro_error) { - ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); - ubifs_assert(c->budg_idx_growth == 0); - ubifs_assert(c->budg_dd_growth == 0); - ubifs_assert(c->budg_data_growth == 0); + ubifs_assert(c->bi.idx_growth == 0); + ubifs_assert(c->bi.dd_growth == 0); + ubifs_assert(c->bi.data_growth == 0); } /* diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 53288e5..9c376b9 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -796,16 +796,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) spin_lock(&c->space_lock); /* * Although we have not finished committing yet, update size of the - * committed index ('c->old_idx_sz') and zero out the index growth + * committed index ('c->bi.old_idx_sz') and zero out the index growth * budget. It is OK to do this now, because we've reserved all the * space which is needed to commit the index, and it is save for the * budgeting subsystem to assume the index is already committed, * even though it is not. */ - ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); - c->old_idx_sz = c->calc_idx_sz; - c->budg_uncommitted_idx = 0; - c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); + c->bi.old_idx_sz = c->calc_idx_sz; + c->bi.uncommitted_idx = 0; + c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); spin_unlock(&c->space_lock); mutex_unlock(&c->tnc_mutex); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d4c07a9..26a7ebe 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -937,6 +937,40 @@ struct ubifs_mount_opts { unsigned int compr_type:2; }; +/** + * struct ubifs_budg_info - UBIFS budgeting information. + * @idx_growth: amount of bytes budgeted for index growth + * @data_growth: amount of bytes budgeted for cached data + * @dd_growth: amount of bytes budgeted for cached data that will make + * other data dirty + * @uncommitted_idx: amount of bytes were budgeted for growth of the index, but + * which still have to be taken into account because the index + * has not been committed so far + * @old_idx_sz: size of index on flash + * @min_idx_lebs: minimum number of LEBs required for the index + * @nospace: non-zero if the file-system does not have flash space (used as + * optimization) + * @nospace_rp: the same as @nospace, but additionally means that even reserved + * pool is full + * @page_budget: budget for a page (constant, nenver changed after mount) + * @inode_budget: budget for an inode (constant, nenver changed after mount) + * @dent_budget: budget for a directory entry (constant, nenver changed after + * mount) + */ +struct ubifs_budg_info { + long long idx_growth; + long long data_growth; + long long dd_growth; + long long uncommitted_idx; + unsigned long long old_idx_sz; + int min_idx_lebs; + unsigned int nospace:1; + unsigned int nospace_rp:1; + int page_budget; + int inode_budget; + int dent_budget; +}; + struct ubifs_debug_info; /** @@ -1057,32 +1091,14 @@ struct ubifs_debug_info; * @dirty_zn_cnt: number of dirty znodes * @clean_zn_cnt: number of clean znodes * - * @budg_idx_growth: amount of bytes budgeted for index growth - * @budg_data_growth: amount of bytes budgeted for cached data - * @budg_dd_growth: amount of bytes budgeted for cached data that will make - * other data dirty - * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, - * but which still have to be taken into account because - * the index has not been committed so far - * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, - * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, - * @nospace, and @nospace_rp; - * @min_idx_lebs: minimum number of LEBs required for the index - * @old_idx_sz: size of index on flash + * @space_lock: protects @bi and @lst + * @lst: lprops statistics + * @bi: budgeting information * @calc_idx_sz: temporary variable which is used to calculate new index size * (contains accurate new index size at end of TNC commit start) - * @lst: lprops statistics - * @nospace: non-zero if the file-system does not have flash space (used as - * optimization) - * @nospace_rp: the same as @nospace, but additionally means that even reserved - * pool is full - * - * @page_budget: budget for a page - * @inode_budget: budget for an inode - * @dent_budget: budget for a directory entry * * @ref_node_alsz: size of the LEB reference node aligned to the min. flash - * I/O unit + * I/O unit * @mst_node_alsz: master node aligned size * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary @@ -1308,21 +1324,10 @@ struct ubifs_info { atomic_long_t dirty_zn_cnt; atomic_long_t clean_zn_cnt; - long long budg_idx_growth; - long long budg_data_growth; - long long budg_dd_growth; - long long budg_uncommitted_idx; spinlock_t space_lock; - int min_idx_lebs; - unsigned long long old_idx_sz; - unsigned long long calc_idx_sz; struct ubifs_lp_stats lst; - unsigned int nospace:1; - unsigned int nospace_rp:1; - - int page_budget; - int inode_budget; - int dent_budget; + struct ubifs_budg_info bi; + unsigned long long calc_idx_sz; int ref_node_alsz; int mst_node_alsz; -- cgit v0.10.2 From 8ff83089f8bcbd9a2e898b68f1a46487c8b6e38c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 29 Mar 2011 18:19:50 +0300 Subject: UBIFS: simplify dbg_dump_budg calling conventions The current 'dbg_dump_budg()' calling convention is that the 'c->space_lock' spinlock is held. However, none of the callers actually use it from contects which have 'c->space_lock' locked, so all callers have to explicitely lock and unlock the spinlock. This is not very sensible convention. This patch changes it and makes 'dbg_dump_budg()' lock the spinlock instead of imposing this to the callers. This simplifies the code a little. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index c58867c..aad4fb8 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -610,7 +610,7 @@ void dbg_dump_budg(struct ubifs_info *c) struct ubifs_gced_idx_leb *idx_gc; long long available, outstanding, free; - ubifs_assert(spin_is_locked(&c->space_lock)); + spin_lock(&c->space_lock); spin_lock(&dbg_lock); printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, @@ -655,6 +655,7 @@ void dbg_dump_budg(struct ubifs_info *c) printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", available, outstanding, free); spin_unlock(&dbg_lock); + spin_unlock(&c->space_lock); } void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) @@ -1046,10 +1047,7 @@ out: ubifs_msg("current lprops statistics dump"); dbg_dump_lstats(&lst); - - spin_lock(&c->space_lock); dbg_dump_budg(c); - spin_unlock(&c->space_lock); dump_stack(); return -EINVAL; } @@ -2796,11 +2794,9 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, if (file->f_path.dentry == d->dfs_dump_lprops) dbg_dump_lprops(c); - else if (file->f_path.dentry == d->dfs_dump_budg) { - spin_lock(&c->space_lock); + else if (file->f_path.dentry == d->dfs_dump_budg) dbg_dump_budg(c); - spin_unlock(&c->space_lock); - } else if (file->f_path.dentry == d->dfs_dump_tnc) { + else if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); dbg_dump_tnc(c); mutex_unlock(&c->tnc_mutex); diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index aed25e8..65d485f 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -380,10 +380,8 @@ out: if (err == -ENOSPC) { /* This are some budgeting problems, print useful information */ down_write(&c->commit_sem); - spin_lock(&c->space_lock); dbg_dump_stack(); dbg_dump_budg(c); - spin_unlock(&c->space_lock); dbg_dump_lprops(c); cmt_retries = dbg_check_lprops(c); up_write(&c->commit_sem); diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 9c376b9..66f066d 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -383,9 +383,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) * option which forces in-the-gaps is enabled. */ ubifs_err("out of space"); - spin_lock(&c->space_lock); dbg_dump_budg(c); - spin_unlock(&c->space_lock); dbg_dump_lprops(c); } /* Try to commit anyway */ -- cgit v0.10.2 From 8c3067e445fb25119761356c88abc39dacfb9524 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 30 Mar 2011 13:18:58 +0300 Subject: UBIFS: rearrange the budget dump Re-arrange the budget dump and make sure we first dump all the 'struct ubifs_budg_info' fields, and then the other information. Additionally, print the 'uncommitted_idx' variable. This change is required for to the following dumping function enhancement where it will be possible to dump saved 'struct ubifs_budg_info' objects, not only the current one. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index aad4fb8..8e8bba9 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -612,22 +612,28 @@ void dbg_dump_budg(struct ubifs_info *c) spin_lock(&c->space_lock); spin_lock(&dbg_lock); - printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " - "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, - c->bi.data_growth, c->bi.dd_growth, c->bi.idx_growth); - printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " - "freeable_cnt %d\n", c->bi.data_growth + c->bi.dd_growth, - c->bi.data_growth + c->bi.dd_growth + c->bi.idx_growth, - c->freeable_cnt); - printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " - "calc_idx_sz %lld, idx_gc_cnt %d\n", c->bi.min_idx_lebs, - c->bi.old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); + printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " + "total budget sum %lld\n", current->pid, + c->bi.data_growth + c->bi.dd_growth, + c->bi.data_growth + c->bi.dd_growth + c->bi.idx_growth); + printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " + "budg_idx_growth %lld\n", c->bi.data_growth, c->bi.dd_growth, + c->bi.idx_growth); + printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " + "uncommitted_idx %lld\n", c->bi.min_idx_lebs, c->bi.old_idx_sz, + c->bi.uncommitted_idx); + printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", + c->bi.page_budget, c->bi.inode_budget, c->bi.dent_budget); + printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", + c->bi.nospace, c->bi.nospace_rp); + printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", + c->dark_wm, c->dead_wm, c->max_idx_node_sz); + printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", + c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), atomic_long_read(&c->dirty_zn_cnt), atomic_long_read(&c->clean_zn_cnt)); - printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", - c->dark_wm, c->dead_wm, c->max_idx_node_sz); printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); /* If we are in R/O mode, journal heads do not exist */ -- cgit v0.10.2 From f1bd66afb14c25095cf6ff499c1388db423acc9e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 29 Mar 2011 18:36:21 +0300 Subject: UBIFS: improve space checking debugging feature This patch improves the 'dbg_check_space_info()' function which checks whether the amount of space before re-mounting and after re-mounting is the same (remounting from R/O to R/W modes and vice-versa). The problem is that 'dbg_check_space_info()' does not save the budgeting information before re-mounting, so when an error is reported, we do not know why the amount of free space changed. This patches makes the following changes: 1. Teaches 'dbg_dump_budg()' function to accept a 'struct ubifs_budg_info' argument and print out the this argument. This way we may ask it to print any saved budgeting info, no only the current one. 2. Accordingly changes all the callers of 'dbg_dump_budg()' to comply with the changed interface. 3. Introduce a 'saved_bi' (saved budgeting info) field to 'struct ubifs_debug_info' and save the budgeting info before re-mounting there. 4. Change 'dbg_check_space_info()' and make it print both old and new budgeting information. 5. Additionally, save 'c->igx_gc_cnt' and print it if and error happens. This value contributes to the amount of free space, so we have to print it. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 8e8bba9..546ad57 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -602,7 +602,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst) spin_unlock(&dbg_lock); } -void dbg_dump_budg(struct ubifs_info *c) +void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) { int i; struct rb_node *rb; @@ -614,20 +614,29 @@ void dbg_dump_budg(struct ubifs_info *c) spin_lock(&dbg_lock); printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " "total budget sum %lld\n", current->pid, - c->bi.data_growth + c->bi.dd_growth, - c->bi.data_growth + c->bi.dd_growth + c->bi.idx_growth); + bi->data_growth + bi->dd_growth, + bi->data_growth + bi->dd_growth + bi->idx_growth); printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " - "budg_idx_growth %lld\n", c->bi.data_growth, c->bi.dd_growth, - c->bi.idx_growth); + "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, + bi->idx_growth); printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " - "uncommitted_idx %lld\n", c->bi.min_idx_lebs, c->bi.old_idx_sz, - c->bi.uncommitted_idx); + "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, + bi->uncommitted_idx); printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", - c->bi.page_budget, c->bi.inode_budget, c->bi.dent_budget); + bi->page_budget, bi->inode_budget, bi->dent_budget); printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", - c->bi.nospace, c->bi.nospace_rp); + bi->nospace, bi->nospace_rp); printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", c->dark_wm, c->dead_wm, c->max_idx_node_sz); + + if (bi != &c->bi) + /* + * If we are dumping saved budgeting data, do not print + * additional information which is about the current state, not + * the old one which corresponded to the saved budgeting data. + */ + goto out_unlock; + printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " @@ -636,6 +645,7 @@ void dbg_dump_budg(struct ubifs_info *c) atomic_long_read(&c->clean_zn_cnt)); printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", c->gc_lnum, c->ihead_lnum); + /* If we are in R/O mode, journal heads do not exist */ if (c->jheads) for (i = 0; i < c->jhead_cnt; i++) @@ -660,6 +670,7 @@ void dbg_dump_budg(struct ubifs_info *c) printk(KERN_DEBUG "Budgeting predictions:\n"); printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", available, outstanding, free); +out_unlock: spin_unlock(&dbg_lock); spin_unlock(&c->space_lock); } @@ -983,6 +994,8 @@ void dbg_save_space_info(struct ubifs_info *c) spin_lock(&c->space_lock); memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); + memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info)); + d->saved_idx_gc_cnt = c->idx_gc_cnt; /* * We use a dirty hack here and zero out @c->freeable_cnt, because it @@ -1049,11 +1062,14 @@ int dbg_check_space_info(struct ubifs_info *c) out: ubifs_msg("saved lprops statistics dump"); dbg_dump_lstats(&d->saved_lst); - ubifs_get_lp_stats(c, &lst); - + ubifs_msg("saved budgeting info dump"); + dbg_dump_budg(c, &d->saved_bi); + ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); ubifs_msg("current lprops statistics dump"); + ubifs_get_lp_stats(c, &lst); dbg_dump_lstats(&lst); - dbg_dump_budg(c); + ubifs_msg("current budgeting info dump"); + dbg_dump_budg(c, &c->bi); dump_stack(); return -EINVAL; } @@ -2801,7 +2817,7 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, if (file->f_path.dentry == d->dfs_dump_lprops) dbg_dump_lprops(c); else if (file->f_path.dentry == d->dfs_dump_budg) - dbg_dump_budg(c); + dbg_dump_budg(c, &c->bi); else if (file->f_path.dentry == d->dfs_dump_tnc) { mutex_lock(&c->tnc_mutex); dbg_dump_tnc(c); diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index e6493ca..6b5fe7b 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -50,7 +50,9 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, * @new_ihead_offs: used by debugging to check @c->ihead_offs * * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') - * @saved_free: saved free space (used by 'dbg_save_space_info()') + * @saved_bi: saved budgeting information + * @saved_free: saved amount of free space + * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt * * dfs_dir_name: name of debugfs directory containing this file-system's files * dfs_dir: direntry object of the file-system debugfs directory @@ -76,7 +78,9 @@ struct ubifs_debug_info { int new_ihead_offs; struct ubifs_lp_stats saved_lst; + struct ubifs_budg_info saved_bi; long long saved_free; + int saved_idx_gc_cnt; char dfs_dir_name[100]; struct dentry *dfs_dir; @@ -262,7 +266,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, int offs); void dbg_dump_budget_req(const struct ubifs_budget_req *req); void dbg_dump_lstats(const struct ubifs_lp_stats *lst); -void dbg_dump_budg(struct ubifs_info *c); +void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); void dbg_dump_lprops(struct ubifs_info *c); void dbg_dump_lpt_info(struct ubifs_info *c); @@ -420,7 +424,9 @@ static inline void dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } static inline void dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } -static inline void dbg_dump_budg(struct ubifs_info *c) { return; } +static inline void +dbg_dump_budg(struct ubifs_info *c, + const struct ubifs_budg_info *bi) { return; } static inline void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) { return; } static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 65d485f..ce55a48 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -381,7 +381,7 @@ out: /* This are some budgeting problems, print useful information */ down_write(&c->commit_sem); dbg_dump_stack(); - dbg_dump_budg(c); + dbg_dump_budg(c, &c->bi); dbg_dump_lprops(c); cmt_retries = dbg_check_lprops(c); up_write(&c->commit_sem); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index a0aa951..407c064 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1145,7 +1145,7 @@ static int check_free_space(struct ubifs_info *c) ubifs_assert(c->dark_wm > 0); if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { ubifs_err("insufficient free space to mount in R/W mode"); - dbg_dump_budg(c); + dbg_dump_budg(c, &c->bi); dbg_dump_lprops(c); return -ENOSPC; } diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 66f066d..c471b06 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -383,7 +383,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) * option which forces in-the-gaps is enabled. */ ubifs_err("out of space"); - dbg_dump_budg(c); + dbg_dump_budg(c, &c->bi); dbg_dump_lprops(c); } /* Try to commit anyway */ -- cgit v0.10.2 From bc3f07f0906e867270fdc2006b0bbcb130a722c1 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 5 Apr 2011 13:52:20 +0300 Subject: UBIFS: make force in-the-gaps to be a general self-check UBIFS can force itself to use the 'in-the-gaps' commit method - the last resort method which is normally invoced very very rarely. Currently this "force int-the-gaps" debugging feature is a separate test mode. But it is a bit saner to make it to be the "general" self-test check instead. This patch is just a clean-up which should make the debugging code look a bit nicer and easier to use - we have way too many debugging options. Signed-off-by: Artem Bityutskiy diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index d7b13b0..7d17e5b 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt @@ -154,7 +154,6 @@ debug_tsts Selects a mode of testing, as follows: Test mode Flag value - Force in-the-gaps method 2 Failure mode for recovery testing 4 For example, set debug_msgs to 5 to display General messages and Mount diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 546ad57..06d171c 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -34,7 +34,6 @@ #include #include #include -#include #ifdef CONFIG_UBIFS_FS_DEBUG @@ -2458,14 +2457,12 @@ error_dump: return 0; } -static int invocation_cnt; - int dbg_force_in_the_gaps(void) { - if (!dbg_force_in_the_gaps_enabled) + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) return 0; - /* Force in-the-gaps every 8th commit */ - return !((invocation_cnt++) & 0x7); + + return !(random32() & 7); } /* Failure mode for recovery testing */ diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 6b5fe7b..8934c12 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -31,6 +31,8 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, #ifdef CONFIG_UBIFS_FS_DEBUG +#include + /** * ubifs_debug_info - per-FS debugging information. * @old_zroot: old index root - used by 'dbg_check_old_index()' @@ -237,11 +239,9 @@ enum { /* * Special testing flags. * - * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method * UBIFS_TST_RCVRY: failure mode for recovery testing */ enum { - UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, UBIFS_TST_RCVRY = 0x4, }; @@ -308,18 +308,16 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); /* Force the use of in-the-gaps method for testing */ - -#define dbg_force_in_the_gaps_enabled \ - (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) - +static inline int dbg_force_in_the_gaps_enabled(void) +{ + return ubifs_chk_flags & UBIFS_CHK_GEN; +} int dbg_force_in_the_gaps(void); /* Failure mode for recovery testing */ - #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) #ifndef UBIFS_DBG_PRESERVE_UBI - #define ubi_leb_read dbg_leb_read #define ubi_leb_write dbg_leb_write #define ubi_leb_change dbg_leb_change @@ -327,7 +325,6 @@ int dbg_force_in_the_gaps(void); #define ubi_leb_unmap dbg_leb_unmap #define ubi_is_mapped dbg_is_mapped #define ubi_leb_map dbg_leb_map - #endif int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, @@ -488,8 +485,8 @@ dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) { return 0; } static inline int dbg_force_in_the_gaps(void) { return 0; } -#define dbg_force_in_the_gaps_enabled 0 -#define dbg_failure_mode 0 +#define dbg_force_in_the_gaps_enabled() 0 +#define dbg_failure_mode 0 static inline int dbg_debugfs_init(void) { return 0; } static inline void dbg_debugfs_exit(void) { return; } diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index c471b06..41920f3 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -377,12 +377,12 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) c->gap_lebs = NULL; return err; } - if (!dbg_force_in_the_gaps_enabled) { + if (dbg_force_in_the_gaps_enabled()) { /* * Do not print scary warnings if the debugging * option which forces in-the-gaps is enabled. */ - ubifs_err("out of space"); + ubifs_warn("out of space"); dbg_dump_budg(c, &c->bi); dbg_dump_lprops(c); } -- cgit v0.10.2 From cdd8ad6e9e78b295e71265762926342523579d10 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 5 Apr 2011 15:43:44 +0300 Subject: UBIFS: introduce lsave debugging Try to improve UBIFS testing coverage by randomly picking LEBs to store in lsave, rather than picking them optimally. Create a debugging version of 'populate_lsave()' for these purposes and enable it when general debugging self-checks are enabled. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 9fe376a..dfcb574 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -29,6 +29,12 @@ #include #include "ubifs.h" +#ifdef CONFIG_UBIFS_FS_DEBUG +static int dbg_populate_lsave(struct ubifs_info *c); +#else +#define dbg_populate_lsave(c) 0 +#endif + /** * first_dirty_cnode - find first dirty cnode. * @c: UBIFS file-system description object @@ -815,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c) c->lpt_drty_flgs |= LSAVE_DIRTY; ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); } + + if (dbg_populate_lsave(c)) + return; + list_for_each_entry(lprops, &c->empty_list, list) { c->lsave[cnt++] = lprops->lnum; if (cnt >= c->lsave_cnt) @@ -1994,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c) current->pid); } +/** + * dbg_populate_lsave - debugging version of 'populate_lsave()' + * @c: UBIFS file-system description object + * + * This is a debugging version for 'populate_lsave()' which populates lsave + * with random LEBs instead of useful LEBs, which is good for test coverage. + * Returns zero if lsave has not been populated (this debugging feature is + * disabled) an non-zero if lsave has been populated. + */ +static int dbg_populate_lsave(struct ubifs_info *c) +{ + struct ubifs_lprops *lprops; + struct ubifs_lpt_heap *heap; + int i; + + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + if (random32() & 3) + return 0; + + for (i = 0; i < c->lsave_cnt; i++) + c->lsave[i] = c->main_first; + + list_for_each_entry(lprops, &c->empty_list, list) + c->lsave[random32() % c->lsave_cnt] = lprops->lnum; + list_for_each_entry(lprops, &c->freeable_list, list) + c->lsave[random32() % c->lsave_cnt] = lprops->lnum; + list_for_each_entry(lprops, &c->frdi_idx_list, list) + c->lsave[random32() % c->lsave_cnt] = lprops->lnum; + + heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; + for (i = 0; i < heap->cnt; i++) + c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; + heap = &c->lpt_heap[LPROPS_DIRTY - 1]; + for (i = 0; i < heap->cnt; i++) + c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; + heap = &c->lpt_heap[LPROPS_FREE - 1]; + for (i = 0; i < heap->cnt; i++) + c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; + + return 1; +} + #endif /* CONFIG_UBIFS_FS_DEBUG */ -- cgit v0.10.2 From 14ffd5d0b051b0b937df0cf3580a92f428ff70b1 Mon Sep 17 00:00:00 2001 From: Sedat Dilek Date: Mon, 4 Apr 2011 01:55:39 +0200 Subject: UBIFS: make xattr operations names consistent This is just a tiny clean-up patch. The variable name for empty address space operations is "empty_aops". Let's use consistent names for empty inode and file operations: "empty_iops" and "empty_fops", instead of inconsistent "none_inode_operations" and "none_file_operations". Artem: re-write the commit message. Signed-off-by: Sedat Dilek Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 3299f46..16f19f5 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -80,8 +80,8 @@ enum { SECURITY_XATTR, }; -static const struct inode_operations none_inode_operations; -static const struct file_operations none_file_operations; +static const struct inode_operations empty_iops; +static const struct file_operations empty_fops; /** * create_xattr - create an extended attribute. @@ -131,8 +131,8 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, /* Re-define all operations to be "nothing" */ inode->i_mapping->a_ops = &empty_aops; - inode->i_op = &none_inode_operations; - inode->i_fop = &none_file_operations; + inode->i_op = &empty_iops; + inode->i_fop = &empty_fops; inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; ui = ubifs_inode(inode); -- cgit v0.10.2 From 1a29af8bd740b2720caeafbffece1c0be6a9d06d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 20 Apr 2011 17:06:17 +0300 Subject: UBIFS: use EROFS when emulating failures When the debugging failure emulation is enabled and UBIFS decides to emulate an I/O error, it uses EIO error code. In which case UBIFS switches into R/O mode later on. The for the user-space is that when a failure is emulated, the file-system sometimes returns EIO and sometimes EROFS. This makes it more difficult to implement user-space tests for the failure mode. Let's be consistent and return EROFS in all the cases. This patch is an improvement for the debugging code and does not affect the functionality at all. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 06d171c..ba4d795 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2650,7 +2650,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, int len, int check) { if (in_failure_mode(desc)) - return -EIO; + return -EROFS; return ubi_leb_read(desc, lnum, buf, offset, len, check); } @@ -2660,7 +2660,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, int err, failing; if (in_failure_mode(desc)) - return -EIO; + return -EROFS; failing = do_fail(desc, lnum, 1); if (failing) cut_data(buf, len); @@ -2668,7 +2668,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, if (err) return err; if (failing) - return -EIO; + return -EROFS; return 0; } @@ -2678,12 +2678,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, int err; if (do_fail(desc, lnum, 1)) - return -EIO; + return -EROFS; err = ubi_leb_change(desc, lnum, buf, len, dtype); if (err) return err; if (do_fail(desc, lnum, 1)) - return -EIO; + return -EROFS; return 0; } @@ -2692,12 +2692,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) int err; if (do_fail(desc, lnum, 0)) - return -EIO; + return -EROFS; err = ubi_leb_erase(desc, lnum); if (err) return err; if (do_fail(desc, lnum, 0)) - return -EIO; + return -EROFS; return 0; } @@ -2706,19 +2706,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) int err; if (do_fail(desc, lnum, 0)) - return -EIO; + return -EROFS; err = ubi_leb_unmap(desc, lnum); if (err) return err; if (do_fail(desc, lnum, 0)) - return -EIO; + return -EROFS; return 0; } int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) { if (in_failure_mode(desc)) - return -EIO; + return -EROFS; return ubi_is_mapped(desc, lnum); } @@ -2727,12 +2727,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) int err; if (do_fail(desc, lnum, 0)) - return -EIO; + return -EROFS; err = ubi_leb_map(desc, lnum, dtype); if (err) return err; if (do_fail(desc, lnum, 0)) - return -EIO; + return -EROFS; return 0; } -- cgit v0.10.2 From 3b2f9a019e655f3407e4e69cdbaf8b75699b79a4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 21 Apr 2011 15:11:33 +0300 Subject: UBIFS: use ro_mount instead of MS_RDONLY We have our own flags indicating R/O mode, and c->ro_mode is equivalent to MS_RDONLY. Let's be consistent and use UBIFS flags everywhere. This patch is just a minor cleanup. Additionally, add a comment that we are surprised with VFS behavior - as a reminder to look at this some day. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 3594aae..adce3b7 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1312,7 +1312,11 @@ int ubifs_fsync(struct file *file, int datasync) dbg_gen("syncing inode %lu", inode->i_ino); - if (inode->i_sb->s_flags & MS_RDONLY) + if (c->ro_mount) + /* + * For some really strange reasons VFS does not filter out + * 'fsync()' for R/O mounted file-systems as per 2.6.39. + */ return 0; /* -- cgit v0.10.2 From 1321657d8f843641529eff26e25722158cc29349 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 24 Apr 2011 10:53:17 +0300 Subject: UBIFS: fix oops in lprops dump function The 'dbg_dump_lprop()' is trying to detect journal head LEBs when printing, so it looks at the write-buffers. However, if we are in R/O mode, we de-allocate the write-buffers, so 'dbg_dump_lprop()' oopses. This patch fixes the issue. Note, this patch is not critical, it is only about the debugging code path, and it is unlikely that anyone but UBIFS developers would ever hit this issue. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index ba4d795..4062ec6 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -746,7 +746,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) if (bud->lnum == lp->lnum) { int head = 0; for (i = 0; i < c->jhead_cnt; i++) { - if (lp->lnum == c->jheads[i].wbuf.lnum) { + /* + * Note, if we are in R/O mode or in the middle + * of mounting/re-mounting, the write-buffers do + * not exist. + */ + if (c->jheads && + lp->lnum == c->jheads[i].wbuf.lnum) { printk(KERN_CONT ", jhead %s", dbg_jhead(i)); head = 1; -- cgit v0.10.2 From 341e262f903afbb8c23133ed6e193ea6254f73c4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 22 Apr 2011 12:11:12 +0300 Subject: UBIFS: do not change debugfs file position This patch is a tiny improvement which removes few bytes of code. UBIFS debugfs files are non-seekable and the file position is ignored, so do not increase it in the write handler. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 4062ec6..f7515bd 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2828,7 +2828,6 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, } else return -EINVAL; - *ppos += count; return count; } -- cgit v0.10.2 From 2cd0a60cf49db4722445337b90fb06c9672f1128 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 24 Apr 2011 15:35:11 +0300 Subject: UBIFS: remove strange commentary Remove the following commentary from 'ubifs_file_mmap()': /* 'generic_file_mmap()' takes care of NOMMU case */ I do not understand what it means, and I could not find anything relater to NOMMU in 'generic_file_mmap()'. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index adce3b7..5e7fccf 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1541,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) { int err; - /* 'generic_file_mmap()' takes care of NOMMU case */ err = generic_file_mmap(file, vma); if (err) return err; -- cgit v0.10.2 From 2405f5948119cdc1c28697fd3110124dad974898 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 26 Apr 2011 09:49:32 +0300 Subject: UBIFS: remove duplicated code We have duplicated code in 'ubifs_garbage_collect()' and 'ubifs_rcvry_gc_commit()', which is about handling the special case of free LEB. In both cases we just want to garbage-collect the LEB using 'ubifs_garbage_collect_leb()'. This patch teaches 'ubifs_garbage_collect_leb()' to handle free LEB's so that the caller does not have to do this and the duplicated code is removed. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 151f108..082d21b 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -473,6 +473,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) ubifs_assert(c->gc_lnum != lnum); ubifs_assert(wbuf->lnum != lnum); + if (lp->free + lp->dirty == c->leb_size) { + /* Special case - a free LEB */ + dbg_gc("LEB %d is free, return it", lp->lnum); + ubifs_assert(!(lp->flags & LPROPS_INDEX)); + + if (lp->free != c->leb_size) { + /* + * Write buffers must be sync'd before unmapping + * freeable LEBs, because one of them may contain data + * which obsoletes something in 'lp->pnum'. + */ + err = gc_sync_wbufs(c); + if (err) + return err; + err = ubifs_change_one_lp(c, lp->lnum, c->leb_size, + 0, 0, 0, 0); + if (err) + return err; + } + err = ubifs_leb_unmap(c, lp->lnum); + if (err) + return err; + + if (c->gc_lnum == -1) { + c->gc_lnum = lnum; + return LEB_RETAINED; + } + + return LEB_FREED; + } + /* * We scan the entire LEB even though we only really need to scan up to * (c->leb_size - lp->free). @@ -682,37 +713,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) "(min. space %d)", lp.lnum, lp.free, lp.dirty, lp.free + lp.dirty, min_space); - if (lp.free + lp.dirty == c->leb_size) { - /* An empty LEB was returned */ - dbg_gc("LEB %d is free, return it", lp.lnum); - /* - * ubifs_find_dirty_leb() doesn't return freeable index - * LEBs. - */ - ubifs_assert(!(lp.flags & LPROPS_INDEX)); - if (lp.free != c->leb_size) { - /* - * Write buffers must be sync'd before - * unmapping freeable LEBs, because one of them - * may contain data which obsoletes something - * in 'lp.pnum'. - */ - ret = gc_sync_wbufs(c); - if (ret) - goto out; - ret = ubifs_change_one_lp(c, lp.lnum, - c->leb_size, 0, 0, 0, - 0); - if (ret) - goto out; - } - ret = ubifs_leb_unmap(c, lp.lnum); - if (ret) - goto out; - ret = lp.lnum; - break; - } - space_before = c->leb_size - wbuf->offs - wbuf->used; if (wbuf->lnum == -1) space_before = 0; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 3dbad6f..1a72046 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1125,25 +1125,10 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) } return err; } + ubifs_assert(!(lp.flags & LPROPS_INDEX)); lnum = lp.lnum; - if (lp.free + lp.dirty == c->leb_size) { - /* An empty LEB was returned */ - if (lp.free != c->leb_size) { - err = ubifs_change_one_lp(c, lnum, c->leb_size, - 0, 0, 0, 0); - if (err) - return err; - } - err = ubifs_leb_unmap(c, lnum); - if (err) - return err; - c->gc_lnum = lnum; - dbg_rcvry("allocated LEB %d for GC", lnum); - /* Run the commit */ - dbg_rcvry("committing"); - return ubifs_run_commit(c); - } + /* * There was no empty LEB so the used space in the dirtiest LEB must fit * in the GC head LEB. -- cgit v0.10.2 From bcdca3e10ac22c208955a63c907a0988550bc666 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 26 Apr 2011 10:07:50 +0300 Subject: UBIFS: remove dead GC LEB recovery piece of code This patch removes a piece of code in 'ubifs_rcvry_gc_commit()' which is never executed. We call 'ubifs_find_dirty_leb()' function with min_space = wbuf->offs, so if it returns us an LEB, it is guaranteed to have at lease 'wbuf->offs' bytes of free+dirty space. So we can remove the subsequent code which deals with "returned LEB has less than 'wbuf->offs' bytes of free+dirty space". This simplifies 'ubifs_rcvry_gc_commit()' a little. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 1a72046..3e0eedbe 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1127,21 +1127,10 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) } ubifs_assert(!(lp.flags & LPROPS_INDEX)); + ubifs_assert(lp.free + lp.dirty >= wbuf->offs); lnum = lp.lnum; /* - * There was no empty LEB so the used space in the dirtiest LEB must fit - * in the GC head LEB. - */ - if (lp.free + lp.dirty < wbuf->offs) { - dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", - lnum, wbuf->lnum, wbuf->offs); - err = ubifs_return_leb(c, lnum); - if (err) - return err; - goto find_free; - } - /* * We run the commit before garbage collection otherwise subsequent * mounts will see the GC and orphan deletion in a different order. */ -- cgit v0.10.2 From 6d5904e062d5cb28b705d49a35682e0871b3f382 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 26 Apr 2011 10:17:00 +0300 Subject: UBIFS: print useful debugging messages when cannot recover gc_lnum If we fail to recover the gc_lnum we just return an error and it then it is difficult to figure out why this happened. This patch adds useful debugging information which should make it easier to debug the failure. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 3e0eedbe..3d2598d 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1177,6 +1177,8 @@ find_free: lnum = ubifs_find_free_leb_for_idx(c); if (lnum < 0) { dbg_err("could not find an empty LEB"); + dbg_dump_lprops(c); + dbg_dump_budg(c, &c->bi); return lnum; } /* And reset the index flag */ -- cgit v0.10.2 From ec0681426526b23d3e12cf247d64676806c30b7f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 26 Apr 2011 10:21:54 +0300 Subject: UBIFS: dump the stack on errors in failure mode too When UBIFS is in the failure mode (used for power cut emulation testing) we for some reasons do not dump the stack in many places, e.g., in assertions. Probably at early days we had too many of them and disabled this to make the development easier, but then never enabled. Nowadays I sometimes observe assertion failures during power cut testing, but the useful stackdump is not printed, which is bad. This patch makes UBIFS always print the stackdump when debugging is enabled. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 8934c12..f3e235f 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -107,10 +107,7 @@ struct ubifs_debug_info { } \ } while (0) -#define dbg_dump_stack() do { \ - if (!dbg_failure_mode) \ - dump_stack(); \ -} while (0) +#define dbg_dump_stack() dump_stack() /* Generic debugging messages */ #define dbg_msg(fmt, ...) do { \ -- cgit v0.10.2 From 447442139c764fd75cf892905d0feb08a9b983ed Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 27 Apr 2011 14:52:35 +0300 Subject: UBIFS: split ubifs_rcvry_gc_commit Split the 'ubifs_rcvry_gc_commit()' function and introduce a 'grab_empty_leb()' heler. This cleans 'ubifs_rcvry_gc_commit()' a little and makes it a bit less of spagetti. Also, add a commentary which explains why it is crucial to first search for an empty LEB and then run commit. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 3d2598d..11776ae 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1070,6 +1070,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) } /** + * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. + * @c: UBIFS file-system description object + * + * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty + * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns + * zero in case of success and a negative error code in case of failure. + */ +static int grab_empty_leb(struct ubifs_info *c) +{ + int lnum, err; + + /* + * Note, it is very important to first search for an empty LEB and then + * run the commit, not vice-versa. The reason is that there might be + * only one empty LEB at the moment, the one which has been the + * @c->gc_lnum just before the power cut happened. During the regular + * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no + * one but GC can grab it. But at this moment this single empty LEB is + * not marked as taken, so if we run commit - what happens? Right, the + * commit will grab it and write the index there. Remember that the + * index always expands as long as there is free space, and it only + * starts consolidating when we run out of space. + * + * IOW, if we run commit now, we might not be able to find a free LEB + * after this. + */ + lnum = ubifs_find_free_leb_for_idx(c); + if (lnum < 0) { + dbg_err("could not find an empty LEB"); + dbg_dump_lprops(c); + dbg_dump_budg(c, &c->bi); + return lnum; + } + + /* Reset the index flag */ + err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, + LPROPS_INDEX, 0); + if (err) + return err; + + c->gc_lnum = lnum; + dbg_rcvry("found empty LEB %d, run commit", lnum); + + return ubifs_run_commit(c); +} + +/** * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. * @c: UBIFS file-system description object * @@ -1096,7 +1143,7 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) c->gc_lnum = -1; if (wbuf->lnum == -1) { dbg_rcvry("no GC head LEB"); - goto find_free; + return grab_empty_leb(c); } /* * See whether the used space in the dirtiest LEB fits in the GC head @@ -1104,7 +1151,7 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) */ if (wbuf->offs == c->leb_size) { dbg_rcvry("no room in GC head LEB"); - goto find_free; + return grab_empty_leb(c); } err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); if (err) { @@ -1121,7 +1168,7 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) */ if (err == -ENOSPC) { dbg_rcvry("could not find a dirty LEB"); - goto find_free; + return grab_empty_leb(c); } return err; } @@ -1167,30 +1214,6 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) return err; dbg_rcvry("allocated LEB %d for GC", lnum); return 0; - -find_free: - /* - * There is no GC head LEB or the free space in the GC head LEB is too - * small, or there are not dirty LEBs. Allocate gc_lnum by calling - * 'ubifs_find_free_leb_for_idx()' so GC is not run. - */ - lnum = ubifs_find_free_leb_for_idx(c); - if (lnum < 0) { - dbg_err("could not find an empty LEB"); - dbg_dump_lprops(c); - dbg_dump_budg(c, &c->bi); - return lnum; - } - /* And reset the index flag */ - err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, - LPROPS_INDEX, 0); - if (err) - return err; - c->gc_lnum = lnum; - dbg_rcvry("allocated LEB %d for GC", lnum); - /* Run the commit */ - dbg_rcvry("committing"); - return ubifs_run_commit(c); } /** -- cgit v0.10.2 From fe79c05f03777acf87acc54fa3ad357ed5ee3b81 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 29 Apr 2011 16:35:46 +0300 Subject: UBIFS: refactor ubifs_rcvry_gc_commit This commits refactors and cleans up 'ubifs_rcvry_gc_commit()' which was quite untidy, also removes the commentary which was not 100% correct. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 11776ae..d28db1e 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1138,44 +1138,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) { struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; struct ubifs_lprops lp; - int lnum, err; + int err; c->gc_lnum = -1; - if (wbuf->lnum == -1) { - dbg_rcvry("no GC head LEB"); - return grab_empty_leb(c); - } - /* - * See whether the used space in the dirtiest LEB fits in the GC head - * LEB. - */ - if (wbuf->offs == c->leb_size) { - dbg_rcvry("no room in GC head LEB"); + if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) { + dbg_rcvry("no GC head: wbuf->lnum %d, wbuf->offs %d", + wbuf->lnum, wbuf->offs); return grab_empty_leb(c); } + err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); if (err) { - /* - * There are no dirty or empty LEBs subject to here being - * enough for the index. Try to use - * 'ubifs_find_free_leb_for_idx()', which will return any empty - * LEBs (ignoring index requirements). If the index then - * doesn't have enough LEBs the recovery commit will fail - - * which is the same result anyway i.e. recovery fails. So - * there is no problem ignoring index requirements and just - * grabbing a free LEB since we have already established there - * is not a dirty LEB we could have used instead. - */ - if (err == -ENOSPC) { - dbg_rcvry("could not find a dirty LEB"); - return grab_empty_leb(c); - } - return err; + if (err != -ENOSPC) + return err; + + dbg_rcvry("could not find a dirty LEB"); + return grab_empty_leb(c); } ubifs_assert(!(lp.flags & LPROPS_INDEX)); ubifs_assert(lp.free + lp.dirty >= wbuf->offs); - lnum = lp.lnum; /* * We run the commit before garbage collection otherwise subsequent @@ -1185,11 +1167,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) err = ubifs_run_commit(c); if (err) return err; - /* - * The data in the dirtiest LEB fits in the GC head LEB, so do the GC - * - use locking to keep 'ubifs_assert()' happy. - */ - dbg_rcvry("GC'ing LEB %d", lnum); + + dbg_rcvry("GC'ing LEB %d", lp.lnum); mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); err = ubifs_garbage_collect_leb(c, &lp); if (err >= 0) { @@ -1205,14 +1184,16 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) err = -EINVAL; return err; } - if (err != LEB_RETAINED) { - dbg_err("GC returned %d", err); + + ubifs_assert(err == LEB_RETAINED); + if (err != LEB_RETAINED) return -EINVAL; - } + err = ubifs_leb_unmap(c, c->gc_lnum); if (err) return err; - dbg_rcvry("allocated LEB %d for GC", lnum); + + dbg_rcvry("allocated LEB %d for GC", lp.lnum); return 0; } -- cgit v0.10.2 From 4c9545200a3bd9e87b36475c263034a38be02e44 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 2 May 2011 21:43:54 +0300 Subject: UBIFS: fix debugging message When recovering the inode size, one of the debugging messages was printed incorrecly, this patches fixes it. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index d28db1e..596dede 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1496,7 +1496,7 @@ int ubifs_recover_size(struct ubifs_info *c) if (inode->i_size < e->d_size) { dbg_rcvry("ino %lu size %lld -> %lld", (unsigned long)e->inum, - e->d_size, inode->i_size); + inode->i_size, e->d_size); inode->i_size = e->d_size; ubifs_inode(inode)->ui_size = e->d_size; e->inode = inode; -- cgit v0.10.2 From 69f8a75a7d9db05a7ee708514d605ab74956c73e Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 2 May 2011 21:51:17 +0300 Subject: UBIFS: remove an unneeded check In 'ubifs_recover_size()' we have an "if (!e->inode && c->ro_mount)" statement. But if 'c->ro_mount' is true, then '!e->inode' must always be true as well. So we can remove the unnecessary '!e->inode' test and put an 'ubifs_assert(!e->inode)' instead. This patch also removes an extra trailing white-space in a debugging print, as well as adds few empty lines to 'ubifs_recover_size()' to make it a bit more readable. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 596dede..d6c8ce3 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1436,7 +1436,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); if (err) goto out; - dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", + dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", (unsigned long)e->inum, lnum, offs, i_size, e->d_size); return 0; @@ -1485,11 +1485,14 @@ int ubifs_recover_size(struct ubifs_info *c) e->i_size = le64_to_cpu(ino->size); } } + if (e->exists && e->i_size < e->d_size) { - if (!e->inode && c->ro_mount) { + if (c->ro_mount) { /* Fix the inode size and pin it in memory */ struct inode *inode; + ubifs_assert(!e->inode); + inode = ubifs_iget(c->vfs_sb, e->inum); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -1513,9 +1516,11 @@ int ubifs_recover_size(struct ubifs_info *c) iput(e->inode); } } + this = rb_next(this); rb_erase(&e->rb, &c->size_tree); kfree(e); } + return 0; } -- cgit v0.10.2 From 45cd5cddbfbdf0993dbc76d06ed77d0bf547b421 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 2 May 2011 22:34:39 +0300 Subject: UBIFS: fix debugging FS checking failure When the debugging self-checks are enabled, we go trough whole file-system after mount and check/validate every single node referred to by the index. This is implemented by the 'dbg_check_filesystem()' function. However, this function fails if we mount "unclean" file-system, i.e., if we mount the file-system after a power cut. It fails with the following symptoms: UBIFS DBG (pid 8171): ubifs_recover_size: ino 937 size 3309925 -> 3317760 UBIFS: recovery deferred UBIFS error (pid 8171): check_leaf: data node at LEB 1000:0 is not within inode size 3309925 The reason of failure is that recovery fixed up the inode size in memory, but not on the flash so far. So the value on the flash is incorrect so far, and would be corrected when we re-mount R/W. But 'check_leaf()' ignores this fact and tries to validate the size of the on-flash inode, which is incorrect, so it fails. This patch teaches the checking code to look at the VFS inode cache first, and if there is the inode in question, use that inode instead of the inode on the flash media. This fixes the issue. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index f7515bd..b5ba2ea 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -1818,6 +1818,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, struct rb_node **p, *parent = NULL; struct fsck_inode *fscki; ino_t inum = key_inum_flash(c, &ino->key); + struct inode *inode; + struct ubifs_inode *ui; p = &fsckd->inodes.rb_node; while (*p) { @@ -1841,19 +1843,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, if (!fscki) return ERR_PTR(-ENOMEM); + inode = ilookup(c->vfs_sb, inum); + fscki->inum = inum; - fscki->nlink = le32_to_cpu(ino->nlink); - fscki->size = le64_to_cpu(ino->size); - fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); - fscki->xattr_sz = le32_to_cpu(ino->xattr_size); - fscki->xattr_nms = le32_to_cpu(ino->xattr_names); - fscki->mode = le32_to_cpu(ino->mode); + /* + * If the inode is present in the VFS inode cache, use it instead of + * the on-flash inode which might be out-of-date. E.g., the size might + * be out-of-date. If we do not do this, the following may happen, for + * example: + * 1. A power cut happens + * 2. We mount the file-system R/O, the replay process fixes up the + * inode size in the VFS cache, but on on-flash. + * 3. 'check_leaf()' fails because it hits a data node beyond inode + * size. + */ + if (!inode) { + fscki->nlink = le32_to_cpu(ino->nlink); + fscki->size = le64_to_cpu(ino->size); + fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); + fscki->xattr_sz = le32_to_cpu(ino->xattr_size); + fscki->xattr_nms = le32_to_cpu(ino->xattr_names); + fscki->mode = le32_to_cpu(ino->mode); + } else { + ui = ubifs_inode(inode); + fscki->nlink = inode->i_nlink; + fscki->size = inode->i_size; + fscki->xattr_cnt = ui->xattr_cnt; + fscki->xattr_sz = ui->xattr_size; + fscki->xattr_nms = ui->xattr_names; + fscki->mode = inode->i_mode; + iput(inode); + } + if (S_ISDIR(fscki->mode)) { fscki->calc_sz = UBIFS_INO_NODE_SZ; fscki->calc_cnt = 2; } + rb_link_node(&fscki->rb, parent, p); rb_insert_color(&fscki->rb, &fsckd->inodes); + return fscki; } -- cgit v0.10.2 From c1f1f91d2183b91c684900b529b6c336ad3dd27c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 5 May 2011 14:16:32 +0300 Subject: UBIFS: fix inode size debugging check failure This patch fixes a problem with the following symptoms: UBIFS: deferred recovery completed UBIFS error (pid 15676): dbg_check_synced_i_size: ui_size is 11481088, synced_i_size is 11459081, but inode is clean UBIFS error (pid 15676): dbg_check_synced_i_size: i_ino 128, i_mode 0x81a4, i_size 11481088 It happens when additional debugging checks are enabled and we are recovering from a power cut. When we fixup corrupted inode size during recovery, we change them in-place and we change ui_size as well, but not synced_i_size, which causes this failure. This patch makes sure we change both fields and fixes the issue. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index d6c8ce3..3f41a0c 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1490,18 +1490,22 @@ int ubifs_recover_size(struct ubifs_info *c) if (c->ro_mount) { /* Fix the inode size and pin it in memory */ struct inode *inode; + struct ubifs_inode *ui; ubifs_assert(!e->inode); inode = ubifs_iget(c->vfs_sb, e->inum); if (IS_ERR(inode)) return PTR_ERR(inode); + + ui = ubifs_inode(inode); if (inode->i_size < e->d_size) { dbg_rcvry("ino %lu size %lld -> %lld", (unsigned long)e->inum, inode->i_size, e->d_size); inode->i_size = e->d_size; - ubifs_inode(inode)->ui_size = e->d_size; + ui->ui_size = e->d_size; + ui->synced_i_size = e->d_size; e->inode = inode; this = rb_next(this); continue; -- cgit v0.10.2 From eaeee242c531cd4b0a4a46e8b5dd7ef504380c42 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 6 May 2011 17:08:56 +0300 Subject: UBIFS: fix a rare memory leak in ro to rw remounting path When re-mounting from R/O mode to R/W mode and the LEB count in the superblock is not up-to date, because for the underlying UBI volume became larger, we re-write the superblock. We allocate RAM for these purposes, but never free it. So this is a memory leak, although very rare one. Signed-off-by: Artem Bityutskiy Cc: stable@kernel.org diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index bf31b47..cad60b5 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -475,7 +475,8 @@ failed: * @c: UBIFS file-system description object * * This function returns a pointer to the superblock node or a negative error - * code. + * code. Note, the user of this function is responsible of kfree()'ing the + * returned superblock buffer. */ struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) { diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 407c064..575ea83 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1585,6 +1585,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) } sup->leb_cnt = cpu_to_le32(c->leb_cnt); err = ubifs_write_sb_node(c, sup); + kfree(sup); if (err) goto out; } -- cgit v0.10.2 From 8ca5175b02b77178a70cbb9fd7020c4938e3d3a6 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 6 May 2011 20:41:48 +0300 Subject: UBIFS: improve debugging lprops scanning a little When doing the lprops extra check ('dbg_check_lprops()') we scan whole media. We even scan empty and freeable LEBs which may contain garbage, which we handle after scanning. This patch teach the lprops checking function ('scan_check_cb()') to avoid scanning for free and freeable LEBs and save time. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 0ee0847..ce9fe39 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1100,32 +1100,26 @@ static int scan_check_cb(struct ubifs_info *c, goto out; } + /* + * After an unclean unmount, empty and freeable LEBs + * may contain garbage - do not scan them. + */ + if (lp->free == c->leb_size) { + lst->empty_lebs += 1; + lst->total_free += c->leb_size; + lst->total_dark += ubifs_calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } + if (lp->free + lp->dirty == c->leb_size && + !(lp->flags & LPROPS_INDEX)) { + lst->total_free += lp->free; + lst->total_dirty += lp->dirty; + lst->total_dark += ubifs_calc_dark(c, c->leb_size); + return LPT_SCAN_CONTINUE; + } + sleb = ubifs_scan(c, lnum, 0, buf, 0); if (IS_ERR(sleb)) { - /* - * After an unclean unmount, empty and freeable LEBs - * may contain garbage. - */ - if (lp->free == c->leb_size) { - ubifs_err("scan errors were in empty LEB " - "- continuing checking"); - lst->empty_lebs += 1; - lst->total_free += c->leb_size; - lst->total_dark += ubifs_calc_dark(c, c->leb_size); - ret = LPT_SCAN_CONTINUE; - goto exit; - } - - if (lp->free + lp->dirty == c->leb_size && - !(lp->flags & LPROPS_INDEX)) { - ubifs_err("scan errors were in freeable LEB " - "- continuing checking"); - lst->total_free += lp->free; - lst->total_dirty += lp->dirty; - lst->total_dark += ubifs_calc_dark(c, c->leb_size); - ret = LPT_SCAN_CONTINUE; - goto exit; - } data->err = PTR_ERR(sleb); ret = LPT_SCAN_STOP; goto exit; -- cgit v0.10.2 From dcc50c8ee334d570cef84fc956084d567f32a8a3 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 6 May 2011 20:47:14 +0300 Subject: UBIFS: simplify error path in lprops debugging check Simplify error path in 'scan_check_cb()' and stop using the special 'data->err' field, but instead return the error code directly. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index ce9fe39..ac4521a 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1044,7 +1044,7 @@ static int scan_check_cb(struct ubifs_info *c, if (cat != (lp->flags & LPROPS_CAT_MASK)) { ubifs_err("bad LEB category %d expected %d", (lp->flags & LPROPS_CAT_MASK), cat); - goto out; + return -EINVAL; } } @@ -1078,7 +1078,7 @@ static int scan_check_cb(struct ubifs_info *c, } if (!found) { ubifs_err("bad LPT list (category %d)", cat); - goto out; + return -EINVAL; } } } @@ -1090,15 +1090,13 @@ static int scan_check_cb(struct ubifs_info *c, if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || lp != heap->arr[lp->hpos]) { ubifs_err("bad LPT heap (category %d)", cat); - goto out; + return -EINVAL; } } buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); - if (!buf) { - ubifs_err("cannot allocate memory to scan LEB %d", lnum); - goto out; - } + if (!buf) + return -ENOMEM; /* * After an unclean unmount, empty and freeable LEBs @@ -1120,9 +1118,8 @@ static int scan_check_cb(struct ubifs_info *c, sleb = ubifs_scan(c, lnum, 0, buf, 0); if (IS_ERR(sleb)) { - data->err = PTR_ERR(sleb); - ret = LPT_SCAN_STOP; - goto exit; + ret = PTR_ERR(sleb); + goto out; } is_idx = -1; @@ -1240,10 +1237,8 @@ static int scan_check_cb(struct ubifs_info *c, } ubifs_scan_destroy(sleb); - ret = LPT_SCAN_CONTINUE; -exit: vfree(buf); - return ret; + return LPT_SCAN_CONTINUE; out_print: ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " @@ -1252,10 +1247,10 @@ out_print: dbg_dump_leb(c, lnum); out_destroy: ubifs_scan_destroy(sleb); + ret = -EINVAL; out: vfree(buf); - data->err = -EINVAL; - return LPT_SCAN_STOP; + return ret; } /** -- cgit v0.10.2 From 34bdc3e2578cae3162e481203a2980d55e184a73 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 6 May 2011 20:58:08 +0300 Subject: UBIFS: simplify lprops debugging check Now we return all errors from 'scan_check_cb()' directly, so we do not need 'struct scan_check_data' any more, and this patch removes it. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index ac4521a..77c541b 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1007,21 +1007,11 @@ out: } /** - * struct scan_check_data - data provided to scan callback function. - * @lst: LEB properties statistics - * @err: error code - */ -struct scan_check_data { - struct ubifs_lp_stats lst; - int err; -}; - -/** * scan_check_cb - scan callback. * @c: the UBIFS file-system description object * @lp: LEB properties to scan * @in_tree: whether the LEB properties are in main memory - * @data: information passed to and from the caller of the scan + * @lst: lprops statistics to update * * This function returns a code that indicates whether the scan should continue * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree @@ -1030,11 +1020,10 @@ struct scan_check_data { */ static int scan_check_cb(struct ubifs_info *c, const struct ubifs_lprops *lp, int in_tree, - struct scan_check_data *data) + struct ubifs_lp_stats *lst) { struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; - struct ubifs_lp_stats *lst = &data->lst; int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; void *buf = NULL; @@ -1267,8 +1256,7 @@ out: int dbg_check_lprops(struct ubifs_info *c) { int i, err; - struct scan_check_data data; - struct ubifs_lp_stats *lst = &data.lst; + struct ubifs_lp_stats lst; if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) return 0; @@ -1283,29 +1271,23 @@ int dbg_check_lprops(struct ubifs_info *c) return err; } - memset(lst, 0, sizeof(struct ubifs_lp_stats)); - - data.err = 0; + memset(&lst, 0, sizeof(struct ubifs_lp_stats)); err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, (ubifs_lpt_scan_callback)scan_check_cb, - &data); + &lst); if (err && err != -ENOSPC) goto out; - if (data.err) { - err = data.err; - goto out; - } - if (lst->empty_lebs != c->lst.empty_lebs || - lst->idx_lebs != c->lst.idx_lebs || - lst->total_free != c->lst.total_free || - lst->total_dirty != c->lst.total_dirty || - lst->total_used != c->lst.total_used) { + if (lst.empty_lebs != c->lst.empty_lebs || + lst.idx_lebs != c->lst.idx_lebs || + lst.total_free != c->lst.total_free || + lst.total_dirty != c->lst.total_dirty || + lst.total_used != c->lst.total_used) { ubifs_err("bad overall accounting"); ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " "total_free %lld, total_dirty %lld, total_used %lld", - lst->empty_lebs, lst->idx_lebs, lst->total_free, - lst->total_dirty, lst->total_used); + lst.empty_lebs, lst.idx_lebs, lst.total_free, + lst.total_dirty, lst.total_used); ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " "total_free %lld, total_dirty %lld, total_used %lld", c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, @@ -1314,11 +1296,11 @@ int dbg_check_lprops(struct ubifs_info *c) goto out; } - if (lst->total_dead != c->lst.total_dead || - lst->total_dark != c->lst.total_dark) { + if (lst.total_dead != c->lst.total_dead || + lst.total_dark != c->lst.total_dark) { ubifs_err("bad dead/dark space accounting"); ubifs_err("calculated: total_dead %lld, total_dark %lld", - lst->total_dead, lst->total_dark); + lst.total_dead, lst.total_dark); ubifs_err("read from lprops: total_dead %lld, total_dark %lld", c->lst.total_dead, c->lst.total_dark); err = -EINVAL; -- cgit v0.10.2 From 12346037a71809197b22e223c3a719190a26172d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 6 May 2011 21:23:25 +0300 Subject: UBIFS: dump more in the lprops debugging check Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 77c541b..667884f 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1108,6 +1108,10 @@ static int scan_check_cb(struct ubifs_info *c, sleb = ubifs_scan(c, lnum, 0, buf, 0); if (IS_ERR(sleb)) { ret = PTR_ERR(sleb); + if (ret == -EUCLEAN) { + dbg_dump_lprops(c); + dbg_dump_budg(c, &c->bi); + } goto out; } -- cgit v0.10.2 From c839e29768059c56ef233586e66a2a66edbe93a3 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 13 May 2011 12:26:54 +0300 Subject: UBIFS: improve debugging messages Print a bit more information is some recovery and replay paths. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 3f41a0c..42b4512 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -1140,12 +1140,11 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) struct ubifs_lprops lp; int err; + dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); + c->gc_lnum = -1; - if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) { - dbg_rcvry("no GC head: wbuf->lnum %d, wbuf->offs %d", - wbuf->lnum, wbuf->offs); + if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) return grab_empty_leb(c); - } err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); if (err) { diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index c29c468..0052663 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -509,7 +509,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, struct ubifs_scan_node *snod; struct ubifs_bud *bud; - dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); + dbg_mnt("replay bud LEB %d, head %d, offs %d", lnum, jhead, offs); if (c->need_recovery) sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); else @@ -636,6 +636,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, *dirty = sleb->endpt - offs - used; *free = c->leb_size - sleb->endpt; + dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, *dirty, *free); out: ubifs_scan_destroy(sleb); -- cgit v0.10.2 From 7a9c3e399389723f01be3dab50991604d2bc0409 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 13 May 2011 13:02:00 +0300 Subject: UBIFS: improve commentary This is a tiny clean-up patch which improves replay commentaries. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 0052663..e27346f 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -125,10 +125,11 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) * zero which means the LEB must have been empty. The LEB * property values should be lp->free == c->leb_size and * lp->dirty == 0, but that is not the case. The reason is that - * the LEB was garbage collected. The garbage collector resets - * the free and dirty space without recording it anywhere except - * lprops, so if there is not a commit then lprops does not have - * that information next time the file system is mounted. + * the LEB had been garbage collected before it became the bud, + * and there was not commit inbetween. The garbage collector + * resets the free and dirty space without recording it + * anywhere except lprops, so if there was no commit then + * lprops does not have that information. * * We do not need to adjust free space because the scan has told * us the exact value which is recorded in the replay entry as -- cgit v0.10.2 From e9ef7b5f25d31c5660fb4a87f4b40ac48070fcb7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 13 May 2011 13:22:19 +0300 Subject: UBIFS: make 2 functions static This is a minor change which makes 2 functions static because they are not used outside the gc.c file: 'data_nodes_cmp()' and 'nondata_nodes_cmp()'. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 082d21b..d70937b 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -118,7 +118,7 @@ static int switch_gc_head(struct ubifs_info *c) * This function compares data nodes @a and @b. Returns %1 if @a has greater * inode or block number, and %-1 otherwise. */ -int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) { ino_t inuma, inumb; struct ubifs_info *c = priv; @@ -161,7 +161,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) * first and sorted by length in descending order. Directory entry nodes go * after inode nodes and are sorted in ascending hash valuer order. */ -int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) +static int nondata_nodes_cmp(void *priv, struct list_head *a, + struct list_head *b) { ino_t inuma, inumb; struct ubifs_info *c = priv; -- cgit v0.10.2 From 7703f09ded1b8719d2defe0f61215b4a08702ffa Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 13 May 2011 16:02:19 +0300 Subject: UBIFS: double check that buds are replied in order Commit 52c6e6f990669deac3f370f1603815adb55a1dbd provides misleading infomation in the commit messages - buds are replied in order. And the real reason why that fix helped is probably because it made sure we seek head even in read-only mode (so deferred recovery will have seeked heads). This patch adds an assertion which will fire if we reply buds out of order. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index e27346f..5e81503 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -713,6 +713,7 @@ static int replay_buds(struct ubifs_info *c) { struct bud_entry *b; int err, uninitialized_var(free), uninitialized_var(dirty); + unsigned long long prev_sqnum = 0; list_for_each_entry(b, &c->replay_buds, list) { err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, @@ -723,6 +724,9 @@ static int replay_buds(struct ubifs_info *c) free, dirty, b->bud->jhead); if (err) return err; + + ubifs_assert(b->sqnum > prev_sqnum); + prev_sqnum = b->sqnum; } return 0; -- cgit v0.10.2 From 12f338914e7f2a9ab0def09564b9e78ab45a474f Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sat, 14 May 2011 17:37:47 +0300 Subject: UBIFS: remove unnecessary stack variable This is patch removes an unnecessary 'offs' variable from 'ubifs_wbuf_write_nolock()' - we can just keep 'wbuf->offs' up-to-date instead. This patch is very minor the only motivation for it was that it is cleaner to keep wbuf->offs up-to-date by the time we call 'ubifs_leb_write()'. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 6cc0931..67bbde3 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -573,7 +573,7 @@ out_timers: int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) { struct ubifs_info *c = wbuf->c; - int err, written, n, aligned_len = ALIGN(len, 8), offs; + int err, written, n, aligned_len = ALIGN(len, 8); dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, dbg_ntype(((struct ubifs_ch *)buf)->node_type), @@ -636,7 +636,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) goto exit; } - offs = wbuf->offs; written = 0; if (wbuf->used) { @@ -653,7 +652,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) if (err) goto out; - offs += wbuf->size; + wbuf->offs += wbuf->size; len -= wbuf->avail; aligned_len -= wbuf->avail; written += wbuf->avail; @@ -672,7 +671,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) if (err) goto out; - offs += wbuf->size; + wbuf->offs += wbuf->size; len -= wbuf->size; aligned_len -= wbuf->size; written += wbuf->size; @@ -687,12 +686,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) n = aligned_len >> c->max_write_shift; if (n) { n <<= c->max_write_shift; - dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); - err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, - wbuf->dtype); + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, + wbuf->offs); + err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, + wbuf->offs, n, wbuf->dtype); if (err) goto out; - offs += n; + wbuf->offs += n; aligned_len -= n; len -= n; written += n; @@ -707,7 +707,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) */ memcpy(wbuf->buf, buf + written, len); - wbuf->offs = offs; if (c->leb_size - wbuf->offs >= c->max_write_size) wbuf->size = c->max_write_size; else -- cgit v0.10.2 From af1dd412646a58b29522f93f7befa944d7d361c0 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 15 May 2011 11:14:57 +0300 Subject: UBIFS: store free and dirty space in the bud replay entry This is just a small preparation patch which adds 'free' and 'drity' fields to 'struct bud_entry'. They will be used to set bud lprops. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 5e81503..ee2f0b2 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -90,14 +90,16 @@ struct replay_entry { * struct bud_entry - entry in the list of buds to replay. * @list: next bud in the list * @bud: bud description object - * @free: free bytes in the bud * @sqnum: reference node sequence number + * @free: free bytes in the bud + * @dirty: dirty bytes in the bud */ struct bud_entry { struct list_head list; struct ubifs_bud *bud; - int free; unsigned long long sqnum; + int free; + int dirty; }; /** @@ -720,6 +722,8 @@ static int replay_buds(struct ubifs_info *c) &free, &dirty); if (err) return err; + b->free = free; + b->dirty = dirty; err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, free, dirty, b->bud->jhead); if (err) -- cgit v0.10.2 From 074bcb9b5ce698bd7b02ddb469da9cba21fe83fd Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 15 May 2011 11:37:17 +0300 Subject: UBIFS: simplify replay This patch simplifies the replay code and makes it smaller. First of all, we can notice that we do not really need to create bud replay entries and insert them to the replay tree, because the only reason we do this is to set buds lprops correctly at the end. Instead, we can just walk the list of buds at the very end and set lprops for each bud. This allows us to get rid of whole 'insert_ref_node()' function, the 'REPLAY_REF' flag, and several fields in 'struct replay_entry'. Then we can also notice that we do not need the 'flags' 'struct replay_entry' field, because there is only one flag - 'REPLAY_DELETION'. Instead, we can just add a 'deletion' bit fields. As a result, this patch deletes much more lines that in adds. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index ee2f0b2..08f5036 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -34,32 +34,18 @@ #include "ubifs.h" -/* - * Replay flags. - * - * REPLAY_DELETION: node was deleted - * REPLAY_REF: node is a reference node - */ -enum { - REPLAY_DELETION = 1, - REPLAY_REF = 2, -}; - /** * struct replay_entry - replay tree entry. * @lnum: logical eraseblock number of the node * @offs: node offset * @len: node length + * @deletion: non-zero if this entry corresponds to a node deletion * @sqnum: node sequence number - * @flags: replay flags * @rb: links the replay tree * @key: node key * @nm: directory entry name * @old_size: truncation old size * @new_size: truncation new size - * @free: amount of free space in a bud - * @dirty: amount of dirty space in a bud from padding and deletion nodes - * @jhead: journal head number of the bud * * UBIFS journal replay must compare node sequence numbers, which means it must * build a tree of node information to insert into the TNC. @@ -68,8 +54,8 @@ struct replay_entry { int lnum; int offs; int len; + unsigned int deletion:1; unsigned long long sqnum; - int flags; struct rb_node rb; union ubifs_key key; union { @@ -78,11 +64,6 @@ struct replay_entry { loff_t old_size; loff_t new_size; }; - struct { - int free; - int dirty; - int jhead; - }; }; }; @@ -105,28 +86,32 @@ struct bud_entry { /** * set_bud_lprops - set free and dirty space used by a bud. * @c: UBIFS file-system description object - * @r: replay entry of bud + * @b: bud entry which describes the bud + * + * This function makes sure the LEB properties of bud @b are set correctly + * after the replay. Returns zero in case of success and a negative error code + * in case of failure. */ -static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) +static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) { const struct ubifs_lprops *lp; int err = 0, dirty; ubifs_get_lprops(c); - lp = ubifs_lpt_lookup_dirty(c, r->lnum); + lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); if (IS_ERR(lp)) { err = PTR_ERR(lp); goto out; } dirty = lp->dirty; - if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { + if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { /* * The LEB was added to the journal with a starting offset of * zero which means the LEB must have been empty. The LEB - * property values should be lp->free == c->leb_size and - * lp->dirty == 0, but that is not the case. The reason is that + * property values should be @lp->free == @c->leb_size and + * @lp->dirty == 0, but that is not the case. The reason is that * the LEB had been garbage collected before it became the bud, * and there was not commit inbetween. The garbage collector * resets the free and dirty space without recording it @@ -135,15 +120,15 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) * * We do not need to adjust free space because the scan has told * us the exact value which is recorded in the replay entry as - * r->free. + * @b->free. * * However we do need to subtract from the dirty space the * amount of space that the garbage collector reclaimed, which * is the whole LEB minus the amount of space that was free. */ - dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, lp->free, lp->dirty); - dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, + dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, lp->free, lp->dirty); dirty -= c->leb_size - lp->free; /* @@ -155,10 +140,10 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) */ if (dirty != 0) dbg_msg("LEB %d lp: %d free %d dirty " - "replay: %d free %d dirty", r->lnum, lp->free, - lp->dirty, r->free, r->dirty); + "replay: %d free %d dirty", b->bud->lnum, + lp->free, lp->dirty, b->free, b->dirty); } - lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, + lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, lp->flags | LPROPS_TAKEN, 0); if (IS_ERR(lp)) { err = PTR_ERR(lp); @@ -166,8 +151,9 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) } /* Make sure the journal head points to the latest bud */ - err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum, - c->leb_size - r->free, UBI_SHORTTERM); + err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, + b->bud->lnum, c->leb_size - b->free, + UBI_SHORTTERM); out: ubifs_release_lprops(c); @@ -175,6 +161,27 @@ out: } /** + * set_buds_lprops - set free and dirty space for all replayed buds. + * @c: UBIFS file-system description object + * + * This function sets LEB properties for all replayed buds. Returns zero in + * case of success and a negative error code in case of failure. + */ +static int set_buds_lprops(struct ubifs_info *c) +{ + struct bud_entry *b; + int err; + + list_for_each_entry(b, &c->replay_buds, list) { + err = set_bud_lprops(c, b); + if (err) + return err; + } + + return 0; +} + +/** * trun_remove_range - apply a replay entry for a truncation to the TNC. * @c: UBIFS file-system description object * @r: replay entry of truncation @@ -210,24 +217,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) */ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) { - int err, deletion = ((r->flags & REPLAY_DELETION) != 0); + int err; - dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, - r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); + dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum, + r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key)); /* Set c->replay_sqnum to help deal with dangling branches. */ c->replay_sqnum = r->sqnum; - if (r->flags & REPLAY_REF) - err = set_bud_lprops(c, r); - else if (is_hash_key(c, &r->key)) { - if (deletion) + if (is_hash_key(c, &r->key)) { + if (r->deletion) err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); else err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, r->len, &r->nm); } else { - if (deletion) + if (r->deletion) switch (key_type(c, &r->key)) { case UBIFS_INO_KEY: { @@ -250,7 +255,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) return err; if (c->need_recovery) - err = ubifs_recover_size_accum(c, &r->key, deletion, + err = ubifs_recover_size_accum(c, &r->key, r->deletion, r->new_size); } @@ -373,11 +378,11 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, r->lnum = lnum; r->offs = offs; r->len = len; + r->deletion = !!deletion; r->sqnum = sqnum; - r->flags = (deletion ? REPLAY_DELETION : 0); + key_copy(c, key, &r->key); r->old_size = old_size; r->new_size = new_size; - key_copy(c, key, &r->key); rb_link_node(&r->rb, parent, p); rb_insert_color(&r->rb, &c->replay_tree); @@ -445,13 +450,13 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, r->lnum = lnum; r->offs = offs; r->len = len; + r->deletion = !!deletion; r->sqnum = sqnum; + key_copy(c, key, &r->key); r->nm.len = nlen; memcpy(nbuf, name, nlen); nbuf[nlen] = '\0'; r->nm.name = nbuf; - r->flags = (deletion ? REPLAY_DELETION : 0); - key_copy(c, key, &r->key); ubifs_assert(!*p); rb_link_node(&r->rb, parent, p); @@ -653,58 +658,6 @@ out_dump: } /** - * insert_ref_node - insert a reference node to the replay tree. - * @c: UBIFS file-system description object - * @lnum: node logical eraseblock number - * @offs: node offset - * @sqnum: sequence number - * @free: amount of free space in bud - * @dirty: amount of dirty space from padding and deletion nodes - * @jhead: journal head number for the bud - * - * This function inserts a reference node to the replay tree and returns zero - * in case of success or a negative error code in case of failure. - */ -static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, - unsigned long long sqnum, int free, int dirty, - int jhead) -{ - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; - struct replay_entry *r; - - dbg_mnt("add ref LEB %d:%d", lnum, offs); - while (*p) { - parent = *p; - r = rb_entry(parent, struct replay_entry, rb); - if (sqnum < r->sqnum) { - p = &(*p)->rb_left; - continue; - } else if (sqnum > r->sqnum) { - p = &(*p)->rb_right; - continue; - } - ubifs_err("duplicate sqnum in replay tree"); - return -EINVAL; - } - - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); - if (!r) - return -ENOMEM; - - r->lnum = lnum; - r->offs = offs; - r->sqnum = sqnum; - r->flags = REPLAY_REF; - r->free = free; - r->dirty = dirty; - r->jhead = jhead; - - rb_link_node(&r->rb, parent, p); - rb_insert_color(&r->rb, &c->replay_tree); - return 0; -} - -/** * replay_buds - replay all buds. * @c: UBIFS file-system description object * @@ -714,18 +667,12 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, static int replay_buds(struct ubifs_info *c) { struct bud_entry *b; - int err, uninitialized_var(free), uninitialized_var(dirty); + int err; unsigned long long prev_sqnum = 0; list_for_each_entry(b, &c->replay_buds, list) { err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, - &free, &dirty); - if (err) - return err; - b->free = free; - b->dirty = dirty; - err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, - free, dirty, b->bud->jhead); + &b->free, &b->dirty); if (err) return err; @@ -1074,6 +1021,10 @@ int ubifs_replay_journal(struct ubifs_info *c) if (err) goto out; + err = set_buds_lprops(c); + if (err) + goto out; + /* * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable * to roughly estimate index growth. Things like @c->bi.min_idx_lebs -- cgit v0.10.2 From debf12d54182b324a01c4276b003669c94b7b531 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 15 May 2011 12:05:54 +0300 Subject: UBIFS: substitute the replay tree with a replay list This patch simplifies replay even further - it removes the replay tree and adds the replay list instead. Indeed, we just do not need to use a tree here - all we need to do is to add all nodes to the list and then sort it. Using RB-tree is an overkill - more code and slower. And since we replay buds in order, we expect the nodes to follow in _mostly_ sorted order, so the merge sort becomes much cheaper in average than an RB-tree. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 08f5036..47915a7 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -33,22 +33,24 @@ */ #include "ubifs.h" +#include /** - * struct replay_entry - replay tree entry. + * struct replay_entry - replay list entry. * @lnum: logical eraseblock number of the node * @offs: node offset * @len: node length * @deletion: non-zero if this entry corresponds to a node deletion * @sqnum: node sequence number - * @rb: links the replay tree + * @list: links the replay list * @key: node key * @nm: directory entry name * @old_size: truncation old size * @new_size: truncation new size * - * UBIFS journal replay must compare node sequence numbers, which means it must - * build a tree of node information to insert into the TNC. + * The replay process first scans all buds and builds the replay list, then + * sorts the replay list in nodes sequence number order, and then inserts all + * the replay entries to the TNC. */ struct replay_entry { int lnum; @@ -56,7 +58,7 @@ struct replay_entry { int len; unsigned int deletion:1; unsigned long long sqnum; - struct rb_node rb; + struct list_head list; union ubifs_key key; union { struct qstr nm; @@ -263,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) } /** - * destroy_replay_tree - destroy the replay. - * @c: UBIFS file-system description object + * replay_entries_cmp - compare 2 replay entries. + * @priv: UBIFS file-system description object + * @a: first replay entry + * @a: second replay entry * - * Destroy the replay tree. + * This is a comparios function for 'list_sort()' which compares 2 replay + * entries @a and @b by comparing their sequence numer. Returns %1 if @a has + * greater sequence number and %-1 otherwise. */ -static void destroy_replay_tree(struct ubifs_info *c) +static int replay_entries_cmp(void *priv, struct list_head *a, + struct list_head *b) { - struct rb_node *this = c->replay_tree.rb_node; - struct replay_entry *r; - - while (this) { - if (this->rb_left) { - this = this->rb_left; - continue; - } else if (this->rb_right) { - this = this->rb_right; - continue; - } - r = rb_entry(this, struct replay_entry, rb); - this = rb_parent(this); - if (this) { - if (this->rb_left == &r->rb) - this->rb_left = NULL; - else - this->rb_right = NULL; - } - if (is_hash_key(c, &r->key)) - kfree(r->nm.name); - kfree(r); - } - c->replay_tree = RB_ROOT; + struct replay_entry *ra, *rb; + + cond_resched(); + if (a == b) + return 0; + + ra = list_entry(a, struct replay_entry, list); + rb = list_entry(b, struct replay_entry, list); + ubifs_assert(ra->sqnum != rb->sqnum); + if (ra->sqnum > rb->sqnum) + return 1; + return -1; } /** - * apply_replay_tree - apply the replay tree to the TNC. + * apply_replay_list - apply the replay list to the TNC. * @c: UBIFS file-system description object * - * Apply the replay tree. - * Returns zero in case of success and a negative error code in case of - * failure. + * Apply all entries in the replay list to the TNC. Returns zero in case of + * success and a negative error code in case of failure. */ -static int apply_replay_tree(struct ubifs_info *c) +static int apply_replay_list(struct ubifs_info *c) { - struct rb_node *this = rb_first(&c->replay_tree); + struct replay_entry *r; + int err; - while (this) { - struct replay_entry *r; - int err; + list_sort(c, &c->replay_list, &replay_entries_cmp); + list_for_each_entry(r, &c->replay_list, list) { cond_resched(); - r = rb_entry(this, struct replay_entry, rb); err = apply_replay_entry(c, r); if (err) return err; - this = rb_next(this); } + return 0; } /** - * insert_node - insert a node to the replay tree. + * destroy_replay_list - destroy the replay. + * @c: UBIFS file-system description object + * + * Destroy the replay list. + */ +static void destroy_replay_list(struct ubifs_info *c) +{ + struct replay_entry *r, *tmp; + + list_for_each_entry_safe(r, tmp, &c->replay_list, list) { + if (is_hash_key(c, &r->key)) + kfree(r->nm.name); + list_del(&r->list); + kfree(r); + } +} + +/** + * insert_node - insert a node to the replay list * @c: UBIFS file-system description object * @lnum: node logical eraseblock number * @offs: node offset @@ -336,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c) * @old_size: truncation old size * @new_size: truncation new size * - * This function inserts a scanned non-direntry node to the replay tree. The - * replay tree is an RB-tree containing @struct replay_entry elements which are - * indexed by the sequence number. The replay tree is applied at the very end - * of the replay process. Since the tree is sorted in sequence number order, - * the older modifications are applied first. This function returns zero in - * case of success and a negative error code in case of failure. + * This function inserts a scanned non-direntry node to the replay list. The + * replay list contains @struct replay_entry elements, and we sort this list in + * sequence number order before applying it. The replay list is applied at the + * very end of the replay process. Since the list is sorted in sequence number + * order, the older modifications are applied first. This function returns zero + * in case of success and a negative error code in case of failure. */ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, union ubifs_key *key, unsigned long long sqnum, int deletion, int *used, loff_t old_size, loff_t new_size) { - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; struct replay_entry *r; + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); + if (key_inum(c, key) >= c->highest_inum) c->highest_inum = key_inum(c, key); - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); - while (*p) { - parent = *p; - r = rb_entry(parent, struct replay_entry, rb); - if (sqnum < r->sqnum) { - p = &(*p)->rb_left; - continue; - } else if (sqnum > r->sqnum) { - p = &(*p)->rb_right; - continue; - } - ubifs_err("duplicate sqnum in replay"); - return -EINVAL; - } - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); if (!r) return -ENOMEM; @@ -384,13 +381,12 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, r->old_size = old_size; r->new_size = new_size; - rb_link_node(&r->rb, parent, p); - rb_insert_color(&r->rb, &c->replay_tree); + list_add_tail(&r->list, &c->replay_list); return 0; } /** - * insert_dent - insert a directory entry node into the replay tree. + * insert_dent - insert a directory entry node into the replay list. * @c: UBIFS file-system description object * @lnum: node logical eraseblock number * @offs: node offset @@ -402,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, * @deletion: non-zero if this is a deletion * @used: number of bytes in use in a LEB * - * This function inserts a scanned directory entry node to the replay tree. - * Returns zero in case of success and a negative error code in case of - * failure. - * - * This function is also used for extended attribute entries because they are - * implemented as directory entry nodes. + * This function inserts a scanned directory entry node or an extended + * attribute entry to the replay list. Returns zero in case of success and a + * negative error code in case of failure. */ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, union ubifs_key *key, const char *name, int nlen, unsigned long long sqnum, int deletion, int *used) { - struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; struct replay_entry *r; char *nbuf; + dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); if (key_inum(c, key) >= c->highest_inum) c->highest_inum = key_inum(c, key); - dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); - while (*p) { - parent = *p; - r = rb_entry(parent, struct replay_entry, rb); - if (sqnum < r->sqnum) { - p = &(*p)->rb_left; - continue; - } - if (sqnum > r->sqnum) { - p = &(*p)->rb_right; - continue; - } - ubifs_err("duplicate sqnum in replay"); - return -EINVAL; - } - r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); if (!r) return -ENOMEM; + nbuf = kmalloc(nlen + 1, GFP_KERNEL); if (!nbuf) { kfree(r); @@ -458,9 +436,7 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, nbuf[nlen] = '\0'; r->nm.name = nbuf; - ubifs_assert(!*p); - rb_link_node(&r->rb, parent, p); - rb_insert_color(&r->rb, &c->replay_tree); + list_add_tail(&r->list, &c->replay_list); return 0; } @@ -1017,7 +993,7 @@ int ubifs_replay_journal(struct ubifs_info *c) if (err) goto out; - err = apply_replay_tree(c); + err = apply_replay_list(c); if (err) goto out; @@ -1039,7 +1015,7 @@ int ubifs_replay_journal(struct ubifs_info *c) "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, (unsigned long)c->highest_inum); out: - destroy_replay_tree(c); + destroy_replay_list(c); destroy_bud_list(c); c->replaying = 0; return err; diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 26a7ebe..a2f9d4e 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1205,7 +1205,6 @@ struct ubifs_debug_info; * @replaying: %1 during journal replay * @mounting: %1 while mounting * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode - * @replay_tree: temporary tree used during journal replay * @replay_list: temporary list used during journal replay * @replay_buds: list of buds to replay * @cs_sqnum: sequence number of first node in the log (commit start node) @@ -1435,7 +1434,6 @@ struct ubifs_info { unsigned int replaying:1; unsigned int mounting:1; unsigned int remounting_rw:1; - struct rb_root replay_tree; struct list_head replay_list; struct list_head replay_buds; unsigned long long cs_sqnum; -- cgit v0.10.2 From e76a452640dc110147f7a7da1dcfb1c5026f982d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 15 May 2011 12:34:29 +0300 Subject: UBIFS: change bud replay function conventions This is a minor preparation patch which changes 'replay_bud()' interface - instead of passing bud lnum, offs, jhead, etc directly, pass a pointer to the bud entry which contains all the information. The bud entry will be also needed in one of the following patches. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 47915a7..4378baa 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -475,25 +475,22 @@ int ubifs_validate_entry(struct ubifs_info *c, /** * replay_bud - replay a bud logical eraseblock. * @c: UBIFS file-system description object - * @lnum: bud logical eraseblock number to replay - * @offs: bud start offset - * @jhead: journal head to which this bud belongs - * @free: amount of free space in the bud is returned here - * @dirty: amount of dirty space from padding and deletion nodes is returned - * here + * @b: bud entry which describes the bud * - * This function returns zero in case of success and a negative error code in - * case of failure. + * This function replays bud @bud, recovers it if needed, and adds all nodes + * from this bud to the replay list. Returns zero in case of success and a + * negative error code in case of failure. */ -static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, - int *free, int *dirty) +static int replay_bud(struct ubifs_info *c, struct bud_entry *b) { - int err = 0, used = 0; + int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; + int jhead = b->bud->jhead; struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; struct ubifs_bud *bud; dbg_mnt("replay bud LEB %d, head %d, offs %d", lnum, jhead, offs); + if (c->need_recovery) sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); else @@ -618,9 +615,9 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, ubifs_assert(sleb->endpt - offs >= used); ubifs_assert(sleb->endpt % c->min_io_size == 0); - *dirty = sleb->endpt - offs - used; - *free = c->leb_size - sleb->endpt; - dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, *dirty, *free); + b->dirty = sleb->endpt - offs - used; + b->free = c->leb_size - sleb->endpt; + dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); out: ubifs_scan_destroy(sleb); @@ -647,8 +644,7 @@ static int replay_buds(struct ubifs_info *c) unsigned long long prev_sqnum = 0; list_for_each_entry(b, &c->replay_buds, list) { - err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, - &b->free, &b->dirty); + err = replay_bud(c, b); if (err) return err; -- cgit v0.10.2 From c49139d8096dc1c392455dbc3f158b46038fc9d4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 15 May 2011 12:40:46 +0300 Subject: UBIFS: remove BUG statement Remove a 'BUG()' statement when we are unable to find a bud and add a similar 'ubifs_assert()' statement instead. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 4378baa..0f50fbf 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -487,7 +487,6 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) int jhead = b->bud->jhead; struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; - struct ubifs_bud *bud; dbg_mnt("replay bud LEB %d, head %d, offs %d", lnum, jhead, offs); @@ -608,10 +607,7 @@ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) goto out; } - bud = ubifs_search_bud(c, lnum); - if (!bud) - BUG(); - + ubifs_assert(ubifs_search_bud(c, lnum)); ubifs_assert(sleb->endpt - offs >= used); ubifs_assert(sleb->endpt % c->min_io_size == 0); -- cgit v0.10.2 From cb14a18465686ea6add51b1008865b8174c28bd7 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 15 May 2011 14:51:54 +0300 Subject: UBIFS: synchronize write-buffer before switching to the next bud Currently when UBIFS fills up the current bud (which is the last in the journal head) and switches to the next bud, it first writes the log reference node for the next bud and only after this synchronizes the write-buffer of the previous bud. This is not a big deal, but an unclean power cut may lead to a situation when we have corruption in a next-to-last bud, although it is much more logical that we have to have corruption only in the last bud. This patch also removes write-buffer synchronization from 'ubifs_wbuf_seek_nolock()' because this is not needed anymore (we synchronize the write-buffer explicitly everywhere now) and also because this is just prone to various errors. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d70937b..ded29f6 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c) if (err) return err; + err = ubifs_wbuf_sync_nolock(wbuf); + if (err) + return err; + err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); if (err) return err; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 67bbde3..166951e 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -452,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) * @dtype: data type * * This function targets the write-buffer to logical eraseblock @lnum:@offs. - * The write-buffer is synchronized if it is not empty. Returns zero in case of - * success and a negative error code in case of failure. + * The write-buffer has to be empty. Returns zero in case of success and a + * negative error code in case of failure. */ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, int dtype) @@ -465,13 +465,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, ubifs_assert(offs >= 0 && offs <= c->leb_size); ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); ubifs_assert(lnum != wbuf->lnum); - - if (wbuf->used > 0) { - int err = ubifs_wbuf_sync_nolock(wbuf); - - if (err) - return err; - } + ubifs_assert(wbuf->used == 0); spin_lock(&wbuf->lock); wbuf->lnum = lnum; diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index ce55a48..34b1679 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -141,14 +141,8 @@ again: * LEB with some empty space. */ lnum = ubifs_find_free_space(c, len, &offs, squeeze); - if (lnum >= 0) { - /* Found an LEB, add it to the journal head */ - err = ubifs_add_bud_to_log(c, jhead, lnum, offs); - if (err) - goto out_return; - /* A new bud was successfully allocated and added to the log */ + if (lnum >= 0) goto out; - } err = lnum; if (err != -ENOSPC) @@ -203,12 +197,23 @@ again: return 0; } - err = ubifs_add_bud_to_log(c, jhead, lnum, 0); - if (err) - goto out_return; offs = 0; out: + /* + * Make sure we synchronize the write-buffer before we add the new bud + * to the log. Otherwise we may have a power cut after the log + * reference node for the last bud (@lnum) is written but before the + * write-buffer data are written to the next-to-last bud + * (@wbuf->lnum). And the effect would be that the recovery would see + * that there is corruption in the next-to-last bud. + */ + err = ubifs_wbuf_sync_nolock(wbuf); + if (err) + goto out_return; + err = ubifs_add_bud_to_log(c, jhead, lnum, offs); + if (err) + goto out_return; err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); if (err) goto out_unlock; -- cgit v0.10.2 From 91c66083fca36cdf496e927ef8bea19e6b1bbdce Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 15 May 2011 13:11:00 +0300 Subject: UBIFS: expect corruption only in last journal head LEBs This patch improves UBIFS recovery and teaches it to expect corruption only in the last buds. Indeed, currently we just recover all buds, which is incorrect because only the last buds can have corruptions in case of a power cut. So it is inconsistent with the rest of the recovery strategy which tries hard to distinguish between corruptions cause by power cuts and other types of corruptions. This patch also adds one quirk - a bit older UBIFS was could have corruption in the next to last bud because of the way it switched buds: when bud A is full, it first searched for the next bud B, the wrote a reference node to the log about B, and then synchronized the write-buffer of A. So we could end up with buds A and B, where B is the last, but A had corruption. The UBIFS behavior was fixed, though, so currently it always first synchronizes A's write-buffer and only after this adds B to the log. However, to be make sure that we handle unclean (after a power cut) UBIFS images belonging to older UBIFS - we need to add a quirk and keep it for some time: we need to check for the situation described above. Thankfully, it is easy to check for that situation. When UBIFS adds B to the log, it always first unmaps B, then maps it, and then syncs A's write-buffer. Thus, in that situation we can check that B is empty, in which case it is OK to have corruption in A. To check that B is empty it is enough to just read the first few bytes of the bud and compare them with 0xFFs. This quirk may be removed in a couple of years. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 0f50fbf..6617280 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -473,6 +473,65 @@ int ubifs_validate_entry(struct ubifs_info *c, } /** + * is_last_bud - check if the bud is the last in the journal head. + * @c: UBIFS file-system description object + * @bud: bud description object + * + * This function checks if bud @bud is the last bud in its journal head. This + * information is then used by 'replay_bud()' to decide whether the bud can + * have corruptions or not. Indeed, only last buds can be corrupted by power + * cuts. Returns %1 if this is the last bud, and %0 if not. + */ +static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) +{ + struct ubifs_jhead *jh = &c->jheads[bud->jhead]; + struct ubifs_bud *next; + uint32_t data; + int err; + + if (list_is_last(&bud->list, &jh->buds_list)) + return 1; + + /* + * The following is a quirk to make sure we work correctly with UBIFS + * images used with older UBIFS. + * + * Normally, the last bud will be the last in the journal head's list + * of bud. However, there is one exception if the UBIFS image belongs + * to older UBIFS. This is fairly unlikely: one would need to use old + * UBIFS, then have a power cut exactly at the right point, and then + * try to mount this image with new UBIFS. + * + * The exception is: it is possible to have 2 buds A and B, A goes + * before B, and B is the last, bud B is contains no data, and bud A is + * corrupted at the end. The reason is that in older versions when the + * journal code switched the next bud (from A to B), it first added a + * log reference node for the new bud (B), and only after this it + * synchronized the write-buffer of current bud (A). But later this was + * changed and UBIFS started to always synchronize the write-buffer of + * the bud (A) before writing the log reference for the new bud (B). + * + * But because older UBIFS always synchronized A's write-buffer before + * writing to B, we can recognize this exceptional situation but + * checking the contents of bud B - if it is empty, then A can be + * treated as the last and we can recover it. + * + * TODO: remove this piece of code in a couple of years (today it is + * 16.05.2011). + */ + next = list_entry(bud->list.next, struct ubifs_bud, list); + if (!list_is_last(&next->list, &jh->buds_list)) + return 0; + + err = ubi_read(c->ubi, next->lnum, (char *)&data, + next->start, 4); + if (err) + return 0; + + return data == 0xFFFFFFFF; +} + +/** * replay_bud - replay a bud logical eraseblock. * @c: UBIFS file-system description object * @b: bud entry which describes the bud @@ -483,15 +542,23 @@ int ubifs_validate_entry(struct ubifs_info *c, */ static int replay_bud(struct ubifs_info *c, struct bud_entry *b) { + int is_last = is_last_bud(c, b->bud); int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; - int jhead = b->bud->jhead; struct ubifs_scan_leb *sleb; struct ubifs_scan_node *snod; - dbg_mnt("replay bud LEB %d, head %d, offs %d", lnum, jhead, offs); + dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", + lnum, b->bud->jhead, offs, is_last); - if (c->need_recovery) - sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); + if (c->need_recovery && is_last) + /* + * Recover only last LEBs in the journal heads, because power + * cuts may cause corruptions only in these LEBs, because only + * these LEBs could possibly be written to at the power cut + * time. + */ + sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, + b->bud->jhead != GCHD); else sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); if (IS_ERR(sleb)) -- cgit v0.10.2 From e11602ea3e43392904db7a579dc990062ebb7151 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Fri, 6 May 2011 17:52:32 +0300 Subject: UBIFS: share the next_log_lnum helper We'll need to use the 'next_log_lnum()' helper function from log.c in the fixup code, so let's move it to misc.h. IOW, this is a preparation to the following free space fixup changes. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 40fa780..affea94 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) } /** - * next_log_lnum - switch to the next log LEB. - * @c: UBIFS file-system description object - * @lnum: current log LEB - */ -static inline int next_log_lnum(const struct ubifs_info *c, int lnum) -{ - lnum += 1; - if (lnum > c->log_last) - lnum = UBIFS_LOG_LNUM; - - return lnum; -} - -/** * empty_log_bytes - calculate amount of empty space in the log. * @c: UBIFS file-system description object */ @@ -257,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) ref->jhead = cpu_to_le32(jhead); if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); c->lhead_offs = 0; } @@ -425,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) /* Switch to the next log LEB */ if (c->lhead_offs) { - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); c->lhead_offs = 0; } @@ -446,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) c->lhead_offs += len; if (c->lhead_offs == c->leb_size) { - c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); + c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); c->lhead_offs = 0; } @@ -533,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) } mutex_lock(&c->log_mutex); for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; - lnum = next_log_lnum(c, lnum)) { + lnum = ubifs_next_log_lnum(c, lnum)) { dbg_log("unmap log LEB %d", lnum); err = ubifs_leb_unmap(c, lnum); if (err) @@ -642,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); if (err) return err; - *lnum = next_log_lnum(c, *lnum); + *lnum = ubifs_next_log_lnum(c, *lnum); *offs = 0; } memcpy(buf + *offs, node, len); @@ -712,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) ubifs_scan_destroy(sleb); if (lnum == c->lhead_lnum) break; - lnum = next_log_lnum(c, lnum); + lnum = ubifs_next_log_lnum(c, lnum); } if (offs) { int sz = ALIGN(offs, c->min_io_size); @@ -732,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) /* Unmap remaining LEBs */ lnum = write_lnum; do { - lnum = next_log_lnum(c, lnum); + lnum = ubifs_next_log_lnum(c, lnum); err = ubifs_leb_unmap(c, lnum); if (err) return err; diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index c3de04d..0b5296a 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -340,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c) mutex_unlock(&c->lp_mutex); } +/** + * ubifs_next_log_lnum - switch to the next log LEB. + * @c: UBIFS file-system description object + * @lnum: current log LEB + * + * This helper function returns the log LEB number which goes next after LEB + * 'lnum'. + */ +static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum) +{ + lnum += 1; + if (lnum > c->log_last) + lnum = UBIFS_LOG_LNUM; + + return lnum; +} + #endif /* __UBIFS_MISC_H__ */ -- cgit v0.10.2 From 9f58d3503a1368673609db1962e4a584261b62eb Mon Sep 17 00:00:00 2001 From: "Matthew L. Creech" Date: Thu, 5 May 2011 16:33:20 -0400 Subject: UBIFS: add a superblock flag for free space fix-up The 'space_fixup' flag can be set in the superblock of a new filesystem by mkfs.ubifs to indicate that any eraseblocks with free space remaining should be fixed-up the first time it's mounted (after which the flag is un-set). This means that the UBIFS image has been flashed by a "dumb" flasher and the free space has been actually programmed (writing all 0xFFs), so this free space cannot be used. UBIFS fixes the free space up by re-writing the contents of all LEBs with free space using the atomic LEB change UBI operation. Artem: improved commit message, add some more commentaries to the code. Signed-off-by: Matthew L. Creech Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index b5ba2ea..f46d77e 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -316,6 +316,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) printk(KERN_DEBUG "\tflags %#x\n", sup_flags); printk(KERN_DEBUG "\t big_lpt %u\n", !!(sup_flags & UBIFS_FLG_BIGLPT)); + printk(KERN_DEBUG "\t space_fixup %u\n", + !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); printk(KERN_DEBUG "\tmin_io_size %u\n", le32_to_cpu(sup->min_io_size)); printk(KERN_DEBUG "\tleb_size %u\n", diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index cad60b5..93d6928 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -617,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c) c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); memcpy(&c->uuid, &sup->uuid, 16); c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); + c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP); /* Automatically increase file system size to the maximum size */ c->old_leb_cnt = c->leb_cnt; diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index b922f03..e24380c 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -408,9 +408,11 @@ enum { * Superblock flags. * * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set + * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed */ enum { UBIFS_FLG_BIGLPT = 0x02, + UBIFS_FLG_SPACE_FIXUP = 0x04, }; /** diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index a2f9d4e..8e27553 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1014,6 +1014,7 @@ struct ubifs_debug_info; * @cmt_wq: wait queue to sleep on if the log is full and a commit is running * * @big_lpt: flag that LPT is too big to write whole during commit + * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up * @no_chk_data_crc: do not check CRCs when reading data nodes (except during * recovery) * @bulk_read: enable bulk-reads @@ -1253,6 +1254,7 @@ struct ubifs_info { wait_queue_head_t cmt_wq; unsigned int big_lpt:1; + unsigned int space_fixup:1; unsigned int no_chk_data_crc:1; unsigned int bulk_read:1; unsigned int default_compr:2; -- cgit v0.10.2 From 6554a6578131a217d4ea6d779a62f120081a2e8b Mon Sep 17 00:00:00 2001 From: "Matthew L. Creech" Date: Fri, 6 May 2011 18:58:22 -0400 Subject: UBIFS: add the fixup function This patch adds the 'ubifs_fixup_free_space()' function which scans all LEBs in the filesystem for those that are in-use but have one or more empty pages, then re-maps the LEBs in order to erase the empty portions. Afterward it removes the "space_fixup" flag from the UBIFS superblock. Artem: massaged the patch Signed-off-by: Matthew L. Creech Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 93d6928..c606f01 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -652,3 +652,152 @@ out: kfree(sup); return err; } + +/** + * fixup_leb - fixup/unmap an LEB containing free space. + * @c: UBIFS file-system description object + * @lnum: the LEB number to fix up + * @len: number of used bytes in LEB (starting at offset 0) + * + * This function reads the contents of the given LEB number @lnum, then fixes + * it up, so that empty min. I/O units in the end of LEB are actually erased on + * flash (rather than being just all-0xff real data). If the LEB is completely + * empty, it is simply unmapped. + */ +static int fixup_leb(struct ubifs_info *c, int lnum, int len) +{ + int err; + + ubifs_assert(len >= 0); + ubifs_assert(len % c->min_io_size == 0); + ubifs_assert(len < c->leb_size); + + if (len == 0) { + dbg_mnt("unmap empty LEB %d", lnum); + return ubi_leb_unmap(c->ubi, lnum); + } + + dbg_mnt("fixup LEB %d, data len %d", lnum, len); + err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); + if (err) + return err; + + return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); +} + +/** + * fixup_free_space - find & remap all LEBs containing free space. + * @c: UBIFS file-system description object + * + * This function walks through all LEBs in the filesystem and fiexes up those + * containing free/empty space. + */ +static int fixup_free_space(struct ubifs_info *c) +{ + int lnum, err = 0; + struct ubifs_lprops *lprops; + + ubifs_get_lprops(c); + + /* Fixup LEBs in the master area */ + for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) { + err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz); + if (err) + goto out; + } + + /* Unmap unused log LEBs */ + lnum = ubifs_next_log_lnum(c, c->lhead_lnum); + while (lnum != c->ltail_lnum) { + err = fixup_leb(c, lnum, 0); + if (err) + goto out; + lnum = ubifs_next_log_lnum(c, lnum); + } + + /* Fixup the current log head */ + err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); + if (err) + goto out; + + /* Fixup LEBs in the LPT area */ + for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { + int free = c->ltab[lnum - c->lpt_first].free; + + if (free > 0) { + err = fixup_leb(c, lnum, c->leb_size - free); + if (err) + goto out; + } + } + + /* Unmap LEBs in the orphans area */ + for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { + err = fixup_leb(c, lnum, 0); + if (err) + goto out; + } + + /* Fixup LEBs in the main area */ + for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { + lprops = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } + + if (lprops->free > 0) { + err = fixup_leb(c, lnum, c->leb_size - lprops->free); + if (err) + goto out; + } + } + +out: + ubifs_release_lprops(c); + return err; +} + +/** + * ubifs_fixup_free_space - find & fix all LEBs with free space. + * @c: UBIFS file-system description object + * + * This function fixes up LEBs containing free space on first mount, if the + * appropriate flag was set when the FS was created. Each LEB with one or more + * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure + * the free space is actually erased. E.g., this is necessary for some NAND + * chips, since the free space may have been programmed like real "0xff" data + * (generating a non-0xff ECC), causing future writes to the not-really-erased + * NAND pages to behave badly. After the space is fixed up, the superblock flag + * is cleared, so that this is skipped for all future mounts. + */ +int ubifs_fixup_free_space(struct ubifs_info *c) +{ + int err; + struct ubifs_sb_node *sup; + + ubifs_assert(c->space_fixup); + ubifs_assert(!c->ro_mount); + + ubifs_msg("start fixing up free space"); + + err = fixup_free_space(c); + if (err) + return err; + + sup = ubifs_read_sb_node(c); + if (IS_ERR(sup)) + return PTR_ERR(sup); + + /* Free-space fixup is no longer required */ + c->space_fixup = 0; + sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); + + err = ubifs_write_sb_node(c, sup); + kfree(sup); + if (err) + return err; + + ubifs_msg("free space fixup complete"); + return err; +} diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 8e27553..93d1412 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1633,6 +1633,7 @@ int ubifs_write_master(struct ubifs_info *c); int ubifs_read_superblock(struct ubifs_info *c); struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); +int ubifs_fixup_free_space(struct ubifs_info *c); /* replay.c */ int ubifs_validate_entry(struct ubifs_info *c, -- cgit v0.10.2 From 9d510db423303b4f3555074dd7bdd0d692e432a4 Mon Sep 17 00:00:00 2001 From: "Matthew L. Creech" Date: Fri, 6 May 2011 18:58:23 -0400 Subject: UBIFS: fix-up free space on mount if flag is set If a UBIFS filesystem is being mounted read-write, or is being remounted from read-only to read-write, check for the "space_fixup" flag and fix all LEBs containing empty space if necessary. Artem: tweaked the patch a bit Signed-off-by: Matthew L. Creech Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 575ea83..6db0bdaa 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1396,6 +1396,12 @@ static int mount_ubifs(struct ubifs_info *c) } else ubifs_assert(c->lst.taken_empty_lebs > 0); + if (!c->ro_mount && c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + goto out_infos; + } + err = dbg_check_filesystem(c); if (err) goto out_infos; @@ -1686,6 +1692,13 @@ static int ubifs_remount_rw(struct ubifs_info *c) */ err = dbg_check_space_info(c); } + + if (c->space_fixup) { + err = ubifs_fixup_free_space(c); + if (err) + goto out; + } + mutex_unlock(&c->umount_mutex); return err; -- cgit v0.10.2 From 617992069513c1e789c707c4d75ff03bf7dd0fb0 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 16 May 2011 13:41:55 +0300 Subject: UBIFS: clean up LEB recovery function This patch cleans up 'ubifs_recover_leb()' function and makes it more readable. Move things which are done only once out of the loop and kill unneeded 'switch' statement. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 42b4512..7d92203 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -609,8 +609,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int grouped) { - int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; - int empty_chkd = 0, start = offs; + int ret = 0, err, len = c->leb_size - offs, need_clean = 0; + int start = offs; struct ubifs_scan_leb *sleb; void *buf = sbuf + offs; @@ -624,8 +624,6 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, need_clean = 1; while (len >= 8) { - int ret; - dbg_scan("look at LEB %d:%d (%d bytes left)", lnum, offs, len); @@ -635,8 +633,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, * Scan quietly until there is an error from which we cannot * recover */ - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); - + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); if (ret == SCANNED_A_NODE) { /* A valid node, and not a padding node */ struct ubifs_ch *ch = buf; @@ -649,66 +646,37 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, offs += node_len; buf += node_len; len -= node_len; - continue; - } - - if (ret > 0) { + } else if (ret > 0) { /* Padding bytes or a valid padding node */ offs += ret; buf += ret; len -= ret; - continue; - } - - if (ret == SCANNED_EMPTY_SPACE) { - if (!is_empty(buf, len)) { - if (!is_last_write(c, buf, offs)) - break; - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } - empty_chkd = 1; + } else if (ret == SCANNED_EMPTY_SPACE || + ret == SCANNED_GARBAGE || + ret == SCANNED_A_BAD_PAD_NODE || + ret == SCANNED_A_CORRUPT_NODE) { + dbg_rcvry("found corruption - %d", ret); break; - } - - if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) - if (is_last_write(c, buf, offs)) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - empty_chkd = 1; - break; - } - - if (ret == SCANNED_A_CORRUPT_NODE) - if (no_more_nodes(c, buf, len, lnum, offs)) { - clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - empty_chkd = 1; - break; - } - - if (quiet) { - /* Redo the last scan but noisily */ - quiet = 0; - continue; - } - - switch (ret) { - case SCANNED_GARBAGE: - dbg_err("garbage"); - goto corrupted; - case SCANNED_A_CORRUPT_NODE: - case SCANNED_A_BAD_PAD_NODE: - dbg_err("bad node"); - goto corrupted; - default: - dbg_err("unknown"); + } else { + dbg_err("unexpected return value %d", ret); err = -EINVAL; goto error; } } - if (!empty_chkd && !is_empty(buf, len)) { + if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { + if (is_last_write(c, buf, offs)) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } else + goto corrupted_rescan; + } else if (ret == SCANNED_A_CORRUPT_NODE) { + if (no_more_nodes(c, buf, len, lnum, offs)) { + clean_buf(c, &buf, lnum, &offs, &len); + need_clean = 1; + } else + goto corrupted_rescan; + } else if (!is_empty(buf, len)) { if (is_last_write(c, buf, offs)) { clean_buf(c, &buf, lnum, &offs, &len); need_clean = 1; @@ -751,6 +719,10 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, return sleb; +corrupted_rescan: + /* Re-scan the corrupted data with verbose messages */ + dbg_err("corruptio %d", ret); + ubifs_scan_a_node(c, buf, len, lnum, offs, 1); corrupted: ubifs_scanned_corruption(c, lnum, offs, buf); err = -EUCLEAN; -- cgit v0.10.2 From 7c47bfd0dbb20e5d7fa4e37cfd76bb73d39b32b4 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 16 May 2011 13:44:48 +0300 Subject: UBIFS: always cleanup the recovered LEB Now when we call 'ubifs_recover_leb()' only for LEBs which are potentially corrupted (i.e., only for last buds, not for all of them), we can cleanup every LEB, not only those where we find corruption. The reason - unstable bits. Even though the LEB may look good now, it might contain unstable bits which may hit us a bit later. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 7d92203..4d10b6e 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -609,7 +609,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int grouped) { - int ret = 0, err, len = c->leb_size - offs, need_clean = 0; + int ret = 0, err, len = c->leb_size - offs; int start = offs; struct ubifs_scan_leb *sleb; void *buf = sbuf + offs; @@ -620,9 +620,6 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, if (IS_ERR(sleb)) return sleb; - if (sleb->ecc) - need_clean = 1; - while (len >= 8) { dbg_scan("look at LEB %d:%d (%d bytes left)", lnum, offs, len); @@ -665,21 +662,18 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, } if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { - if (is_last_write(c, buf, offs)) { + if (is_last_write(c, buf, offs)) clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } else + else goto corrupted_rescan; } else if (ret == SCANNED_A_CORRUPT_NODE) { - if (no_more_nodes(c, buf, len, lnum, offs)) { + if (no_more_nodes(c, buf, len, lnum, offs)) clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } else + else goto corrupted_rescan; } else if (!is_empty(buf, len)) { if (is_last_write(c, buf, offs)) { clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; } else { int corruption = first_non_ff(buf, len); @@ -701,21 +695,16 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, buf = sbuf + offs; len = c->leb_size - offs; clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; } - if (offs % c->min_io_size) { + if (offs % c->min_io_size) clean_buf(c, &buf, lnum, &offs, &len); - need_clean = 1; - } ubifs_end_scan(c, sleb, lnum, offs); - if (need_clean) { - err = fix_unclean_leb(c, sleb, start); - if (err) - goto error; - } + err = fix_unclean_leb(c, sleb, start); + if (err) + goto error; return sleb; -- cgit v0.10.2 From 43e07073865ae540e3b463f437f0f837f17714ba Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 16 May 2011 14:21:51 +0300 Subject: UBIFS: simplify LEB recovery function further Further simplify 'ubifs_recover_leb()' by noticing that we have to call 'clean_buf()' in any case, and it is fine to call it if the offset is aligned to 'c->min_io_size'. Thus, we do not have to call it separately from every "if" - just call it once at the end. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 4d10b6e..74281f1 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -662,19 +662,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, } if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { - if (is_last_write(c, buf, offs)) - clean_buf(c, &buf, lnum, &offs, &len); - else + if (!is_last_write(c, buf, offs)) goto corrupted_rescan; } else if (ret == SCANNED_A_CORRUPT_NODE) { - if (no_more_nodes(c, buf, len, lnum, offs)) - clean_buf(c, &buf, lnum, &offs, &len); - else + if (!no_more_nodes(c, buf, len, lnum, offs)) goto corrupted_rescan; } else if (!is_empty(buf, len)) { - if (is_last_write(c, buf, offs)) { - clean_buf(c, &buf, lnum, &offs, &len); - } else { + if (!is_last_write(c, buf, offs)) { int corruption = first_non_ff(buf, len); /* @@ -694,12 +688,9 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, if (grouped && drop_incomplete_group(sleb, &offs)) { buf = sbuf + offs; len = c->leb_size - offs; - clean_buf(c, &buf, lnum, &offs, &len); } - if (offs % c->min_io_size) - clean_buf(c, &buf, lnum, &offs, &len); - + clean_buf(c, &buf, lnum, &offs, &len); ubifs_end_scan(c, sleb, lnum, offs); err = fix_unclean_leb(c, sleb, start); -- cgit v0.10.2 From bbf2b37a98d22d5b111f03674dd4f093dd6c0ae5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 16 May 2011 15:15:52 +0300 Subject: UBIFS: fix extremely rare mount failure This patch fixes an extremely rare mount failure after a power cut, when mount fails with ENOSPC error because UBIFS could not find the GC LEB. In short, the reason for this failure is that after recovery the GC head LEB contains less free space than it had contained just before the power cut happened. As a result, if the FS is full, 'ubifs_rcvry_gc_commit()' is unable to find a dirty LEB to GC and a free LEB, so mount fails. This patch contains a huge comment with more detailed explanation, please refer that comment. Since this is really really rare and unlikely situation, I do not send this patch to the stable tree, also because it requires a lot of preparation patches which I did before. So sending this to -stable would be too risky. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 74281f1..731d9e2 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, } /** - * drop_incomplete_group - drop nodes from an incomplete group. + * drop_last_node - drop the last node or group of nodes. * @sleb: scanned LEB information * @offs: offset of dropped nodes is returned here + * @grouped: non-zero if whole group of nodes have to be dropped * - * This function returns %1 if nodes are dropped and %0 otherwise. + * This is a helper function for 'ubifs_recover_leb()' which drops the last + * node of the scanned LEB or the last group of nodes if @grouped is not zero. + * This function returns %1 if a node was dropped and %0 otherwise. */ -static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) +static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) { int dropped = 0; @@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) kfree(snod); sleb->nodes_cnt -= 1; dropped = 1; + if (!grouped) + break; } return dropped; } @@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int grouped) { - int ret = 0, err, len = c->leb_size - offs; - int start = offs; + int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; struct ubifs_scan_leb *sleb; void *buf = sbuf + offs; @@ -620,6 +624,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, if (IS_ERR(sleb)) return sleb; + ubifs_assert(len >= 8); while (len >= 8) { dbg_scan("look at LEB %d:%d (%d bytes left)", lnum, offs, len); @@ -684,11 +689,68 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, } } - /* Drop nodes from incomplete group */ - if (grouped && drop_incomplete_group(sleb, &offs)) { - buf = sbuf + offs; - len = c->leb_size - offs; - } + min_io_unit = round_down(offs, c->min_io_size); + if (grouped) + /* + * If nodes are grouped, always drop the incomplete group at + * the end. + */ + drop_last_node(sleb, &offs, 1); + + /* + * While we are in the middle of the same min. I/O unit keep dropping + * nodes. So basically, what we want is to make sure that the last min. + * I/O unit where we saw the corruption is dropped completely with all + * the uncorrupted node which may possibly sit there. + * + * In other words, let's name the min. I/O unit where the corruption + * starts B, and the previous min. I/O unit A. The below code tries to + * deal with a situation when half of B contains valid nodes or the end + * of a valid node, and the second half of B contains corrupted data or + * garbage. This means that UBIFS had been writing to B just before the + * power cut happened. I do not know how realistic is this scenario + * that half of the min. I/O unit had been written successfully and the + * other half not, but this is possible in our 'failure mode emulation' + * infrastructure at least. + * + * So what is the problem, why we need to drop those nodes? Whey can't + * we just clean-up the second half of B by putting a padding node + * there? We can, and this works fine with one exception which was + * reproduced with power cut emulation testing and happens extremely + * rarely. The description follows, but it is worth noting that that is + * only about the GC head, so we could do this trick only if the bud + * belongs to the GC head, but it does not seem to be worth an + * additional "if" statement. + * + * So, imagine the file-system is full, we run GC which is moving valid + * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head + * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X + * and will try to continue. Imagine that LEB X is currently the + * dirtiest LEB, and the amount of used space in LEB Y is exactly the + * same as amount of free space in LEB X. + * + * And a power cut happens when nodes are moved from LEB X to LEB Y. We + * are here trying to recover LEB Y which is the GC head LEB. We find + * the min. I/O unit B as described above. Then we clean-up LEB Y by + * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function + * fails, because it cannot find a dirty LEB which could be GC'd into + * LEB Y! Even LEB X does not match because the amount of valid nodes + * there does not fit the free space in LEB Y any more! And this is + * because of the padding node which we added to LEB Y. The + * user-visible effect of this which I once observed and analysed is + * that we cannot mount the file-system with -ENOSPC error. + * + * So obviously, to make sure that situation does not happen we should + * free min. I/O unit B in LEB Y completely and the last used min. I/O + * unit in LEB Y should be A. This is basically what the below code + * tries to do. + */ + while (min_io_unit == round_down(offs, c->min_io_size) && + min_io_unit != offs && + drop_last_node(sleb, &offs, grouped)); + + buf = sbuf + offs; + len = c->leb_size - offs; clean_buf(c, &buf, lnum, &offs, &len); ubifs_end_scan(c, sleb, lnum, offs); -- cgit v0.10.2 From bdc1a1b6100c78a6002b1761ebe36d5fe8f8585b Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 17 May 2011 14:07:24 +0300 Subject: UBIFS: fix kernel-doc comments This is a minor fix for UBIFS kernel-doc comments - we forgot the "@" symbol for several 'struct ubifs_debug_info'. Signed-off-by: Artem Bityutskiy diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index f3e235f..7538bf2 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -56,11 +56,11 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, * @saved_free: saved amount of free space * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt * - * dfs_dir_name: name of debugfs directory containing this file-system's files - * dfs_dir: direntry object of the file-system debugfs directory - * dfs_dump_lprops: "dump lprops" debugfs knob - * dfs_dump_budg: "dump budgeting information" debugfs knob - * dfs_dump_tnc: "dump TNC" debugfs knob + * @dfs_dir_name: name of debugfs directory containing this file-system's files + * @dfs_dir: direntry object of the file-system debugfs directory + * @dfs_dump_lprops: "dump lprops" debugfs knob + * @dfs_dump_budg: "dump budgeting information" debugfs knob + * @dfs_dump_tnc: "dump TNC" debugfs knob */ struct ubifs_debug_info { struct ubifs_zbranch old_zroot; -- cgit v0.10.2 From 56e46742e846e4de167dde0e1e1071ace1c882a5 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 17 May 2011 15:15:30 +0300 Subject: UBIFS: switch to dynamic printks Switch to debugging using dynamic printk (pr_debug()). There is no good reason to carry custom debugging prints if there is so cool and powerful generic dynamic printk infrastructure, see Documentation/dynamic-debug-howto.txt. With dynamic printks we can switch on/of individual prints, per-file, per-function and per format messages. This means that instead of doing old-fashioned echo 1 > /sys/module/ubifs/parameters/debug_msgs to enable general messages, we can do: echo 'format "UBIFS DBG gen" +ptlf' > control to enable general messages and additionally ask the dynamic printk infrastructure to print process ID, line number and function name. So there is no reason to keep UBIFS-specific crud if there is more powerful generic thing. Signed-off-by: Artem Bityutskiy diff --git a/Documentation/filesystems/ubifs.txt b/Documentation/filesystems/ubifs.txt index 7d17e5b..8e4fab6 100644 --- a/Documentation/filesystems/ubifs.txt +++ b/Documentation/filesystems/ubifs.txt @@ -115,28 +115,8 @@ ubi.mtd=0 root=ubi0:rootfs rootfstype=ubifs Module Parameters for Debugging =============================== -When UBIFS has been compiled with debugging enabled, there are 3 module +When UBIFS has been compiled with debugging enabled, there are 2 module parameters that are available to control aspects of testing and debugging. -The parameters are unsigned integers where each bit controls an option. -The parameters are: - -debug_msgs Selects which debug messages to display, as follows: - - Message Type Flag value - - General messages 1 - Journal messages 2 - Mount messages 4 - Commit messages 8 - LEB search messages 16 - Budgeting messages 32 - Garbage collection messages 64 - Tree Node Cache (TNC) messages 128 - LEB properties (lprops) messages 256 - Input/output messages 512 - Log messages 1024 - Scan messages 2048 - Recovery messages 4096 debug_chks Selects extra checks that UBIFS can do while running: @@ -156,8 +136,7 @@ debug_tsts Selects a mode of testing, as follows: Failure mode for recovery testing 4 -For example, set debug_msgs to 5 to display General messages and Mount -messages. +For example, set debug_chks to 3 to enable general and TNC checks. References diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index f46d77e..0bb2bce 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -42,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock); static char dbg_key_buf0[128]; static char dbg_key_buf1[128]; -unsigned int ubifs_msg_flags; unsigned int ubifs_chk_flags; unsigned int ubifs_tst_flags; -module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); -MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); MODULE_PARM_DESC(debug_chks, "Debug check flags"); MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 7538bf2..a811ac4 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -109,19 +109,6 @@ struct ubifs_debug_info { #define dbg_dump_stack() dump_stack() -/* Generic debugging messages */ -#define dbg_msg(fmt, ...) do { \ - spin_lock(&dbg_lock); \ - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ - __func__, ##__VA_ARGS__); \ - spin_unlock(&dbg_lock); \ -} while (0) - -#define dbg_do_msg(typ, fmt, ...) do { \ - if (ubifs_msg_flags & typ) \ - dbg_msg(fmt, ##__VA_ARGS__); \ -} while (0) - #define dbg_err(fmt, ...) do { \ spin_lock(&dbg_lock); \ ubifs_err(fmt, ##__VA_ARGS__); \ @@ -140,77 +127,40 @@ const char *dbg_key_str1(const struct ubifs_info *c, #define DBGKEY(key) dbg_key_str0(c, (key)) #define DBGKEY1(key) dbg_key_str1(c, (key)) -/* General messages */ -#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) +#define ubifs_dbg_msg(type, fmt, ...) do { \ + spin_lock(&dbg_lock); \ + pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ + spin_unlock(&dbg_lock); \ +} while (0) +/* Just a debugging messages not related to any specific UBIFS subsystem */ +#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) +/* General messages */ +#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) /* Additional journal messages */ -#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) - +#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) /* Additional TNC messages */ -#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) - +#define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) /* Additional lprops messages */ -#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) - +#define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) /* Additional LEB find messages */ -#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) - +#define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) /* Additional mount messages */ -#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) - +#define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) /* Additional I/O messages */ -#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) - +#define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) /* Additional commit messages */ -#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) - +#define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__) /* Additional budgeting messages */ -#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) - +#define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__) /* Additional log messages */ -#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) - +#define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__) /* Additional gc messages */ -#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) - +#define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__) /* Additional scan messages */ -#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) - +#define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__) /* Additional recovery messages */ -#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) - -/* - * Debugging message type flags. - * - * UBIFS_MSG_GEN: general messages - * UBIFS_MSG_JNL: journal messages - * UBIFS_MSG_MNT: mount messages - * UBIFS_MSG_CMT: commit messages - * UBIFS_MSG_FIND: LEB find messages - * UBIFS_MSG_BUDG: budgeting messages - * UBIFS_MSG_GC: garbage collection messages - * UBIFS_MSG_TNC: TNC messages - * UBIFS_MSG_LP: lprops messages - * UBIFS_MSG_IO: I/O messages - * UBIFS_MSG_LOG: log messages - * UBIFS_MSG_SCAN: scan messages - * UBIFS_MSG_RCVRY: recovery messages - */ -enum { - UBIFS_MSG_GEN = 0x1, - UBIFS_MSG_JNL = 0x2, - UBIFS_MSG_MNT = 0x4, - UBIFS_MSG_CMT = 0x8, - UBIFS_MSG_FIND = 0x10, - UBIFS_MSG_BUDG = 0x20, - UBIFS_MSG_GC = 0x40, - UBIFS_MSG_TNC = 0x80, - UBIFS_MSG_LP = 0x100, - UBIFS_MSG_IO = 0x200, - UBIFS_MSG_LOG = 0x400, - UBIFS_MSG_SCAN = 0x800, - UBIFS_MSG_RCVRY = 0x1000, -}; +#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) /* * Debugging check flags. @@ -368,33 +318,33 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); __func__, __LINE__, current->pid); \ } while (0) -#define dbg_err(fmt, ...) do { \ - if (0) \ - ubifs_err(fmt, ##__VA_ARGS__); \ +#define dbg_err(fmt, ...) do { \ + if (0) \ + ubifs_err(fmt, ##__VA_ARGS__); \ } while (0) -#define dbg_msg(fmt, ...) do { \ - if (0) \ - printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ - current->pid, __func__, ##__VA_ARGS__); \ +#define ubifs_dbg_msg(fmt, ...) do { \ + if (0) \ + pr_debug(fmt "\n", ##__VA_ARGS__); \ } while (0) #define dbg_dump_stack() #define ubifs_assert_cmt_locked(c) -#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) -#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) +#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) #define DBGKEY(key) ((char *)(key)) #define DBGKEY1(key) ((char *)(key)) -- cgit v0.10.2