diff options
Diffstat (limited to 'fs/ubifs')
-rw-r--r-- | fs/ubifs/budget.c | 36 | ||||
-rw-r--r-- | fs/ubifs/commit.c | 7 | ||||
-rw-r--r-- | fs/ubifs/debug.c | 281 | ||||
-rw-r--r-- | fs/ubifs/debug.h | 9 | ||||
-rw-r--r-- | fs/ubifs/dir.c | 13 | ||||
-rw-r--r-- | fs/ubifs/file.c | 119 | ||||
-rw-r--r-- | fs/ubifs/gc.c | 181 | ||||
-rw-r--r-- | fs/ubifs/io.c | 80 | ||||
-rw-r--r-- | fs/ubifs/ioctl.c | 1 | ||||
-rw-r--r-- | fs/ubifs/journal.c | 16 | ||||
-rw-r--r-- | fs/ubifs/key.h | 49 | ||||
-rw-r--r-- | fs/ubifs/log.c | 23 | ||||
-rw-r--r-- | fs/ubifs/lprops.c | 43 | ||||
-rw-r--r-- | fs/ubifs/lpt.c | 22 | ||||
-rw-r--r-- | fs/ubifs/lpt_commit.c | 6 | ||||
-rw-r--r-- | fs/ubifs/master.c | 23 | ||||
-rw-r--r-- | fs/ubifs/misc.h | 9 | ||||
-rw-r--r-- | fs/ubifs/orphan.c | 7 | ||||
-rw-r--r-- | fs/ubifs/recovery.c | 98 | ||||
-rw-r--r-- | fs/ubifs/replay.c | 31 | ||||
-rw-r--r-- | fs/ubifs/sb.c | 10 | ||||
-rw-r--r-- | fs/ubifs/scan.c | 56 | ||||
-rw-r--r-- | fs/ubifs/shrinker.c | 4 | ||||
-rw-r--r-- | fs/ubifs/super.c | 201 | ||||
-rw-r--r-- | fs/ubifs/tnc.c | 82 | ||||
-rw-r--r-- | fs/ubifs/tnc_commit.c | 2 | ||||
-rw-r--r-- | fs/ubifs/ubifs-media.h | 7 | ||||
-rw-r--r-- | fs/ubifs/ubifs.h | 54 | ||||
-rw-r--r-- | fs/ubifs/xattr.c | 9 |
29 files changed, 902 insertions, 577 deletions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index eaf6d89..c8ff0d1 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c @@ -54,41 +54,17 @@ * @nr_to_write: how many dirty pages to write-back * * This function shrinks UBIFS liability by means of writing back some amount - * of dirty inodes and their pages. Returns the amount of pages which were - * written back. The returned value does not include dirty inodes which were - * synchronized. + * of dirty inodes and their pages. * * Note, this function synchronizes even VFS inodes which are locked * (@i_mutex) by the caller of the budgeting function, because write-back does * not touch @i_mutex. */ -static int shrink_liability(struct ubifs_info *c, int nr_to_write) +static void shrink_liability(struct ubifs_info *c, int nr_to_write) { - int nr_written; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_NONE, - .range_end = LLONG_MAX, - .nr_to_write = nr_to_write, - }; - - generic_sync_sb_inodes(c->vfs_sb, &wbc); - nr_written = nr_to_write - wbc.nr_to_write; - - if (!nr_written) { - /* - * Re-try again but wait on pages/inodes which are being - * written-back concurrently (e.g., by pdflush). - */ - memset(&wbc, 0, sizeof(struct writeback_control)); - wbc.sync_mode = WB_SYNC_ALL; - wbc.range_end = LLONG_MAX; - wbc.nr_to_write = nr_to_write; - generic_sync_sb_inodes(c->vfs_sb, &wbc); - nr_written = nr_to_write - wbc.nr_to_write; - } - - dbg_budg("%d pages were written back", nr_written); - return nr_written; + down_read(&c->vfs_sb->s_umount); + writeback_inodes_sb(c->vfs_sb); + up_read(&c->vfs_sb->s_umount); } /** @@ -741,7 +717,7 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) * ubifs_get_free_space - return amount of free space. * @c: UBIFS file-system description object * - * This function calculates and retuns amount of free space to report to + * This function calculates and returns amount of free space to report to * user-space. 
*/ long long ubifs_get_free_space(struct ubifs_info *c) diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index f3a7945..02429d8 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -45,6 +45,7 @@ #include <linux/freezer.h> #include <linux/kthread.h> +#include <linux/slab.h> #include "ubifs.h" /** @@ -62,7 +63,9 @@ static int do_commit(struct ubifs_info *c) struct ubifs_lp_stats lst; dbg_cmt("start"); - if (c->ro_media) { + ubifs_assert(!c->ro_media && !c->ro_mount); + + if (c->ro_error) { err = -EROFS; goto out_up; } @@ -510,7 +513,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; int first = 1, iip; struct ubifs_debug_info *d = c->dbg; - union ubifs_key lower_key, upper_key, l_key, u_key; + union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key; unsigned long long uninitialized_var(last_sqnum); struct ubifs_idx_node *idx; struct list_head list; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index ce2cd83..0bee4db 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -34,6 +34,7 @@ #include <linux/moduleparam.h> #include <linux/debugfs.h> #include <linux/math64.h> +#include <linux/slab.h> #ifdef CONFIG_UBIFS_FS_DEBUG @@ -210,6 +211,20 @@ const char *dbg_cstate(int cmt_state) } } +const char *dbg_jhead(int jhead) +{ + switch (jhead) { + case GCHD: + return "0 (GC)"; + case BASEHD: + return "1 (base)"; + case DATAHD: + return "2 (data)"; + default: + return "unknown journal head"; + } +} + static void dump_ch(const struct ubifs_ch *ch) { printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic)); @@ -336,13 +351,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) le32_to_cpu(sup->fmt_version)); printk(KERN_DEBUG "\ttime_gran %u\n", le32_to_cpu(sup->time_gran)); - printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X" - "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n", - sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3], - 
sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7], - sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11], - sup->uuid[12], sup->uuid[13], sup->uuid[14], - sup->uuid[15]); + printk(KERN_DEBUG "\tUUID %pUB\n", + sup->uuid); break; } case UBIFS_MST_NODE: @@ -623,8 +633,9 @@ void dbg_dump_budg(struct ubifs_info *c) /* If we are in R/O mode, journal heads do not exist */ if (c->jheads) for (i = 0; i < c->jhead_cnt; i++) - printk(KERN_DEBUG "\tjhead %d\t LEB %d\n", - c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum); + printk(KERN_DEBUG "\tjhead %s\t LEB %d\n", + dbg_jhead(c->jheads[i].wbuf.jhead), + c->jheads[i].wbuf.lnum); for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) { bud = rb_entry(rb, struct ubifs_bud, rb); printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum); @@ -648,9 +659,90 @@ void dbg_dump_budg(struct ubifs_info *c) void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) { - printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), " - "flags %#x\n", lp->lnum, lp->free, lp->dirty, - c->leb_size - lp->free - lp->dirty, lp->flags); + int i, spc, dark = 0, dead = 0; + struct rb_node *rb; + struct ubifs_bud *bud; + + spc = lp->free + lp->dirty; + if (spc < c->dead_wm) + dead = spc; + else + dark = ubifs_calc_dark(c, spc); + + if (lp->flags & LPROPS_INDEX) + printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " + "free + dirty %-8d flags %#x (", lp->lnum, lp->free, + lp->dirty, c->leb_size - spc, spc, lp->flags); + else + printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d " + "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d " + "flags %#-4x (", lp->lnum, lp->free, lp->dirty, + c->leb_size - spc, spc, dark, dead, + (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags); + + if (lp->flags & LPROPS_TAKEN) { + if (lp->flags & LPROPS_INDEX) + printk(KERN_CONT "index, taken"); + else + printk(KERN_CONT "taken"); + } else { + const char *s; + + if (lp->flags & LPROPS_INDEX) { + switch (lp->flags & LPROPS_CAT_MASK) { + case 
LPROPS_DIRTY_IDX: + s = "dirty index"; + break; + case LPROPS_FRDI_IDX: + s = "freeable index"; + break; + default: + s = "index"; + } + } else { + switch (lp->flags & LPROPS_CAT_MASK) { + case LPROPS_UNCAT: + s = "not categorized"; + break; + case LPROPS_DIRTY: + s = "dirty"; + break; + case LPROPS_FREE: + s = "free"; + break; + case LPROPS_EMPTY: + s = "empty"; + break; + case LPROPS_FREEABLE: + s = "freeable"; + break; + default: + s = NULL; + break; + } + } + printk(KERN_CONT "%s", s); + } + + for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) { + bud = rb_entry(rb, struct ubifs_bud, rb); + if (bud->lnum == lp->lnum) { + int head = 0; + for (i = 0; i < c->jhead_cnt; i++) { + if (lp->lnum == c->jheads[i].wbuf.lnum) { + printk(KERN_CONT ", jhead %s", + dbg_jhead(i)); + head = 1; + } + } + if (!head) + printk(KERN_CONT ", bud of jhead %s", + dbg_jhead(bud->jhead)); + } + } + if (lp->lnum == c->gc_lnum) + printk(KERN_CONT ", GC LEB"); + printk(KERN_CONT ")\n"); } void dbg_dump_lprops(struct ubifs_info *c) @@ -724,7 +816,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum) printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", current->pid, lnum); - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); if (IS_ERR(sleb)) { ubifs_err("scan error %d", (int)PTR_ERR(sleb)); return; @@ -909,8 +1001,10 @@ out: ubifs_msg("saved lprops statistics dump"); dbg_dump_lstats(&d->saved_lst); ubifs_get_lp_stats(c, &lst); + ubifs_msg("current lprops statistics dump"); - dbg_dump_lstats(&d->saved_lst); + dbg_dump_lstats(&lst); + spin_lock(&c->space_lock); dbg_dump_budg(c); spin_unlock(&c->space_lock); @@ -1916,7 +2010,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr, inum = key_inum_flash(c, &dent->key); fscki1 = read_add_inode(c, priv, inum); if (IS_ERR(fscki1)) { - err = PTR_ERR(fscki); + err = PTR_ERR(fscki1); ubifs_err("error %d while processing entry node and " "trying to find parent inode 
node %lu", err, (unsigned long)inum); @@ -2145,6 +2239,162 @@ out_free: return err; } +/** + * dbg_check_data_nodes_order - check that list of data nodes is sorted. + * @c: UBIFS file-system description object + * @head: the list of nodes ('struct ubifs_scan_node' objects) + * + * This function returns zero if the list of data nodes is sorted correctly, + * and %-EINVAL if not. + */ +int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) +{ + struct list_head *cur; + struct ubifs_scan_node *sa, *sb; + + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + + for (cur = head->next; cur->next != head; cur = cur->next) { + ino_t inuma, inumb; + uint32_t blka, blkb; + + cond_resched(); + sa = container_of(cur, struct ubifs_scan_node, list); + sb = container_of(cur->next, struct ubifs_scan_node, list); + + if (sa->type != UBIFS_DATA_NODE) { + ubifs_err("bad node type %d", sa->type); + dbg_dump_node(c, sa->node); + return -EINVAL; + } + if (sb->type != UBIFS_DATA_NODE) { + ubifs_err("bad node type %d", sb->type); + dbg_dump_node(c, sb->node); + return -EINVAL; + } + + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma < inumb) + continue; + if (inuma > inumb) { + ubifs_err("larger inum %lu goes before inum %lu", + (unsigned long)inuma, (unsigned long)inumb); + goto error_dump; + } + + blka = key_block(c, &sa->key); + blkb = key_block(c, &sb->key); + + if (blka > blkb) { + ubifs_err("larger block %u goes before %u", blka, blkb); + goto error_dump; + } + if (blka == blkb) { + ubifs_err("two data nodes for the same block"); + goto error_dump; + } + } + + return 0; + +error_dump: + dbg_dump_node(c, sa->node); + dbg_dump_node(c, sb->node); + return -EINVAL; +} + +/** + * dbg_check_nondata_nodes_order - check that list of non-data nodes is sorted.
+ * @c: UBIFS file-system description object + * @head: the list of nodes ('struct ubifs_scan_node' objects) + * + * This function returns zero if the list of non-data nodes is sorted correctly, + * and %-EINVAL if not. + */ +int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) +{ + struct list_head *cur; + struct ubifs_scan_node *sa, *sb; + + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + + for (cur = head->next; cur->next != head; cur = cur->next) { + ino_t inuma, inumb; + uint32_t hasha, hashb; + + cond_resched(); + sa = container_of(cur, struct ubifs_scan_node, list); + sb = container_of(cur->next, struct ubifs_scan_node, list); + + if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && + sa->type != UBIFS_XENT_NODE) { + ubifs_err("bad node type %d", sa->type); + dbg_dump_node(c, sa->node); + return -EINVAL; + } + if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE && + sb->type != UBIFS_XENT_NODE) { + ubifs_err("bad node type %d", sb->type); + dbg_dump_node(c, sb->node); + return -EINVAL; + } + + if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { + ubifs_err("non-inode node goes before inode node"); + goto error_dump; + } + + if (sa->type == UBIFS_INO_NODE && sb->type != UBIFS_INO_NODE) + continue; + + if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { + /* Inode nodes are sorted in descending size order */ + if (sa->len < sb->len) { + ubifs_err("smaller inode node goes first"); + goto error_dump; + } + continue; + } + + /* + * This is either a dentry or xentry, which should be sorted in + * ascending (parent ino, hash) order.
+ */ + inuma = key_inum(c, &sa->key); + inumb = key_inum(c, &sb->key); + + if (inuma < inumb) + continue; + if (inuma > inumb) { + ubifs_err("larger inum %lu goes before inum %lu", + (unsigned long)inuma, (unsigned long)inumb); + goto error_dump; + } + + hasha = key_block(c, &sa->key); + hashb = key_block(c, &sb->key); + + if (hasha > hashb) { + ubifs_err("larger hash %u goes before %u", hasha, hashb); + goto error_dump; + } + } + + return 0; + +error_dump: + ubifs_msg("dumping first node"); + dbg_dump_node(c, sa->node); + ubifs_msg("dumping second node"); + dbg_dump_node(c, sb->node); + return -EINVAL; + return 0; +} + static int invocation_cnt; int dbg_force_in_the_gaps(void) @@ -2531,6 +2781,7 @@ static const struct file_operations dfs_fops = { .open = open_debugfs_file, .write = write_debugfs_file, .owner = THIS_MODULE, + .llseek = default_llseek, }; /** diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index c1cd73b..69ebe47 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -271,6 +271,7 @@ void ubifs_debugging_exit(struct ubifs_info *c); /* Dump functions */ const char *dbg_ntype(int type); const char *dbg_cstate(int cmt_state); +const char *dbg_jhead(int jhead); const char *dbg_get_key_dump(const struct ubifs_info *c, const union ubifs_key *key); void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode); @@ -321,6 +322,10 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, int dbg_check_lprops(struct ubifs_info *c); int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, int row, int col); +int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + loff_t size); +int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); +int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); /* Force the use of in-the-gaps method for testing */ @@ -425,6 +430,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_ntype(type) "" 
#define dbg_cstate(cmt_state) "" +#define dbg_jhead(jhead) "" #define dbg_get_key_dump(c, key) ({}) #define dbg_dump_inode(c, inode) ({}) #define dbg_dump_node(c, node) ({}) @@ -460,6 +466,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); #define dbg_check_heap(c, heap, cat, add_pos) ({}) #define dbg_check_lprops(c) 0 #define dbg_check_lpt_nodes(c, cnode, row, col) 0 +#define dbg_check_inode_size(c, inode, size) 0 +#define dbg_check_data_nodes_order(c, head) 0 +#define dbg_check_nondata_nodes_order(c, head) 0 #define dbg_force_in_the_gaps_enabled 0 #define dbg_force_in_the_gaps() 0 #define dbg_failure_mode 0 diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 552fb01..14f64b6 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -104,14 +104,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir, */ inode->i_flags |= (S_NOCMTIME); - inode->i_uid = current_fsuid(); - if (dir->i_mode & S_ISGID) { - inode->i_gid = dir->i_gid; - if (S_ISDIR(mode)) - mode |= S_ISGID; - } else - inode->i_gid = current_fsgid(); - inode->i_mode = mode; + inode_init_owner(inode, dir, mode); inode->i_mtime = inode->i_atime = inode->i_ctime = ubifs_current_time(inode); inode->i_mapping->nrpages = 0; @@ -557,7 +550,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, lock_2_inodes(dir, inode); inc_nlink(inode); - atomic_inc(&inode->i_count); + ihold(inode); inode->i_ctime = ubifs_current_time(inode); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; @@ -1120,7 +1113,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, if (release) ubifs_release_budget(c, &ino_req); if (IS_SYNC(old_inode)) - err = old_inode->i_sb->s_op->write_inode(old_inode, 1); + err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); return err; out_cancel: diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 6d34dc7..d77db7e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -21,39 +21,38 @@ */ /* - * This file implements VFS file and inode 
operations of regular files, device + * This file implements VFS file and inode operations for regular files, device * nodes and symlinks as well as address space operations. * - * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the - * page is dirty and is used for budgeting purposes - dirty pages should not be - * budgeted. The PG_checked flag is set if full budgeting is required for the - * page e.g., when it corresponds to a file hole or it is just beyond the file - * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to - * fail in this function, and the budget is released in 'ubifs_write_end()'. So - * the PG_private and PG_checked flags carry the information about how the page - * was budgeted, to make it possible to release the budget properly. + * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if + * the page is dirty and is used for optimization purposes - dirty pages are + * not budgeted so the flag shows that 'ubifs_write_end()' should not release + * the budget for this page. The @PG_checked flag is set if full budgeting is + * required for the page e.g., when it corresponds to a file hole or it is + * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because + * it is OK to fail in this function, and the budget is released in + * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry + * information about how the page was budgeted, to make it possible to release + * the budget properly. * - * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations - * we implement. However, this is not true for '->writepage()', which might be - * called with 'i_mutex' unlocked. For example, when pdflush is performing - * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the - * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is - * locked in '->writepage', e.g. 
in "sys_write -> alloc_pages -> direct reclaim - * path'. So, in '->writepage()' we are only guaranteed that the page is - * locked. + * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we + * implement. However, this is not true for 'ubifs_writepage()', which may be + * called with @i_mutex unlocked. For example, when pdflush is doing background + * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal" + * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the + * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()' + * we are only guaranteed that the page is locked. * - * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g., - * readahead path does not have it locked ("sys_read -> generic_file_aio_read - * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is - * not set as well. However, UBIFS disables readahead. - * - * This, for example means that there might be 2 concurrent '->writepage()' - * calls for the same inode, but different inode dirty pages. + * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the + * read-ahead path does not lock it ("sys_read -> generic_file_aio_read -> + * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not + * set as well. However, UBIFS disables readahead. 
*/ #include "ubifs.h" #include <linux/mount.h> #include <linux/namei.h> +#include <linux/slab.h> static int read_block(struct inode *inode, void *addr, unsigned int block, struct ubifs_data_node *dn) @@ -434,8 +433,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, struct page *page; ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); + ubifs_assert(!c->ro_media && !c->ro_mount); - if (unlikely(c->ro_media)) + if (unlikely(c->ro_error)) return -EROFS; /* Try out the fast-path part first */ @@ -449,9 +449,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, /* * We change whole page so no need to load it. But we * have to set the @PG_checked flag to make the further - * code the page is new. This might be not true, but it - * is better to budget more that to read the page from - * the media. + * code know that the page is new. This might be not + * true, but it is better to budget more than to read + * the page from the media. */ SetPageChecked(page); skipped_read = 1; @@ -497,8 +497,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, } /* - * Whee, we aquired budgeting quickly - without involving - * garbage-collection, committing or forceing write-back. We return + * Whee, we acquired budgeting quickly - without involving + * garbage-collection, committing or forcing write-back. We return * with @ui->ui_mutex locked if we are appending pages, and unlocked * otherwise. This is an optimization (slightly hacky though). */ @@ -562,7 +562,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping, /* * Return 0 to force VFS to repeat the whole operation, or the - * error code if 'do_readpage()' failes. + * error code if 'do_readpage()' fails. */ copied = do_readpage(page); goto out; @@ -968,12 +968,16 @@ static int do_writepage(struct page *page, int len) * the page locked, and it locks @ui_mutex. 
However, write-back does take inode * @i_mutex, which means other VFS operations may be run on this inode at the * same time. And the problematic one is truncation to smaller size, from where - * we have to call 'vmtruncate()', which first changes @inode->i_size, then + * we have to call 'truncate_setsize()', which first changes @inode->i_size, then * drops the truncated pages. And while dropping the pages, it takes the page - * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with + * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This * means that @inode->i_size is changed while @ui_mutex is unlocked. * + * XXX(truncate): with the new truncate sequence this is not true anymore, + * and the calls to truncate_setsize can be moved around freely. They should + * be moved to the very end of the truncate sequence. + * * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond * inode size. How do we do this if @inode->i_size may became smaller while we * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the @@ -1013,7 +1017,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) /* Is the page fully inside @i_size?
*/ if (page->index < end_index) { if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; /* @@ -1041,7 +1045,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc) kunmap_atomic(kaddr, KM_USER0); if (i_size > synced_i_size) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) goto out_unlock; } @@ -1126,9 +1130,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, budgeted = 0; } - err = vmtruncate(inode, new_size); - if (err) - goto out_budg; + truncate_setsize(inode, new_size); if (offset) { pgoff_t index = new_size >> PAGE_CACHE_SHIFT; @@ -1175,11 +1177,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, ui->ui_size = inode->i_size; /* Truncation changes inode [mc]time */ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); - /* The other attributes may be changed at the same time as well */ + /* Other attributes may be changed at the same time as well */ do_attr_changes(inode, attr); - err = ubifs_jnl_truncate(c, inode, old_size, new_size); mutex_unlock(&ui->ui_mutex); + out_budg: if (budgeted) ubifs_release_budget(c, &req); @@ -1215,16 +1217,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if (attr->ia_valid & ATTR_SIZE) { dbg_gen("size %lld -> %lld", inode->i_size, new_size); - err = vmtruncate(inode, new_size); - if (err) - goto out; + truncate_setsize(inode, new_size); } mutex_lock(&ui->ui_mutex); if (attr->ia_valid & ATTR_SIZE) { /* Truncation changes inode [mc]time */ inode->i_mtime = inode->i_ctime = ubifs_current_time(inode); - /* 'vmtruncate()' changed @i_size, update @ui_size */ + /* 'truncate_setsize()' changed @i_size, update @ui_size */ ui->ui_size = inode->i_size; } @@ -1244,11 +1244,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode, if 
(release) ubifs_release_budget(c, &req); if (IS_SYNC(inode)) - err = inode->i_sb->s_op->write_inode(inode, 1); - return err; - -out: - ubifs_release_budget(c, &req); + err = inode->i_sb->s_op->write_inode(inode, NULL); return err; } @@ -1305,9 +1301,9 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) return NULL; } -int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) +int ubifs_fsync(struct file *file, int datasync) { - struct inode *inode = dentry->d_inode; + struct inode *inode = file->f_mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; int err; @@ -1318,7 +1314,7 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync) * the inode unless this is a 'datasync()' call. */ if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) { - err = inode->i_sb->s_op->write_inode(inode, 1); + err = inode->i_sb->s_op->write_inode(inode, NULL); if (err) return err; } @@ -1391,7 +1387,6 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { int err; - ssize_t ret; struct inode *inode = iocb->ki_filp->f_mapping->host; struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -1399,17 +1394,7 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) return err; - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); - if (ret < 0) - return ret; - - if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) { - err = ubifs_sync_wbufs_by_inode(c, inode); - if (err) - return err; - } - - return ret; + return generic_file_aio_write(iocb, iov, nr_segs, pos); } static int ubifs_set_page_dirty(struct page *page) @@ -1455,9 +1440,9 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vm dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, i_size_read(inode)); - ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); + ubifs_assert(!c->ro_media && !c->ro_mount); - if 
(unlikely(c->ro_media)) + if (unlikely(c->ro_error)) return VM_FAULT_SIGBUS; /* -EROFS */ /* @@ -1536,7 +1521,7 @@ out_unlock: return err; } -static struct vm_operations_struct ubifs_file_vm_ops = { +static const struct vm_operations_struct ubifs_file_vm_ops = { .fault = filemap_fault, .page_mkwrite = ubifs_vm_page_mkwrite, }; diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index f0f5f15..151f108 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -53,7 +53,9 @@ * good, and GC takes extra care when moving them. */ +#include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/list_sort.h> #include "ubifs.h" /* @@ -108,101 +110,6 @@ static int switch_gc_head(struct ubifs_info *c) } /** - * list_sort - sort a list. - * @priv: private data, passed to @cmp - * @head: the list to sort - * @cmp: the elements comparison function - * - * This function has been implemented by Mark J Roberts <mjr@znex.org>. It - * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted - * in ascending order. - * - * The comparison function @cmp is supposed to return a negative value if @a is - * than @b, and a positive value if @a is greater than @b. If @a and @b are - * equivalent, then it does not matter what this function returns. - */ -static void list_sort(void *priv, struct list_head *head, - int (*cmp)(void *priv, struct list_head *a, - struct list_head *b)) -{ - struct list_head *p, *q, *e, *list, *tail, *oldhead; - int insize, nmerges, psize, qsize, i; - - if (list_empty(head)) - return; - - list = head->next; - list_del(head); - insize = 1; - for (;;) { - p = oldhead = list; - list = tail = NULL; - nmerges = 0; - - while (p) { - nmerges++; - q = p; - psize = 0; - for (i = 0; i < insize; i++) { - psize++; - q = q->next == oldhead ? 
NULL : q->next; - if (!q) - break; - } - - qsize = insize; - while (psize > 0 || (qsize > 0 && q)) { - if (!psize) { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } else if (!qsize || !q) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else if (cmp(priv, p, q) <= 0) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } - if (tail) - tail->next = e; - else - list = e; - e->prev = tail; - tail = e; - } - p = q; - } - - tail->next = list; - list->prev = tail; - - if (nmerges <= 1) - break; - - insize *= 2; - } - - head->next = list; - head->prev = list->prev; - list->prev->next = head; - list->prev = head; -} - -/** * data_nodes_cmp - compare 2 data nodes. * @priv: UBIFS file-system description object * @a: first data node @@ -218,10 +125,16 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) struct ubifs_scan_node *sa, *sb; cond_resched(); + if (a == b) + return 0; + sa = list_entry(a, struct ubifs_scan_node, list); sb = list_entry(b, struct ubifs_scan_node, list); + ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); + ubifs_assert(sa->type == UBIFS_DATA_NODE); + ubifs_assert(sb->type == UBIFS_DATA_NODE); inuma = key_inum(c, &sa->key); inumb = key_inum(c, &sb->key); @@ -250,28 +163,40 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) */ int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) { - int typea, typeb; ino_t inuma, inumb; struct ubifs_info *c = priv; struct ubifs_scan_node *sa, *sb; cond_resched(); + if (a == b) + return 0; + sa = list_entry(a, struct ubifs_scan_node, list); sb = list_entry(b, struct ubifs_scan_node, list); - typea = key_type(c, &sa->key); - typeb = key_type(c, &sb->key); - ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY); + + ubifs_assert(key_type(c, 
&sa->key) != UBIFS_DATA_KEY && + key_type(c, &sb->key) != UBIFS_DATA_KEY); + ubifs_assert(sa->type != UBIFS_DATA_NODE && + sb->type != UBIFS_DATA_NODE); /* Inodes go before directory entries */ - if (typea == UBIFS_INO_KEY) { - if (typeb == UBIFS_INO_KEY) + if (sa->type == UBIFS_INO_NODE) { + if (sb->type == UBIFS_INO_NODE) return sb->len - sa->len; return -1; } - if (typeb == UBIFS_INO_KEY) + if (sb->type == UBIFS_INO_NODE) return 1; - ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY); + ubifs_assert(key_type(c, &sa->key) == UBIFS_DENT_KEY || + key_type(c, &sa->key) == UBIFS_XENT_KEY); + ubifs_assert(key_type(c, &sb->key) == UBIFS_DENT_KEY || + key_type(c, &sb->key) == UBIFS_XENT_KEY); + ubifs_assert(sa->type == UBIFS_DENT_NODE || + sa->type == UBIFS_XENT_NODE); + ubifs_assert(sb->type == UBIFS_DENT_NODE || + sb->type == UBIFS_XENT_NODE); + inuma = key_inum(c, &sa->key); inumb = key_inum(c, &sb->key); @@ -317,17 +242,33 @@ int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, struct list_head *nondata, int *min) { + int err; struct ubifs_scan_node *snod, *tmp; *min = INT_MAX; /* Separate data nodes and non-data nodes */ list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { - int err; + ubifs_assert(snod->type == UBIFS_INO_NODE || + snod->type == UBIFS_DATA_NODE || + snod->type == UBIFS_DENT_NODE || + snod->type == UBIFS_XENT_NODE || + snod->type == UBIFS_TRUN_NODE); + + if (snod->type != UBIFS_INO_NODE && + snod->type != UBIFS_DATA_NODE && + snod->type != UBIFS_DENT_NODE && + snod->type != UBIFS_XENT_NODE) { + /* Probably truncation node, zap it */ + list_del(&snod->list); + kfree(snod); + continue; + } - ubifs_assert(snod->type != UBIFS_IDX_NODE); - ubifs_assert(snod->type != UBIFS_REF_NODE); - ubifs_assert(snod->type != UBIFS_CS_NODE); + ubifs_assert(key_type(c, &snod->key) == UBIFS_DATA_KEY || + key_type(c, &snod->key) == UBIFS_INO_KEY || + 
key_type(c, &snod->key) == UBIFS_DENT_KEY || + key_type(c, &snod->key) == UBIFS_XENT_KEY); err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, snod->offs, 0); @@ -351,6 +292,13 @@ static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, /* Sort data and non-data nodes */ list_sort(c, &sleb->nodes, &data_nodes_cmp); list_sort(c, nondata, &nondata_nodes_cmp); + + err = dbg_check_data_nodes_order(c, &sleb->nodes); + if (err) + return err; + err = dbg_check_nondata_nodes_order(c, nondata); + if (err) + return err; return 0; } @@ -529,7 +477,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) * We scan the entire LEB even though we only really need to scan up to * (c->leb_size - lp->free). */ - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0); if (IS_ERR(sleb)) return PTR_ERR(sleb); @@ -668,13 +616,14 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; ubifs_assert_cmt_locked(c); + ubifs_assert(!c->ro_media && !c->ro_mount); if (ubifs_gc_should_commit(c)) return -EAGAIN; mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); - if (c->ro_media) { + if (c->ro_error) { ret = -EROFS; goto out_unlock; } @@ -770,14 +719,12 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) ret = ubifs_garbage_collect_leb(c, &lp); if (ret < 0) { - if (ret == -EAGAIN || ret == -ENOSPC) { + if (ret == -EAGAIN) { /* - * These codes are not errors, so we have to - * return the LEB to lprops. But if the - * 'ubifs_return_leb()' function fails, its - * failure code is propagated to the caller - * instead of the original '-EAGAIN' or - * '-ENOSPC'. + * This is not error, so we have to return the + * LEB to lprops. But if 'ubifs_return_leb()' + * fails, its failure code is propagated to the + * caller instead of the original '-EAGAIN'. 
*/ err = ubifs_return_leb(c, lp.lnum); if (err) @@ -867,8 +814,8 @@ out_unlock: out: ubifs_assert(ret < 0); ubifs_assert(ret != -ENOSPC && ret != -EAGAIN); - ubifs_ro_mode(c, ret); ubifs_wbuf_sync_nolock(wbuf); + ubifs_ro_mode(c, ret); mutex_unlock(&wbuf->io_mutex); ubifs_return_leb(c, lp.lnum); return ret; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index bc58571..d821731 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -51,6 +51,7 @@ */ #include <linux/crc32.h> +#include <linux/slab.h> #include "ubifs.h" /** @@ -60,9 +61,10 @@ */ void ubifs_ro_mode(struct ubifs_info *c, int err) { - if (!c->ro_media) { - c->ro_media = 1; + if (!c->ro_error) { + c->ro_error = 1; c->no_chk_data_crc = 0; + c->vfs_sb->s_flags |= MS_RDONLY; ubifs_warn("switched to read-only mode, error %d", err); dbg_dump_stack(); } @@ -297,6 +299,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer) { struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer); + dbg_io("jhead %s", dbg_jhead(wbuf->jhead)); wbuf->need_sync = 1; wbuf->c->need_wbuf_sync = 1; ubifs_wake_up_bgt(wbuf->c); @@ -311,8 +314,13 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) { ubifs_assert(!hrtimer_active(&wbuf->timer)); - if (!ktime_to_ns(wbuf->softlimit)) + if (wbuf->no_timer) return; + dbg_io("set timer for jhead %s, %llu-%llu millisecs", + dbg_jhead(wbuf->jhead), + div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC), + div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta, + USEC_PER_SEC)); hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta, HRTIMER_MODE_REL); } @@ -323,11 +331,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) */ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf) { - /* - * If the syncer is waiting for the lock (from the background thread's - * context) and another task is changing write-buffer then the syncing - * should be canceled. 
- */ + if (wbuf->no_timer) + return; wbuf->need_sync = 0; hrtimer_cancel(&wbuf->timer); } @@ -349,13 +354,13 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) /* Write-buffer is empty or not seeked */ return 0; - dbg_io("LEB %d:%d, %d bytes", - wbuf->lnum, wbuf->offs, wbuf->used); - ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); + dbg_io("LEB %d:%d, %d bytes, jhead %s", + wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); ubifs_assert(!(wbuf->avail & 7)); ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); + ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_media) + if (c->ro_error) return -EROFS; ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); @@ -390,7 +395,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) * @offs: logical eraseblock offset to seek to * @dtype: data type * - * This function targets the write buffer to logical eraseblock @lnum:@offs. + * This function targets the write-buffer to logical eraseblock @lnum:@offs. * The write-buffer is synchronized if it is not empty. Returns zero in case of * success and a negative error code in case of failure. 
*/ @@ -399,7 +404,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, { const struct ubifs_info *c = wbuf->c; - dbg_io("LEB %d:%d", lnum, offs); + dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead)); ubifs_assert(lnum >= 0 && lnum < c->leb_cnt); ubifs_assert(offs >= 0 && offs <= c->leb_size); ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); @@ -435,11 +440,12 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c) { int err, i; + ubifs_assert(!c->ro_media && !c->ro_mount); if (!c->need_wbuf_sync) return 0; c->need_wbuf_sync = 0; - if (c->ro_media) { + if (c->ro_error) { err = -EROFS; goto out_timers; } @@ -506,14 +512,15 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) struct ubifs_info *c = wbuf->c; int err, written, n, aligned_len = ALIGN(len, 8), offs; - dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len, - dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum, - wbuf->offs + wbuf->used); + dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, + dbg_ntype(((struct ubifs_ch *)buf)->node_type), + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used); ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); + ubifs_assert(!c->ro_media && !c->ro_mount); if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { err = -ENOSPC; @@ -522,7 +529,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) cancel_wbuf_timer_nolock(wbuf); - if (c->ro_media) + if (c->ro_error) return -EROFS; if (aligned_len <= wbuf->avail) { @@ -533,8 +540,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) memcpy(wbuf->buf + wbuf->used, buf, len); if (aligned_len == wbuf->avail) { - dbg_io("flush wbuf to LEB %d:%d", 
wbuf->lnum, - wbuf->offs); + dbg_io("flush jhead %s wbuf to LEB %d:%d", + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, c->min_io_size, wbuf->dtype); @@ -562,7 +569,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) * minimal I/O unit. We have to fill and flush write-buffer and switch * to the next min. I/O unit. */ - dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs); + dbg_io("flush jhead %s wbuf to LEB %d:%d", + dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, c->min_io_size, wbuf->dtype); @@ -657,8 +665,9 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, buf_len); ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); + ubifs_assert(!c->ro_media && !c->ro_mount); - if (c->ro_media) + if (c->ro_error) return -EROFS; ubifs_prepare_node(c, buf, len, 1); @@ -695,7 +704,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, int err, rlen, overlap; struct ubifs_ch *ch = buf; - dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len); + dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs, + dbg_ntype(type), len, dbg_jhead(wbuf->jhead)); ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); ubifs_assert(!(offs & 7) && offs < c->leb_size); ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT); @@ -808,7 +818,8 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, return 0; out: - ubifs_err("bad node at LEB %d:%d", lnum, offs); + ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, + ubi_is_mapped(c->ubi, lnum)); dbg_dump_node(c, buf); dbg_dump_stack(); return -EINVAL; @@ -819,13 +830,12 @@ out: * @c: UBIFS file-system description object * @wbuf: write-buffer to 
initialize * - * This function initializes write buffer. Returns zero in case of success + * This function initializes write-buffer. Returns zero in case of success * %-ENOMEM in case of failure. */ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) { size_t size; - ktime_t hardlimit; wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); if (!wbuf->buf) @@ -851,22 +861,16 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf) hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); wbuf->timer.function = wbuf_timer_callback_nolock; - /* - * Make write-buffer soft limit to be 20% of the hard limit. The - * write-buffer timer is allowed to expire any time between the soft - * and hard limits. - */ - hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0); - wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10; - wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta); - hrtimer_set_expires_range_ns(&wbuf->timer, wbuf->softlimit, - wbuf->delta); + wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0); + wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT; + wbuf->delta *= 1000000000ULL; + ubifs_assert(wbuf->delta <= ULONG_MAX); return 0; } /** * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array. - * @wbuf: the write-buffer whereto add + * @wbuf: the write-buffer where to add * @inum: the inode number * * This function adds an inode number to the inode array of the write-buffer. 
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c index 6db7a6b..8aacd64 100644 --- a/fs/ubifs/ioctl.c +++ b/fs/ubifs/ioctl.c @@ -25,7 +25,6 @@ /* This file implements EXT2-compatible extended attribute ioctl() calls */ #include <linux/compat.h> -#include <linux/smp_lock.h> #include <linux/mount.h> #include "ubifs.h" diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 64b5f3a..914f1bd 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -122,11 +122,12 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len) * better to try to allocate space at the ends of eraseblocks. This is * what the squeeze parameter does. */ + ubifs_assert(!c->ro_media && !c->ro_mount); squeeze = (jhead == BASEHD); again: mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); - if (c->ro_media) { + if (c->ro_error) { err = -EROFS; goto out_unlock; } @@ -158,7 +159,7 @@ again: * some. But the write-buffer mutex has to be unlocked because * GC also takes it. */ - dbg_jnl("no free space jhead %d, run GC", jhead); + dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead)); mutex_unlock(&wbuf->io_mutex); lnum = ubifs_garbage_collect(c, 0); @@ -173,7 +174,8 @@ again: * because we dropped @wbuf->io_mutex, so try once * again. 
*/ - dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead); + dbg_jnl("GC couldn't make a free LEB for jhead %s", + dbg_jhead(jhead)); if (retries++ < 2) { dbg_jnl("retry (%d)", retries); goto again; @@ -184,7 +186,7 @@ again: } mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); - dbg_jnl("got LEB %d for jhead %d", lnum, jhead); + dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead)); avail = c->leb_size - wbuf->offs - wbuf->used; if (wbuf->lnum != -1 && avail >= len) { @@ -255,7 +257,8 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len, *lnum = c->jheads[jhead].wbuf.lnum; *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; - dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); + dbg_jnl("jhead %s, LEB %d:%d, len %d", + dbg_jhead(jhead), *lnum, *offs, len); ubifs_prepare_node(c, node, len, 0); return ubifs_wbuf_write_nolock(wbuf, node, len); @@ -285,7 +288,8 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len, *lnum = c->jheads[jhead].wbuf.lnum; *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used; - dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len); + dbg_jnl("jhead %s, LEB %d:%d, len %d", + dbg_jhead(jhead), *lnum, *offs, len); err = ubifs_wbuf_write_nolock(wbuf, buf, len); if (err) diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 5fa27ea..92a8491 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h @@ -229,23 +229,6 @@ static inline void xent_key_init(const struct ubifs_info *c, } /** - * xent_key_init_hash - initialize extended attribute entry key without - * re-calculating hash function. 
- * @c: UBIFS file-system description object - * @key: key to initialize - * @inum: host inode number - * @hash: extended attribute entry name hash - */ -static inline void xent_key_init_hash(const struct ubifs_info *c, - union ubifs_key *key, ino_t inum, - uint32_t hash) -{ - ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK)); - key->u32[0] = inum; - key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS); -} - -/** * xent_key_init_flash - initialize on-flash extended attribute entry key. * @c: UBIFS file-system description object * @k: key to initialize @@ -295,22 +278,15 @@ static inline void data_key_init(const struct ubifs_info *c, } /** - * data_key_init_flash - initialize on-flash data key. + * highest_data_key - get the highest possible data key for an inode. * @c: UBIFS file-system description object - * @k: key to initialize + * @key: key to initialize * @inum: inode number - * @block: block number */ -static inline void data_key_init_flash(const struct ubifs_info *c, void *k, - ino_t inum, unsigned int block) +static inline void highest_data_key(const struct ubifs_info *c, + union ubifs_key *key, ino_t inum) { - union ubifs_key *key = k; - - ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK)); - key->j32[0] = cpu_to_le32(inum); - key->j32[1] = cpu_to_le32(block | - (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS)); - memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8); + data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK); } /** @@ -330,6 +306,20 @@ static inline void trun_key_init(const struct ubifs_info *c, } /** + * invalid_key_init - initialize invalid node key. + * @c: UBIFS file-system description object + * @key: key to initialize + * + * This is a helper function which marks a @key object as invalid. + */ +static inline void invalid_key_init(const struct ubifs_info *c, + union ubifs_key *key) +{ + key->u32[0] = 0xDEADBEAF; + key->u32[1] = UBIFS_INVALID_KEY; +} + +/** * key_type - get key type. 
* @c: UBIFS file-system description object * @key: key to get type of @@ -554,4 +544,5 @@ static inline unsigned long long key_max_inode_size(const struct ubifs_info *c) return 0; } } + #endif /* !__UBIFS_KEY_H__ */ diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 56e3377..4d0cb12 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c @@ -159,7 +159,7 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) jhead = &c->jheads[bud->jhead]; list_add_tail(&bud->list, &jhead->buds_list); } else - ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); + ubifs_assert(c->replaying && c->ro_mount); /* * Note, although this is a new bud, we anyway account this space now, @@ -169,8 +169,8 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) */ c->bud_bytes += c->leb_size - bud->start; - dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum, - bud->start, bud->jhead, c->bud_bytes); + dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum, + bud->start, dbg_jhead(bud->jhead), c->bud_bytes); spin_unlock(&c->buds_lock); } @@ -223,8 +223,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) } mutex_lock(&c->log_mutex); - - if (c->ro_media) { + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) { err = -EROFS; goto out_unlock; } @@ -355,16 +355,16 @@ static void remove_buds(struct ubifs_info *c) * heads (non-closed buds). 
*/ c->cmt_bud_bytes += wbuf->offs - bud->start; - dbg_log("preserve %d:%d, jhead %d, bud bytes %d, " + dbg_log("preserve %d:%d, jhead %s, bud bytes %d, " "cmt_bud_bytes %lld", bud->lnum, bud->start, - bud->jhead, wbuf->offs - bud->start, + dbg_jhead(bud->jhead), wbuf->offs - bud->start, c->cmt_bud_bytes); bud->start = wbuf->offs; } else { c->cmt_bud_bytes += c->leb_size - bud->start; - dbg_log("remove %d:%d, jhead %d, bud bytes %d, " + dbg_log("remove %d:%d, jhead %s, bud bytes %d, " "cmt_bud_bytes %lld", bud->lnum, bud->start, - bud->jhead, c->leb_size - bud->start, + dbg_jhead(bud->jhead), c->leb_size - bud->start, c->cmt_bud_bytes); rb_erase(p1, &c->buds); /* @@ -429,7 +429,8 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) if (lnum == -1 || offs == c->leb_size) continue; - dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i); + dbg_log("add ref to LEB %d:%d for jhead %s", + lnum, offs, dbg_jhead(i)); ref = buf + len; ref->ch.node_type = UBIFS_REF_NODE; ref->lnum = cpu_to_le32(lnum); @@ -695,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) lnum = c->ltail_lnum; write_lnum = lnum; while (1) { - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); goto out_free; diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 4cdd284..4d4ca38 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -281,7 +281,7 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, case LPROPS_FREE: if (add_to_lpt_heap(c, lprops, cat)) break; - /* No more room on heap so make it uncategorized */ + /* No more room on heap so make it un-categorized */ cat = LPROPS_UNCAT; /* Fall through */ case LPROPS_UNCAT: @@ -375,8 +375,8 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops, * @lprops: LEB properties * * A LEB may have fallen off of the bottom of a heap, and ended up as - * uncategorized even though it has enough space for us 
now. If that is the case - * this function will put the LEB back onto a heap. + * un-categorized even though it has enough space for us now. If that is the + * case this function will put the LEB back onto a heap. */ void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops) { @@ -436,10 +436,10 @@ int ubifs_categorize_lprops(const struct ubifs_info *c, /** * change_category - change LEB properties category. * @c: UBIFS file-system description object - * @lprops: LEB properties to recategorize + * @lprops: LEB properties to re-categorize * * LEB properties are categorized to enable fast find operations. When the LEB - * properties change they must be recategorized. + * properties change they must be re-categorized. */ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) { @@ -461,21 +461,18 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) } /** - * calc_dark - calculate LEB dark space size. + * ubifs_calc_dark - calculate LEB dark space size. * @c: the UBIFS file-system description object * @spc: amount of free and dirty space in the LEB * - * This function calculates amount of dark space in an LEB which has @spc bytes - * of free and dirty space. Returns the calculations result. + * This function calculates and returns amount of dark space in an LEB which + * has @spc bytes of free and dirty space. * - * Dark space is the space which is not always usable - it depends on which - * nodes are written in which order. E.g., if an LEB has only 512 free bytes, - * it is dark space, because it cannot fit a large data node. So UBIFS cannot - * count on this LEB and treat these 512 bytes as usable because it is not true - * if, for example, only big chunks of uncompressible data will be written to - * the FS. + * UBIFS is trying to account the space which might not be usable, and this + * space is called "dark space". 
For example, if an LEB has only %512 free + * bytes, it is dark space, because it cannot fit a large data node. */ -static int calc_dark(struct ubifs_info *c, int spc) +int ubifs_calc_dark(const struct ubifs_info *c, int spc) { ubifs_assert(!(spc & 7)); @@ -518,7 +515,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops) * @free: new free space amount * @dirty: new dirty space amount * @flags: new flags - * @idx_gc_cnt: change to the count of idx_gc list + * @idx_gc_cnt: change to the count of @idx_gc list * * This function changes LEB properties (@free, @dirty or @flag). However, the * property which has the %LPROPS_NC value is not changed. Returns a pointer to @@ -535,7 +532,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, { /* * This is the only function that is allowed to change lprops, so we - * discard the const qualifier. + * discard the "const" qualifier. */ struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp; @@ -575,7 +572,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, if (old_spc < c->dead_wm) c->lst.total_dead -= old_spc; else - c->lst.total_dark -= calc_dark(c, old_spc); + c->lst.total_dark -= ubifs_calc_dark(c, old_spc); c->lst.total_used -= c->leb_size - old_spc; } @@ -616,7 +613,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, if (new_spc < c->dead_wm) c->lst.total_dead += new_spc; else - c->lst.total_dark += calc_dark(c, new_spc); + c->lst.total_dark += ubifs_calc_dark(c, new_spc); c->lst.total_used += c->leb_size - new_spc; } @@ -1096,7 +1093,7 @@ static int scan_check_cb(struct ubifs_info *c, } } - sleb = ubifs_scan(c, lnum, 0, c->dbg->buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); if (IS_ERR(sleb)) { /* * After an unclean unmount, empty and freeable LEBs @@ -1107,7 +1104,7 @@ static int scan_check_cb(struct ubifs_info *c, "- continuing checking"); lst->empty_lebs += 1; lst->total_free += c->leb_size; - lst->total_dark += calc_dark(c, 
c->leb_size); + lst->total_dark += ubifs_calc_dark(c, c->leb_size); return LPT_SCAN_CONTINUE; } @@ -1117,7 +1114,7 @@ static int scan_check_cb(struct ubifs_info *c, "- continuing checking"); lst->total_free += lp->free; lst->total_dirty += lp->dirty; - lst->total_dark += calc_dark(c, c->leb_size); + lst->total_dark += ubifs_calc_dark(c, c->leb_size); return LPT_SCAN_CONTINUE; } data->err = PTR_ERR(sleb); @@ -1235,7 +1232,7 @@ static int scan_check_cb(struct ubifs_info *c, if (spc < c->dead_wm) lst->total_dead += spc; else - lst->total_dark += calc_dark(c, spc); + lst->total_dark += ubifs_calc_dark(c, spc); } ubifs_scan_destroy(sleb); diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index b2792e8..72775d3 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -46,6 +46,7 @@ #include "ubifs.h" #include <linux/crc16.h> #include <linux/math64.h> +#include <linux/slab.h> /** * do_calc_lpt_geom - calculate sizes for the LPT area. @@ -1362,6 +1363,7 @@ static int read_lsave(struct ubifs_info *c) goto out; for (i = 0; i < c->lsave_cnt; i++) { int lnum = c->lsave[i]; + struct ubifs_lprops *lprops; /* * Due to automatic resizing, the values in the lsave table @@ -1369,7 +1371,11 @@ static int read_lsave(struct ubifs_info *c) */ if (lnum >= c->leb_cnt) continue; - ubifs_lpt_lookup(c, lnum); + lprops = ubifs_lpt_lookup(c, lnum); + if (IS_ERR(lprops)) { + err = PTR_ERR(lprops); + goto out; + } } out: vfree(buf); @@ -1456,13 +1462,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum) shft -= UBIFS_LPT_FANOUT_SHIFT; nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); shft -= UBIFS_LPT_FANOUT_SHIFT; pnode = ubifs_get_pnode(c, nnode, iip); if (IS_ERR(pnode)) - return ERR_PTR(PTR_ERR(pnode)); + return ERR_CAST(pnode); iip = (i & (UBIFS_LPT_FANOUT - 1)); dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, pnode->lprops[iip].free, pnode->lprops[iip].dirty, 
@@ -1585,7 +1591,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) nnode = c->nroot; nnode = dirty_cow_nnode(c, nnode); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); i = lnum - c->main_first; shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT; for (h = 1; h < c->lpt_hght; h++) { @@ -1593,19 +1599,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum) shft -= UBIFS_LPT_FANOUT_SHIFT; nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); nnode = dirty_cow_nnode(c, nnode); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); shft -= UBIFS_LPT_FANOUT_SHIFT; pnode = ubifs_get_pnode(c, nnode, iip); if (IS_ERR(pnode)) - return ERR_PTR(PTR_ERR(pnode)); + return ERR_CAST(pnode); pnode = dirty_cow_pnode(c, pnode); if (IS_ERR(pnode)) - return ERR_PTR(PTR_ERR(pnode)); + return ERR_CAST(pnode); iip = (i & (UBIFS_LPT_FANOUT - 1)); dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum, pnode->lprops[iip].free, pnode->lprops[iip].dirty, diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 8cbfb82..5c90dec 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -26,6 +26,7 @@ */ #include <linux/crc16.h> +#include <linux/slab.h> #include "ubifs.h" /** @@ -645,7 +646,7 @@ static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i) shft -= UBIFS_LPT_FANOUT_SHIFT; nnode = ubifs_get_nnode(c, nnode, iip); if (IS_ERR(nnode)) - return ERR_PTR(PTR_ERR(nnode)); + return ERR_CAST(nnode); } iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1)); return ubifs_get_pnode(c, nnode, iip); @@ -704,6 +705,9 @@ static int make_tree_dirty(struct ubifs_info *c) struct ubifs_pnode *pnode; pnode = pnode_lookup(c, 0); + if (IS_ERR(pnode)) + return PTR_ERR(pnode); + while (pnode) { do_make_pnode_dirty(c, pnode); pnode = next_pnode_to_dirty(c, pnode); diff --git 
a/fs/ubifs/master.c b/fs/ubifs/master.c index a88f338..21f47af 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -29,7 +29,8 @@ * @c: UBIFS file-system description object * * This function scans the master node LEBs and search for the latest master - * node. Returns zero in case of success and a negative error code in case of + * node. Returns zero in case of success, %-EUCLEAN if the master area is + * corrupted and requires recovery, and a negative error code in case of * failure. */ static int scan_for_master(struct ubifs_info *c) @@ -40,7 +41,7 @@ static int scan_for_master(struct ubifs_info *c) lnum = UBIFS_MST_LNUM; - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) return PTR_ERR(sleb); nodes_cnt = sleb->nodes_cnt; @@ -48,7 +49,7 @@ static int scan_for_master(struct ubifs_info *c) snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); if (snod->type != UBIFS_MST_NODE) - goto out; + goto out_dump; memcpy(c->mst_node, snod->node, snod->len); offs = snod->offs; } @@ -56,7 +57,7 @@ static int scan_for_master(struct ubifs_info *c) lnum += 1; - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) return PTR_ERR(sleb); if (sleb->nodes_cnt != nodes_cnt) @@ -65,7 +66,7 @@ static int scan_for_master(struct ubifs_info *c) goto out; snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list); if (snod->type != UBIFS_MST_NODE) - goto out; + goto out_dump; if (snod->offs != offs) goto out; if (memcmp((void *)c->mst_node + UBIFS_CH_SZ, @@ -78,6 +79,12 @@ static int scan_for_master(struct ubifs_info *c) out: ubifs_scan_destroy(sleb); + return -EUCLEAN; + +out_dump: + ubifs_err("unexpected node type %d master LEB %d:%d", + snod->type, lnum, snod->offs); + ubifs_scan_destroy(sleb); return -EINVAL; } @@ -256,7 +263,8 @@ int ubifs_read_master(struct ubifs_info *c) err = scan_for_master(c); if (err) { - err = ubifs_recover_master_node(c); + if (err 
== -EUCLEAN) + err = ubifs_recover_master_node(c); if (err) /* * Note, we do not free 'c->mst_node' here because the @@ -353,7 +361,8 @@ int ubifs_write_master(struct ubifs_info *c) { int err, lnum, offs, len; - if (c->ro_media) + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) return -EROFS; lnum = UBIFS_MST_LNUM; diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 4fa81d8..c3de04d 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h @@ -132,7 +132,8 @@ static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) { int err; - if (c->ro_media) + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) return -EROFS; err = ubi_leb_unmap(c->ubi, lnum); if (err) { @@ -159,7 +160,8 @@ static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, { int err; - if (c->ro_media) + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) return -EROFS; err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); if (err) { @@ -186,7 +188,8 @@ static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, { int err; - if (c->ro_media) + ubifs_assert(!c->ro_media && !c->ro_mount); + if (c->ro_error) return -EROFS; err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); if (err) { diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 152a7b3..82009c7 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -670,9 +670,10 @@ static int kill_orphans(struct ubifs_info *c) struct ubifs_scan_leb *sleb; dbg_rcvry("LEB %d", lnum); - sleb = ubifs_scan(c, lnum, 0, c->sbuf); + sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); if (IS_ERR(sleb)) { - sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); + if (PTR_ERR(sleb) == -EUCLEAN) + sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; @@ -899,7 +900,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { struct ubifs_scan_leb *sleb; - sleb = ubifs_scan(c, lnum, 0, 
c->dbg->buf); + sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); if (IS_ERR(sleb)) { err = PTR_ERR(sleb); break; diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 8056052..77e9b87 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -23,14 +23,15 @@ /* * This file implements functions needed to recover from unclean un-mounts. * When UBIFS is mounted, it checks a flag on the master node to determine if - * an un-mount was completed sucessfully. If not, the process of mounting - * incorparates additional checking and fixing of on-flash data structures. + * an un-mount was completed successfully. If not, the process of mounting + * incorporates additional checking and fixing of on-flash data structures. * UBIFS always cleans away all remnants of an unclean un-mount, so that * errors do not accumulate. However UBIFS defers recovery if it is mounted * read-only, and the flash is not modified in that case. */ #include <linux/crc32.h> +#include <linux/slab.h> #include "ubifs.h" /** @@ -53,6 +54,25 @@ static int is_empty(void *buf, int len) } /** + * first_non_ff - find offset of the first non-0xff byte. + * @buf: buffer to search in + * @len: length of buffer + * + * This function returns offset of the first non-0xff byte in @buf or %-1 if + * the buffer contains only 0xff bytes. + */ +static int first_non_ff(void *buf, int len) +{ + uint8_t *p = buf; + int i; + + for (i = 0; i < len; i++) + if (*p++ != 0xff) + return i; + return -1; +} + +/** * get_master_node - get the last valid master node allowing for corruption. * @c: UBIFS file-system description object * @lnum: LEB number @@ -267,12 +287,12 @@ int ubifs_recover_master_node(struct ubifs_info *c) mst = mst2; } - dbg_rcvry("recovered master node from LEB %d", + ubifs_msg("recovered master node from LEB %d", (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1)); memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); - if ((c->vfs_sb->s_flags & MS_RDONLY)) { + if (c->ro_mount) { /* Read-only mode. 
Keep a copy for switching to rw mode */ c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); if (!c->rcvrd_mst_node) { @@ -357,11 +377,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs) empty_offs = ALIGN(offs + 1, c->min_io_size); check_len = c->leb_size - empty_offs; p = buf + empty_offs - offs; - - for (; check_len > 0; check_len--) - if (*p++ != 0xff) - return 0; - return 1; + return is_empty(p, check_len); } /** @@ -453,7 +469,7 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, endpt = snod->offs + snod->len; } - if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { + if (c->ro_mount && !c->remounting_rw) { /* Add to recovery list */ struct ubifs_unclean_leb *ucleb; @@ -543,8 +559,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) * * This function does a scan of a LEB, but caters for errors that might have * been caused by the unclean unmount from which we are attempting to recover. - * - * This function returns %0 on success and a negative error code on failure. + * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is + * found, and a negative error code in case of failure. 
*/ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf, int grouped) @@ -643,7 +659,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, goto corrupted; default: dbg_err("unknown"); - goto corrupted; + err = -EINVAL; + goto error; } } @@ -652,8 +669,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, clean_buf(c, &buf, lnum, &offs, &len); need_clean = 1; } else { - ubifs_err("corrupt empty space at LEB %d:%d", - lnum, offs); + int corruption = first_non_ff(buf, len); + + ubifs_err("corrupt empty space LEB %d:%d, corruption " + "starts at %d", lnum, offs, corruption); + /* Make sure we dump interesting non-0xFF data */ + offs = corruption; + buf += corruption; goto corrupted; } } @@ -750,7 +772,8 @@ out_free: * @sbuf: LEB-sized buffer to use * * This function does a scan of a LEB, but caters for errors that might have - * been caused by the unclean unmount from which we are attempting to recover. + * been caused by unclean reboots from which we are attempting to recover + * (assume that only the last log LEB can be corrupted by an unclean reboot). * * This function returns %0 on success and a negative error code on failure. */ @@ -769,7 +792,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, * We can only recover at the end of the log, so check that the * next log LEB is empty or out of date. 
*/ - sleb = ubifs_scan(c, next_lnum, 0, sbuf); + sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0); if (IS_ERR(sleb)) return sleb; if (sleb->nodes_cnt) { @@ -813,7 +836,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, static int recover_head(const struct ubifs_info *c, int lnum, int offs, void *sbuf) { - int len, err, need_clean = 0; + int len, err; if (c->min_io_size > 1) len = c->min_io_size; @@ -827,19 +850,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs, /* Read at the head location and check it is empty flash */ err = ubi_read(c->ubi, lnum, sbuf, offs, len); - if (err) - need_clean = 1; - else { - uint8_t *p = sbuf; - - while (len--) - if (*p++ != 0xff) { - need_clean = 1; - break; - } - } - - if (need_clean) { + if (err || !is_empty(sbuf, len)) { dbg_rcvry("cleaning head at %d:%d", lnum, offs); if (offs == 0) return ubifs_leb_unmap(c, lnum); @@ -873,7 +884,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) { int err; - ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); + ubifs_assert(!c->ro_mount || c->remounting_rw); dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); @@ -1053,8 +1064,21 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) } err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); if (err) { - if (err == -ENOSPC) - dbg_err("could not find a dirty LEB"); + /* + * There are no dirty or empty LEBs subject to here being + * enough for the index. Try to use + * 'ubifs_find_free_leb_for_idx()', which will return any empty + * LEBs (ignoring index requirements). If the index then + * doesn't have enough LEBs the recovery commit will fail - + * which is the same result anyway i.e. recovery fails. So + * there is no problem ignoring index requirements and just + * grabbing a free LEB since we have already established there + * is not a dirty LEB we could have used instead. 
+ */ + if (err == -ENOSPC) { + dbg_rcvry("could not find a dirty LEB"); + goto find_free; + } return err; } ubifs_assert(!(lp.flags & LPROPS_INDEX)); @@ -1129,8 +1153,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) find_free: /* * There is no GC head LEB or the free space in the GC head LEB is too - * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so - * GC is not run. + * small, or there are not dirty LEBs. Allocate gc_lnum by calling + * 'ubifs_find_free_leb_for_idx()' so GC is not run. */ lnum = ubifs_find_free_leb_for_idx(c); if (lnum < 0) { @@ -1438,7 +1462,7 @@ int ubifs_recover_size(struct ubifs_info *c) } } if (e->exists && e->i_size < e->d_size) { - if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { + if (!e->inode && c->ro_mount) { /* Fix the inode size and pin it in memory */ struct inode *inode; diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 11cc801..eed0fcf 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -506,7 +506,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, if (c->need_recovery) sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); else - sleb = ubifs_scan(c, lnum, offs, c->sbuf); + sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); if (IS_ERR(sleb)) return PTR_ERR(sleb); @@ -627,8 +627,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, ubifs_assert(sleb->endpt - offs >= used); ubifs_assert(sleb->endpt % c->min_io_size == 0); - if (sleb->endpt + c->min_io_size <= c->leb_size && - !(c->vfs_sb->s_flags & MS_RDONLY)) + if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount) err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, sleb->endpt, UBI_SHORTTERM); @@ -836,10 +835,16 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) const struct ubifs_cs_node *node; dbg_mnt("replay log LEB %d:%d", lnum, offs); - sleb = ubifs_scan(c, lnum, offs, sbuf); + sleb = ubifs_scan(c, lnum, offs, sbuf, 
c->need_recovery); if (IS_ERR(sleb)) { - if (c->need_recovery) - sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); + if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) + return PTR_ERR(sleb); + /* + * Note, the below function will recover this log LEB only if + * it is the last, because unclean reboots can possibly corrupt + * only the tail of the log. + */ + sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); if (IS_ERR(sleb)) return PTR_ERR(sleb); } @@ -850,7 +855,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) } node = sleb->buf; - snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); if (c->cs_sqnum == 0) { /* @@ -897,7 +901,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) } list_for_each_entry(snod, &sleb->nodes, list) { - cond_resched(); if (snod->sqnum >= SQNUM_WATERMARK) { @@ -957,7 +960,7 @@ out: return err; out_dump: - ubifs_err("log error detected while replying the log at LEB %d:%d", + ubifs_err("log error detected while replaying the log at LEB %d:%d", lnum, offs + snod->offs); dbg_dump_node(c, snod->node); ubifs_scan_destroy(sleb); @@ -1010,7 +1013,6 @@ out: int ubifs_replay_journal(struct ubifs_info *c) { int err, i, lnum, offs, free; - void *sbuf = NULL; BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); @@ -1025,14 +1027,8 @@ int ubifs_replay_journal(struct ubifs_info *c) return -EINVAL; } - sbuf = vmalloc(c->leb_size); - if (!sbuf) - return -ENOMEM; - dbg_mnt("start replaying the journal"); - c->replaying = 1; - lnum = c->ltail_lnum = c->lhead_lnum; offs = c->lhead_offs; @@ -1045,7 +1041,7 @@ int ubifs_replay_journal(struct ubifs_info *c) lnum = UBIFS_LOG_LNUM; offs = 0; } - err = replay_log_leb(c, lnum, offs, sbuf); + err = replay_log_leb(c, lnum, offs, c->sbuf); if (err == 1) /* We hit the end of the log */ break; @@ -1078,7 +1074,6 @@ int ubifs_replay_journal(struct ubifs_info *c) out: destroy_replay_tree(c); destroy_bud_list(c); - vfree(sbuf); c->replaying = 0; return err; } 
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 57085e4..bf31b47 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -27,6 +27,7 @@ */ #include "ubifs.h" +#include <linux/slab.h> #include <linux/random.h> #include <linux/math64.h> @@ -541,11 +542,8 @@ int ubifs_read_superblock(struct ubifs_info *c) * due to the unavailability of time-travelling equipment. */ if (c->fmt_version > UBIFS_FORMAT_VERSION) { - struct super_block *sb = c->vfs_sb; - int mounting_ro = sb->s_flags & MS_RDONLY; - - ubifs_assert(!c->ro_media || mounting_ro); - if (!mounting_ro || + ubifs_assert(!c->ro_media || c->ro_mount); + if (!c->ro_mount || c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { ubifs_err("on-flash format version is w%d/r%d, but " "software only supports up to version " @@ -623,7 +621,7 @@ int ubifs_read_superblock(struct ubifs_info *c) c->old_leb_cnt = c->leb_cnt; if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); - if (c->vfs_sb->s_flags & MS_RDONLY) + if (c->ro_mount) dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", c->old_leb_cnt, c->leb_cnt); else { diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 0ed8247..3e1ee57 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -108,10 +108,9 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, /* Make the node pads to 8-byte boundary */ if ((node_len + pad_len) & 7) { - if (!quiet) { + if (!quiet) dbg_err("bad padding length %d - %d", offs, offs + node_len + pad_len); - } return SCANNED_A_BAD_PAD_NODE; } @@ -198,7 +197,7 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, struct ubifs_ino_node *ino = buf; struct ubifs_scan_node *snod; - snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); + snod = kmalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); if (!snod) return -ENOMEM; @@ -213,13 +212,15 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, case UBIFS_DENT_NODE: case 
UBIFS_XENT_NODE: case UBIFS_DATA_NODE: - case UBIFS_TRUN_NODE: /* * The key is in the same place in all keyed * nodes. */ key_read(c, &ino->key, &snod->key); break; + default: + invalid_key_init(c, &snod->key); + break; } list_add_tail(&snod->list, &sleb->nodes); sleb->nodes_cnt += 1; @@ -238,12 +239,12 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, { int len; - ubifs_err("corrupted data at LEB %d:%d", lnum, offs); + ubifs_err("corruption at LEB %d:%d", lnum, offs); if (dbg_failure_mode) return; len = c->leb_size - offs; - if (len > 4096) - len = 4096; + if (len > 8192) + len = 8192; dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs); print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1); } @@ -253,13 +254,19 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs, * @c: UBIFS file-system description object * @lnum: logical eraseblock number * @offs: offset to start at (usually zero) - * @sbuf: scan buffer (must be c->leb_size) + * @sbuf: scan buffer (must be of @c->leb_size bytes in size) + * @quiet: print no messages * * This function scans LEB number @lnum and returns complete information about - * its contents. Returns an error code in case of failure. + * its contents. Returns the scanned information in case of success, + * %-EUCLEAN if the LEB needs recovery, and other negative error codes in case + * of failure. + * + * If @quiet is non-zero, this function does not print large and scary + * error messages and flash dumps in case of errors. 
*/ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, - int offs, void *sbuf) + int offs, void *sbuf, int quiet) { void *buf = sbuf + offs; int err, len = c->leb_size - offs; @@ -278,8 +285,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, cond_resched(); - ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); - + ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); if (ret > 0) { /* Padding bytes or a valid padding node */ offs += ret; @@ -304,7 +310,8 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, goto corrupted; default: dbg_err("unknown"); - goto corrupted; + err = -EINVAL; + goto error; } err = ubifs_add_snod(c, sleb, buf, offs); @@ -317,8 +324,12 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, len -= node_len; } - if (offs % c->min_io_size) - goto corrupted; + if (offs % c->min_io_size) { + if (!quiet) + ubifs_err("empty space starts at non-aligned offset %d", + offs); + goto corrupted;; + } ubifs_end_scan(c, sleb, lnum, offs); @@ -327,18 +338,25 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, break; for (; len; offs++, buf++, len--) if (*(uint8_t *)buf != 0xff) { - ubifs_err("corrupt empty space at LEB %d:%d", - lnum, offs); + if (!quiet) + ubifs_err("corrupt empty space at LEB %d:%d", + lnum, offs); goto corrupted; } return sleb; corrupted: - ubifs_scanned_corruption(c, lnum, offs, buf); + if (!quiet) { + ubifs_scanned_corruption(c, lnum, offs, buf); + ubifs_err("LEB %d scanning failed", lnum); + } err = -EUCLEAN; + ubifs_scan_destroy(sleb); + return ERR_PTR(err); + error: - ubifs_err("LEB %d scanning failed", lnum); + ubifs_err("LEB %d scanning failed, error %d", lnum, err); ubifs_scan_destroy(sleb); return ERR_PTR(err); } diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 02feb59..46961c0 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c @@ -250,7 +250,7 @@ static int kick_a_thread(void) dirty_zn_cnt = 
atomic_long_read(&c->dirty_zn_cnt); if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || - c->ro_media) { + c->ro_mount || c->ro_error) { mutex_unlock(&c->umount_mutex); continue; } @@ -277,7 +277,7 @@ static int kick_a_thread(void) return 0; } -int ubifs_shrinker(int nr, gfp_t gfp_mask) +int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) { int freed, contention = 0; long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 79fad43..6e11c29 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -36,7 +36,6 @@ #include <linux/mount.h> #include <linux/math64.h> #include <linux/writeback.h> -#include <linux/smp_lock.h> #include "ubifs.h" /* @@ -273,18 +272,26 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb) return &ui->vfs_inode; }; +static void ubifs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ubifs_inode *ui = ubifs_inode(inode); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(ubifs_inode_slab, ui); +} + static void ubifs_destroy_inode(struct inode *inode) { struct ubifs_inode *ui = ubifs_inode(inode); kfree(ui->data); - kmem_cache_free(ubifs_inode_slab, inode); + call_rcu(&inode->i_rcu, ubifs_i_callback); } /* * Note, Linux write-back code calls this without 'i_mutex'. 
*/ -static int ubifs_write_inode(struct inode *inode, int wait) +static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc) { int err = 0; struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -318,6 +325,8 @@ static int ubifs_write_inode(struct inode *inode, int wait) if (err) ubifs_err("can't write inode %lu, error %d", inode->i_ino, err); + else + err = dbg_check_inode_size(c, inode, ui->ui_size); } ui->dirty = 0; @@ -326,7 +335,7 @@ static int ubifs_write_inode(struct inode *inode, int wait) return err; } -static void ubifs_delete_inode(struct inode *inode) +static void ubifs_evict_inode(struct inode *inode) { int err; struct ubifs_info *c = inode->i_sb->s_fs_info; @@ -342,9 +351,12 @@ static void ubifs_delete_inode(struct inode *inode) dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); ubifs_assert(!atomic_read(&inode->i_count)); - ubifs_assert(inode->i_nlink == 0); truncate_inode_pages(&inode->i_data, 0); + + if (inode->i_nlink) + goto done; + if (is_bad_inode(inode)) goto out; @@ -366,7 +378,8 @@ out: c->nospace = c->nospace_rp = 0; smp_wmb(); } - clear_inode(inode); +done: + end_writeback(inode); } static void ubifs_dirty_inode(struct inode *inode) @@ -438,12 +451,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) { int i, err; struct ubifs_info *c = sb->s_fs_info; - struct writeback_control wbc = { - .sync_mode = WB_SYNC_ALL, - .range_start = 0, - .range_end = LLONG_MAX, - .nr_to_write = LONG_MAX, - }; /* * Zero @wait is just an advisory thing to help the file system shove @@ -454,17 +461,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) return 0; /* - * VFS calls '->sync_fs()' before synchronizing all dirty inodes and - * pages, so synchronize them first, then commit the journal. Strictly - * speaking, it is not necessary to commit the journal here, - * synchronizing write-buffers would be enough. 
But committing makes - * UBIFS free space predictions much more accurate, so we want to let - * the user be able to get more accurate results of 'statfs()' after - * they synchronize the file system. - */ - generic_sync_sb_inodes(sb, &wbc); - - /* * Synchronize write buffers, because 'ubifs_run_commit()' does not * do this if it waits for an already running commit. */ @@ -474,6 +470,13 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) return err; } + /* + * Strictly speaking, it is not necessary to commit the journal here, + * synchronizing write-buffers would be enough. But committing makes + * UBIFS free space predictions much more accurate, so we want to let + * the user be able to get more accurate results of 'statfs()' after + * they synchronize the file system. + */ err = ubifs_run_commit(c); if (err) return err; @@ -797,7 +800,7 @@ static int alloc_wbufs(struct ubifs_info *c) * does not need to be synchronized by timer. */ c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; - c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0); + c->jheads[GCHD].wbuf.no_timer = 1; return 0; } @@ -986,7 +989,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, switch (token) { /* * %Opt_fast_unmount and %Opt_norm_unmount options are ignored. - * We accepte them in order to be backware-compatible. But this + * We accept them in order to be backward-compatible. But this * should be removed at some point. 
*/ case Opt_fast_unmount: @@ -1142,11 +1145,11 @@ static int check_free_space(struct ubifs_info *c) */ static int mount_ubifs(struct ubifs_info *c) { - struct super_block *sb = c->vfs_sb; - int err, mounted_read_only = (sb->s_flags & MS_RDONLY); + int err; long long x; size_t sz; + c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); err = init_constants_early(c); if (err) return err; @@ -1159,7 +1162,7 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_free; - if (c->empty && (mounted_read_only || c->ro_media)) { + if (c->empty && (c->ro_mount || c->ro_media)) { /* * This UBI volume is empty, and read-only, or the file system * is mounted read-only - we cannot format it. @@ -1170,7 +1173,7 @@ static int mount_ubifs(struct ubifs_info *c) goto out_free; } - if (c->ro_media && !mounted_read_only) { + if (c->ro_media && !c->ro_mount) { ubifs_err("cannot mount read-write - read-only media"); err = -EROFS; goto out_free; @@ -1190,7 +1193,7 @@ static int mount_ubifs(struct ubifs_info *c) if (!c->sbuf) goto out_free; - if (!mounted_read_only) { + if (!c->ro_mount) { c->ileb_buf = vmalloc(c->leb_size); if (!c->ileb_buf) goto out_free; @@ -1233,7 +1236,7 @@ static int mount_ubifs(struct ubifs_info *c) } sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); - if (!mounted_read_only) { + if (!c->ro_mount) { err = alloc_wbufs(c); if (err) goto out_cbuf; @@ -1259,12 +1262,12 @@ static int mount_ubifs(struct ubifs_info *c) if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { ubifs_msg("recovery needed"); c->need_recovery = 1; - if (!mounted_read_only) { + if (!c->ro_mount) { err = ubifs_recover_inl_heads(c, c->sbuf); if (err) goto out_master; } - } else if (!mounted_read_only) { + } else if (!c->ro_mount) { /* * Set the "dirty" flag so that if we reboot uncleanly we * will notice this immediately on the next mount. 
@@ -1275,7 +1278,7 @@ static int mount_ubifs(struct ubifs_info *c) goto out_master; } - err = ubifs_lpt_init(c, 1, !mounted_read_only); + err = ubifs_lpt_init(c, 1, !c->ro_mount); if (err) goto out_lpt; @@ -1287,11 +1290,14 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_journal; - err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); + /* Calculate 'min_idx_lebs' after journal replay */ + c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); + + err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); if (err) goto out_orphans; - if (!mounted_read_only) { + if (!c->ro_mount) { int lnum; err = check_free_space(c); @@ -1313,6 +1319,8 @@ static int mount_ubifs(struct ubifs_info *c) if (err) goto out_orphans; err = ubifs_rcvry_gc_commit(c); + if (err) + goto out_orphans; } else { err = take_gc_lnum(c); if (err) @@ -1324,7 +1332,7 @@ static int mount_ubifs(struct ubifs_info *c) */ err = ubifs_leb_unmap(c, c->gc_lnum); if (err) - return err; + goto out_orphans; } err = dbg_check_lprops(c); @@ -1351,7 +1359,7 @@ static int mount_ubifs(struct ubifs_info *c) spin_unlock(&ubifs_infos_lock); if (c->need_recovery) { - if (mounted_read_only) + if (c->ro_mount) ubifs_msg("recovery deferred"); else { c->need_recovery = 0; @@ -1378,7 +1386,7 @@ static int mount_ubifs(struct ubifs_info *c) ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", c->vi.ubi_num, c->vi.vol_id, c->vi.name); - if (mounted_read_only) + if (c->ro_mount) ubifs_msg("mounted read-only"); x = (long long)c->main_lebs * c->leb_size; ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " @@ -1399,12 +1407,7 @@ static int mount_ubifs(struct ubifs_info *c) c->leb_size, c->leb_size >> 10); dbg_msg("data journal heads: %d", c->jhead_cnt - NONDATA_JHEADS_CNT); - dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" - "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X", - c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3], - c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7], - c->uuid[8], c->uuid[9], 
c->uuid[10], c->uuid[11], - c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]); + dbg_msg("UUID: %pUB", c->uuid); dbg_msg("big_lpt %d", c->big_lpt); dbg_msg("log LEBs: %d (%d - %d)", c->log_lebs, UBIFS_LOG_LNUM, c->log_last); @@ -1645,7 +1648,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) } dbg_gen("re-mounted read-write"); - c->vfs_sb->s_flags &= ~MS_RDONLY; + c->ro_mount = 0; c->remounting_rw = 0; c->always_chk_crc = 0; err = dbg_check_space_info(c); @@ -1681,7 +1684,7 @@ static void ubifs_remount_ro(struct ubifs_info *c) int i, err; ubifs_assert(!c->need_recovery); - ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); + ubifs_assert(!c->ro_mount); mutex_lock(&c->umount_mutex); if (c->bgt) { @@ -1691,10 +1694,8 @@ static void ubifs_remount_ro(struct ubifs_info *c) dbg_save_space_info(c); - for (i = 0; i < c->jhead_cnt; i++) { + for (i = 0; i < c->jhead_cnt; i++) ubifs_wbuf_sync(&c->jheads[i].wbuf); - hrtimer_cancel(&c->jheads[i].wbuf.timer); - } c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); @@ -1709,6 +1710,7 @@ static void ubifs_remount_ro(struct ubifs_info *c) vfree(c->ileb_buf); c->ileb_buf = NULL; ubifs_lpt_free(c, 1); + c->ro_mount = 1; err = dbg_check_space_info(c); if (err) ubifs_ro_mode(c, err); @@ -1723,8 +1725,6 @@ static void ubifs_put_super(struct super_block *sb) ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num, c->vi.vol_id); - lock_kernel(); - /* * The following asserts are only valid if there has not been a failure * of the media. For example, there will be dirty inodes if we failed @@ -1742,7 +1742,7 @@ static void ubifs_put_super(struct super_block *sb) * the mutex is locked. */ mutex_lock(&c->umount_mutex); - if (!(c->vfs_sb->s_flags & MS_RDONLY)) { + if (!c->ro_mount) { /* * First of all kill the background thread to make sure it does * not interfere with un-mounting and freeing resources. 
@@ -1752,25 +1752,22 @@ static void ubifs_put_super(struct super_block *sb) c->bgt = NULL; } - /* Synchronize write-buffers */ - if (c->jheads) - for (i = 0; i < c->jhead_cnt; i++) { - ubifs_wbuf_sync(&c->jheads[i].wbuf); - hrtimer_cancel(&c->jheads[i].wbuf.timer); - } - /* - * On fatal errors c->ro_media is set to 1, in which case we do + * On fatal errors c->ro_error is set to 1, in which case we do * not write the master node. */ - if (!c->ro_media) { + if (!c->ro_error) { + int err; + + /* Synchronize write-buffers */ + for (i = 0; i < c->jhead_cnt; i++) + ubifs_wbuf_sync(&c->jheads[i].wbuf); + /* * We are being cleanly unmounted which means the * orphans were killed - indicate this in the master * node. Also save the reserved GC LEB number. */ - int err; - c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); @@ -1783,6 +1780,10 @@ static void ubifs_put_super(struct super_block *sb) */ ubifs_err("failed to write master node, " "error %d", err); + } else { + for (i = 0; i < c->jhead_cnt; i++) + /* Make sure write-buffer timers are canceled */ + hrtimer_cancel(&c->jheads[i].wbuf.timer); } } @@ -1791,8 +1792,6 @@ static void ubifs_put_super(struct super_block *sb) ubi_close_volume(c->ubi); mutex_unlock(&c->umount_mutex); kfree(c); - - unlock_kernel(); } static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) @@ -1808,22 +1807,21 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) return err; } - lock_kernel(); - if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { + if (c->ro_mount && !(*flags & MS_RDONLY)) { + if (c->ro_error) { + ubifs_msg("cannot re-mount R/W due to prior errors"); + return -EROFS; + } if (c->ro_media) { - ubifs_msg("cannot re-mount due to prior errors"); - unlock_kernel(); + ubifs_msg("cannot re-mount R/W - UBI volume is R/O"); return -EROFS; } err = ubifs_remount_rw(c); - if (err) { - 
unlock_kernel(); + if (err) return err; - } - } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { - if (c->ro_media) { - ubifs_msg("cannot re-mount due to prior errors"); - unlock_kernel(); + } else if (!c->ro_mount && (*flags & MS_RDONLY)) { + if (c->ro_error) { + ubifs_msg("cannot re-mount R/O due to prior errors"); return -EROFS; } ubifs_remount_ro(c); @@ -1838,7 +1836,6 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) } ubifs_assert(c->lst.taken_empty_lebs > 0); - unlock_kernel(); return 0; } @@ -1847,7 +1844,7 @@ const struct super_operations ubifs_super_operations = { .destroy_inode = ubifs_destroy_inode, .put_super = ubifs_put_super, .write_inode = ubifs_write_inode, - .delete_inode = ubifs_delete_inode, + .evict_inode = ubifs_evict_inode, .statfs = ubifs_statfs, .dirty_inode = ubifs_dirty_inode, .remount_fs = ubifs_remount_fs, @@ -1860,22 +1857,32 @@ const struct super_operations ubifs_super_operations = { * @name: UBI volume name * @mode: UBI volume open mode * - * There are several ways to specify UBI volumes when mounting UBIFS: - * o ubiX_Y - UBI device number X, volume Y; - * o ubiY - UBI device number 0, volume Y; + * The primary method of mounting UBIFS is by specifying the UBI volume + * character device node path. However, UBIFS may also be mounted without any + * character device node using one of the following methods: + * + * o ubiX_Y - mount UBI device number X, volume Y; + * o ubiY - mount UBI device number 0, volume Y; * o ubiX:NAME - mount UBI device X, volume with name NAME; * o ubi:NAME - mount UBI device 0, volume with name NAME. * * Alternative '!' separator may be used instead of ':' (because some shells * like busybox may interpret ':' as an NFS host name separator). This function - * returns ubi volume object in case of success and a negative error code in - * case of failure. + * returns UBI volume description object in case of success and a negative + * error code in case of failure.
*/ static struct ubi_volume_desc *open_ubi(const char *name, int mode) { + struct ubi_volume_desc *ubi; int dev, vol; char *endptr; + /* First, try to open using the device node path method */ + ubi = ubi_open_volume_path(name, mode); + if (!IS_ERR(ubi)) + return ubi; + + /* Try the "nodev" method */ if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i') return ERR_PTR(-EINVAL); @@ -1970,12 +1977,14 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) * * Read-ahead will be disabled because @c->bdi.ra_pages is 0. */ + c->bdi.name = "ubifs", c->bdi.capabilities = BDI_CAP_MAP_COPY; c->bdi.unplug_io_fn = default_unplug_io_fn; err = bdi_init(&c->bdi); if (err) goto out_close; - err = bdi_register(&c->bdi, NULL, "ubifs"); + err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d", + c->vi.ubi_num, c->vi.vol_id); if (err) goto out_bdi; @@ -1983,6 +1992,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto out_bdi; + sb->s_bdi = &c->bdi; sb->s_fs_info = c; sb->s_magic = UBIFS_SUPER_MAGIC; sb->s_blocksize = UBIFS_BLOCK_SIZE; @@ -2036,8 +2046,8 @@ static int sb_test(struct super_block *sb, void *data) return c->vi.cdev == *dev; } -static int ubifs_get_sb(struct file_system_type *fs_type, int flags, - const char *name, void *data, struct vfsmount *mnt) +static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags, + const char *name, void *data) { struct ubi_volume_desc *ubi; struct ubi_volume_info vi; @@ -2053,9 +2063,9 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, */ ubi = open_ubi(name, UBI_READONLY); if (IS_ERR(ubi)) { - ubifs_err("cannot open \"%s\", error %d", - name, (int)PTR_ERR(ubi)); - return PTR_ERR(ubi); + dbg_err("cannot open \"%s\", error %d", + name, (int)PTR_ERR(ubi)); + return ERR_CAST(ubi); } ubi_get_volume_info(ubi, &vi); @@ -2068,9 +2078,11 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, } if (sb->s_root) { + struct ubifs_info *c1 
= sb->s_fs_info; + /* A new mount point for already mounted UBIFS */ dbg_gen("this ubi volume is already mounted"); - if ((flags ^ sb->s_flags) & MS_RDONLY) { + if (!!(flags & MS_RDONLY) != c1->ro_mount) { err = -EBUSY; goto out_deact; } @@ -2091,20 +2103,19 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, /* 'fill_super()' opens ubi again so we must close it here */ ubi_close_volume(ubi); - simple_set_mnt(mnt, sb); - return 0; + return dget(sb->s_root); out_deact: deactivate_locked_super(sb); out_close: ubi_close_volume(ubi); - return err; + return ERR_PTR(err); } static struct file_system_type ubifs_fs_type = { .name = "ubifs", .owner = THIS_MODULE, - .get_sb = ubifs_get_sb, + .mount = ubifs_mount, .kill_sb = kill_anon_super, }; diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index f249f7b..ad9cf01 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -31,6 +31,7 @@ */ #include <linux/crc32.h> +#include <linux/slab.h> #include "ubifs.h" /* @@ -1159,8 +1160,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, * o exact match, i.e. the found zero-level znode contains key @key, then %1 * is returned and slot number of the matched branch is stored in @n; * o not exact match, which means that zero-level znode does not contain - * @key, then %0 is returned and slot number of the closed branch is stored - * in @n; + * @key, then %0 is returned and slot number of the closest branch is stored + * in @n; * o @key is so small that it is even less than the lowest key of the * leftmost zero-level node, then %0 is returned and %0 is stored in @n. 
* @@ -1176,6 +1177,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, unsigned long time = get_seconds(); dbg_tnc("search key %s", DBGKEY(key)); + ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); znode = c->zroot.znode; if (unlikely(!znode)) { @@ -1433,7 +1435,7 @@ static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) * @lnum: LEB number is returned here * @offs: offset is returned here * - * This function look up and reads node with key @key. The caller has to make + * This function looks up and reads node with key @key. The caller has to make * sure the @node buffer is large enough to fit the node. Returns zero in case * of success, %-ENOENT if the node was not found, and a negative error code in * case of failure. The node location can be returned in @lnum and @offs. @@ -2965,7 +2967,7 @@ static struct ubifs_znode *right_znode(struct ubifs_info *c, * * This function searches an indexing node by its first key @key and its * address @lnum:@offs. It looks up the indexing tree by pulling all indexing - * nodes it traverses to TNC. This function is called fro indexing nodes which + * nodes it traverses to TNC. This function is called for indexing nodes which * were found on the media by scanning, for example when garbage-collecting or * when doing in-the-gaps commit. This means that the indexing node which is * looked for does not have to have exactly the same leftmost key @key, because @@ -2987,6 +2989,8 @@ static struct ubifs_znode *lookup_znode(struct ubifs_info *c, struct ubifs_znode *znode, *zn; int n, nn; + ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); + /* * The arguments have probably been read off flash, so don't assume * they are valid. @@ -3268,3 +3272,73 @@ out_unlock: mutex_unlock(&c->tnc_mutex); return err; } + +#ifdef CONFIG_UBIFS_FS_DEBUG + +/** + * dbg_check_inode_size - check if inode size is correct. 
+ * @c: UBIFS file-system description object + * @inode: inode to check + * @size: inode size + * + * This function makes sure that the inode size (@size) is correct and it does + * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL + * if it has a data page beyond @size, and other negative error code in case of + * other errors. + */ +int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, + loff_t size) +{ + int err, n; + union ubifs_key from_key, to_key, *key; + struct ubifs_znode *znode; + unsigned int block; + + if (!S_ISREG(inode->i_mode)) + return 0; + if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) + return 0; + + block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT; + data_key_init(c, &from_key, inode->i_ino, block); + highest_data_key(c, &to_key, inode->i_ino); + + mutex_lock(&c->tnc_mutex); + err = ubifs_lookup_level0(c, &from_key, &znode, &n); + if (err < 0) + goto out_unlock; + + if (err) { + err = -EINVAL; + key = &from_key; + goto out_dump; + } + + err = tnc_next(c, &znode, &n); + if (err == -ENOENT) { + err = 0; + goto out_unlock; + } + if (err < 0) + goto out_unlock; + + ubifs_assert(err == 0); + key = &znode->zbranch[n].key; + if (!key_in_range(c, key, &from_key, &to_key)) + goto out_unlock; + +out_dump: + block = key_block(c, key); + ubifs_err("inode %lu has size %lld, but there are data at offset %lld " + "(data key %s)", (unsigned long)inode->i_ino, size, + ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key)); + dbg_dump_inode(c, inode); + dbg_dump_stack(); + err = -EINVAL; + +out_unlock: + mutex_unlock(&c->tnc_mutex); + return err; +} + +#endif /* CONFIG_UBIFS_FS_DEBUG */ diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index fde8d12..53288e5 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -245,7 +245,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p) * it is more comprehensive and less efficient than is needed for this * purpose.
*/ - sleb = ubifs_scan(c, lnum, 0, c->ileb_buf); + sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0); c->ileb_len = 0; if (IS_ERR(sleb)) return PTR_ERR(sleb); diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 3eee07e..191ca78 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h @@ -135,6 +135,13 @@ /* The key is always at the same position in all keyed nodes */ #define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key) +/* Garbage collector journal head number */ +#define UBIFS_GC_HEAD 0 +/* Base journal head number */ +#define UBIFS_BASE_HEAD 1 +/* Data journal head number */ +#define UBIFS_DATA_HEAD 2 + /* * LEB Properties Tree node types. * diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 1bf01d8..381d6b2 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -28,6 +28,7 @@ #include <linux/fs.h> #include <linux/err.h> #include <linux/sched.h> +#include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/spinlock.h> #include <linux/mutex.h> @@ -95,8 +96,9 @@ */ #define BGT_NAME_PATTERN "ubifs_bgt%d_%d" -/* Default write-buffer synchronization timeout in seconds */ -#define DEFAULT_WBUF_TIMEOUT_SECS 5 +/* Write-buffer synchronization timeout interval in seconds */ +#define WBUF_TIMEOUT_SOFTLIMIT 3 +#define WBUF_TIMEOUT_HARDLIMIT 5 /* Maximum possible inode number (only 32-bit inodes are supported now) */ #define MAX_INUM 0xFFFFFFFF @@ -104,12 +106,10 @@ /* Number of non-data journal heads */ #define NONDATA_JHEADS_CNT 2 -/* Garbage collector head */ -#define GCHD 0 -/* Base journal head number */ -#define BASEHD 1 -/* First "general purpose" journal head */ -#define DATAHD 2 +/* Shorter names for journal head numbers for internal usage */ +#define GCHD UBIFS_GC_HEAD +#define BASEHD UBIFS_BASE_HEAD +#define DATAHD UBIFS_DATA_HEAD /* 'No change' value for 'ubifs_change_lp()' */ #define LPROPS_NC 0x80000001 @@ -119,8 +119,12 @@ * in TNC. 
However, when replaying, it is handy to introduce fake "truncation" * keys for truncation nodes because the code becomes simpler. So we define * %UBIFS_TRUN_KEY type. + * + * But otherwise, out of the journal replay scope, the truncation keys are + * invalid. */ -#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT +#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT +#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT /* * How much a directory entry/extended attribute entry adds to the parent/host @@ -379,7 +383,7 @@ struct ubifs_gced_idx_leb { * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot * make sure @inode->i_size is always changed under @ui_mutex, because it - * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock + * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock * with 'ubifs_writepage()' (see file.c). All the other inode fields are * changed under @ui_mutex, so they do not need "shadow" fields. Note, one * could consider to rework locking and base it on "shadow" fields.
@@ -654,7 +658,8 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c, * @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit * and @softlimit + @delta) * @timer: write-buffer timer - * @need_sync: it is set if its timer expired and needs sync + * @no_timer: non-zero if this write-buffer does not have a timer + * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing * @next_ino: points to the next position of the following inode number * @inodes: stores the inode numbers of the nodes which are in wbuf * @@ -683,7 +688,8 @@ struct ubifs_wbuf { ktime_t softlimit; unsigned long long delta; struct hrtimer timer; - int need_sync; + unsigned int no_timer:1; + unsigned int need_sync:1; int next_ino; ino_t *inodes; }; @@ -1026,6 +1032,8 @@ struct ubifs_debug_info; * @max_leb_cnt: maximum count of logical eraseblocks * @old_leb_cnt: count of logical eraseblocks before re-size * @ro_media: the underlying UBI volume is read-only + * @ro_mount: the file-system was mounted as read-only + * @ro_error: UBIFS switched to R/O mode because an error happened * * @dirty_pg_cnt: number of dirty pages (not used) * @dirty_zn_cnt: number of dirty znodes @@ -1166,11 +1174,14 @@ struct ubifs_debug_info; * @replay_sqnum: sequence number of node currently being replayed * @need_recovery: file-system needs recovery * @replaying: set to %1 during journal replay - * @unclean_leb_list: LEBs to recover when mounting ro to rw - * @rcvrd_mst_node: recovered master node to write when mounting ro to rw + * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W + * mode + * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted + * FS to R/W mode * @size_tree: inode size information for recovery - * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) - * @always_chk_crc: always check CRCs (while mounting and remounting rw) + * @remounting_rw: set while re-mounting from R/O mode to R/W 
mode + * @always_chk_crc: always check CRCs (while mounting and remounting to R/W + * mode) * @mount_opts: UBIFS-specific mount options * * @dbg: debugging-related information @@ -1266,7 +1277,9 @@ struct ubifs_info { int leb_cnt; int max_leb_cnt; int old_leb_cnt; - int ro_media; + unsigned int ro_media:1; + unsigned int ro_mount:1; + unsigned int ro_error:1; atomic_long_t dirty_pg_cnt; atomic_long_t dirty_zn_cnt; @@ -1448,7 +1461,7 @@ int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode); /* scan.c */ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum, - int offs, void *sbuf); + int offs, void *sbuf, int quiet); void ubifs_scan_destroy(struct ubifs_scan_leb *sleb); int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, int offs, int quiet); @@ -1573,7 +1586,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot); int ubifs_tnc_end_commit(struct ubifs_info *c); /* shrinker.c */ -int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask); +int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); /* commit.c */ int ubifs_bg_thread(void *info); @@ -1673,9 +1686,10 @@ const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c); const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c); +int ubifs_calc_dark(const struct ubifs_info *c, int spc); /* file.c */ -int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync); +int ubifs_fsync(struct file *file, int datasync); int ubifs_setattr(struct dentry *dentry, struct iattr *attr); /* dir.c */ diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index cfd31e2..c74400f 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -55,9 +55,10 @@ * ACL support is not implemented. 
*/ +#include "ubifs.h" +#include <linux/slab.h> #include <linux/xattr.h> #include <linux/posix_acl_xattr.h> -#include "ubifs.h" /* * Limit the number of extended attributes per inode so that the total size @@ -78,9 +79,9 @@ enum { SECURITY_XATTR, }; -static struct inode_operations none_inode_operations; -static struct address_space_operations none_address_operations; -static struct file_operations none_file_operations; +static const struct inode_operations none_inode_operations; +static const struct address_space_operations none_address_operations; +static const struct file_operations none_file_operations; /** * create_xattr - create an extended attribute. |