summaryrefslogtreecommitdiff
path: root/fs/ubifs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ubifs')
-rw-r--r--fs/ubifs/budget.c36
-rw-r--r--fs/ubifs/commit.c7
-rw-r--r--fs/ubifs/debug.c281
-rw-r--r--fs/ubifs/debug.h9
-rw-r--r--fs/ubifs/dir.c13
-rw-r--r--fs/ubifs/file.c119
-rw-r--r--fs/ubifs/gc.c181
-rw-r--r--fs/ubifs/io.c80
-rw-r--r--fs/ubifs/ioctl.c1
-rw-r--r--fs/ubifs/journal.c16
-rw-r--r--fs/ubifs/key.h49
-rw-r--r--fs/ubifs/log.c23
-rw-r--r--fs/ubifs/lprops.c43
-rw-r--r--fs/ubifs/lpt.c22
-rw-r--r--fs/ubifs/lpt_commit.c6
-rw-r--r--fs/ubifs/master.c23
-rw-r--r--fs/ubifs/misc.h9
-rw-r--r--fs/ubifs/orphan.c7
-rw-r--r--fs/ubifs/recovery.c98
-rw-r--r--fs/ubifs/replay.c31
-rw-r--r--fs/ubifs/sb.c10
-rw-r--r--fs/ubifs/scan.c56
-rw-r--r--fs/ubifs/shrinker.c4
-rw-r--r--fs/ubifs/super.c201
-rw-r--r--fs/ubifs/tnc.c82
-rw-r--r--fs/ubifs/tnc_commit.c2
-rw-r--r--fs/ubifs/ubifs-media.h7
-rw-r--r--fs/ubifs/ubifs.h54
-rw-r--r--fs/ubifs/xattr.c9
29 files changed, 902 insertions, 577 deletions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index eaf6d89..c8ff0d1 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -54,41 +54,17 @@
* @nr_to_write: how many dirty pages to write-back
*
* This function shrinks UBIFS liability by means of writing back some amount
- * of dirty inodes and their pages. Returns the amount of pages which were
- * written back. The returned value does not include dirty inodes which were
- * synchronized.
+ * of dirty inodes and their pages.
*
* Note, this function synchronizes even VFS inodes which are locked
* (@i_mutex) by the caller of the budgeting function, because write-back does
* not touch @i_mutex.
*/
-static int shrink_liability(struct ubifs_info *c, int nr_to_write)
+static void shrink_liability(struct ubifs_info *c, int nr_to_write)
{
- int nr_written;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_NONE,
- .range_end = LLONG_MAX,
- .nr_to_write = nr_to_write,
- };
-
- generic_sync_sb_inodes(c->vfs_sb, &wbc);
- nr_written = nr_to_write - wbc.nr_to_write;
-
- if (!nr_written) {
- /*
- * Re-try again but wait on pages/inodes which are being
- * written-back concurrently (e.g., by pdflush).
- */
- memset(&wbc, 0, sizeof(struct writeback_control));
- wbc.sync_mode = WB_SYNC_ALL;
- wbc.range_end = LLONG_MAX;
- wbc.nr_to_write = nr_to_write;
- generic_sync_sb_inodes(c->vfs_sb, &wbc);
- nr_written = nr_to_write - wbc.nr_to_write;
- }
-
- dbg_budg("%d pages were written back", nr_written);
- return nr_written;
+ down_read(&c->vfs_sb->s_umount);
+ writeback_inodes_sb(c->vfs_sb);
+ up_read(&c->vfs_sb->s_umount);
}
/**
@@ -741,7 +717,7 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
* ubifs_get_free_space - return amount of free space.
* @c: UBIFS file-system description object
*
- * This function calculates and retuns amount of free space to report to
+ * This function calculates and returns amount of free space to report to
* user-space.
*/
long long ubifs_get_free_space(struct ubifs_info *c)
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index f3a7945..02429d8 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -45,6 +45,7 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
+#include <linux/slab.h>
#include "ubifs.h"
/**
@@ -62,7 +63,9 @@ static int do_commit(struct ubifs_info *c)
struct ubifs_lp_stats lst;
dbg_cmt("start");
- if (c->ro_media) {
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+
+ if (c->ro_error) {
err = -EROFS;
goto out_up;
}
@@ -510,7 +513,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
int first = 1, iip;
struct ubifs_debug_info *d = c->dbg;
- union ubifs_key lower_key, upper_key, l_key, u_key;
+ union ubifs_key uninitialized_var(lower_key), upper_key, l_key, u_key;
unsigned long long uninitialized_var(last_sqnum);
struct ubifs_idx_node *idx;
struct list_head list;
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index ce2cd83..0bee4db 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -34,6 +34,7 @@
#include <linux/moduleparam.h>
#include <linux/debugfs.h>
#include <linux/math64.h>
+#include <linux/slab.h>
#ifdef CONFIG_UBIFS_FS_DEBUG
@@ -210,6 +211,20 @@ const char *dbg_cstate(int cmt_state)
}
}
+const char *dbg_jhead(int jhead)
+{
+ switch (jhead) {
+ case GCHD:
+ return "0 (GC)";
+ case BASEHD:
+ return "1 (base)";
+ case DATAHD:
+ return "2 (data)";
+ default:
+ return "unknown journal head";
+ }
+}
+
static void dump_ch(const struct ubifs_ch *ch)
{
printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic));
@@ -336,13 +351,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
le32_to_cpu(sup->fmt_version));
printk(KERN_DEBUG "\ttime_gran %u\n",
le32_to_cpu(sup->time_gran));
- printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X"
- "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
- sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3],
- sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7],
- sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11],
- sup->uuid[12], sup->uuid[13], sup->uuid[14],
- sup->uuid[15]);
+ printk(KERN_DEBUG "\tUUID %pUB\n",
+ sup->uuid);
break;
}
case UBIFS_MST_NODE:
@@ -623,8 +633,9 @@ void dbg_dump_budg(struct ubifs_info *c)
/* If we are in R/O mode, journal heads do not exist */
if (c->jheads)
for (i = 0; i < c->jhead_cnt; i++)
- printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
- c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
+ printk(KERN_DEBUG "\tjhead %s\t LEB %d\n",
+ dbg_jhead(c->jheads[i].wbuf.jhead),
+ c->jheads[i].wbuf.lnum);
for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
bud = rb_entry(rb, struct ubifs_bud, rb);
printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
@@ -648,9 +659,90 @@ void dbg_dump_budg(struct ubifs_info *c)
void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
{
- printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), "
- "flags %#x\n", lp->lnum, lp->free, lp->dirty,
- c->leb_size - lp->free - lp->dirty, lp->flags);
+ int i, spc, dark = 0, dead = 0;
+ struct rb_node *rb;
+ struct ubifs_bud *bud;
+
+ spc = lp->free + lp->dirty;
+ if (spc < c->dead_wm)
+ dead = spc;
+ else
+ dark = ubifs_calc_dark(c, spc);
+
+ if (lp->flags & LPROPS_INDEX)
+ printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
+ "free + dirty %-8d flags %#x (", lp->lnum, lp->free,
+ lp->dirty, c->leb_size - spc, spc, lp->flags);
+ else
+ printk(KERN_DEBUG "LEB %-7d free %-8d dirty %-8d used %-8d "
+ "free + dirty %-8d dark %-4d dead %-4d nodes fit %-3d "
+ "flags %#-4x (", lp->lnum, lp->free, lp->dirty,
+ c->leb_size - spc, spc, dark, dead,
+ (int)(spc / UBIFS_MAX_NODE_SZ), lp->flags);
+
+ if (lp->flags & LPROPS_TAKEN) {
+ if (lp->flags & LPROPS_INDEX)
+ printk(KERN_CONT "index, taken");
+ else
+ printk(KERN_CONT "taken");
+ } else {
+ const char *s;
+
+ if (lp->flags & LPROPS_INDEX) {
+ switch (lp->flags & LPROPS_CAT_MASK) {
+ case LPROPS_DIRTY_IDX:
+ s = "dirty index";
+ break;
+ case LPROPS_FRDI_IDX:
+ s = "freeable index";
+ break;
+ default:
+ s = "index";
+ }
+ } else {
+ switch (lp->flags & LPROPS_CAT_MASK) {
+ case LPROPS_UNCAT:
+ s = "not categorized";
+ break;
+ case LPROPS_DIRTY:
+ s = "dirty";
+ break;
+ case LPROPS_FREE:
+ s = "free";
+ break;
+ case LPROPS_EMPTY:
+ s = "empty";
+ break;
+ case LPROPS_FREEABLE:
+ s = "freeable";
+ break;
+ default:
+ s = NULL;
+ break;
+ }
+ }
+ printk(KERN_CONT "%s", s);
+ }
+
+ for (rb = rb_first((struct rb_root *)&c->buds); rb; rb = rb_next(rb)) {
+ bud = rb_entry(rb, struct ubifs_bud, rb);
+ if (bud->lnum == lp->lnum) {
+ int head = 0;
+ for (i = 0; i < c->jhead_cnt; i++) {
+ if (lp->lnum == c->jheads[i].wbuf.lnum) {
+ printk(KERN_CONT ", jhead %s",
+ dbg_jhead(i));
+ head = 1;
+ }
+ }
+ if (!head)
+ printk(KERN_CONT ", bud of jhead %s",
+ dbg_jhead(bud->jhead));
+ }
+ }
+ if (lp->lnum == c->gc_lnum)
+ printk(KERN_CONT ", GC LEB");
+ printk(KERN_CONT ")\n");
}
void dbg_dump_lprops(struct ubifs_info *c)
@@ -724,7 +816,7 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
current->pid, lnum);
- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
+ sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0);
if (IS_ERR(sleb)) {
ubifs_err("scan error %d", (int)PTR_ERR(sleb));
return;
@@ -909,8 +1001,10 @@ out:
ubifs_msg("saved lprops statistics dump");
dbg_dump_lstats(&d->saved_lst);
ubifs_get_lp_stats(c, &lst);
+
ubifs_msg("current lprops statistics dump");
- dbg_dump_lstats(&d->saved_lst);
+ dbg_dump_lstats(&lst);
+
spin_lock(&c->space_lock);
dbg_dump_budg(c);
spin_unlock(&c->space_lock);
@@ -1916,7 +2010,7 @@ static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
inum = key_inum_flash(c, &dent->key);
fscki1 = read_add_inode(c, priv, inum);
if (IS_ERR(fscki1)) {
- err = PTR_ERR(fscki);
+ err = PTR_ERR(fscki1);
ubifs_err("error %d while processing entry node and "
"trying to find parent inode node %lu",
err, (unsigned long)inum);
@@ -2145,6 +2239,162 @@ out_free:
return err;
}
+/**
+ * dbg_check_data_nodes_order - check that list of data nodes is sorted.
+ * @c: UBIFS file-system description object
+ * @head: the list of nodes ('struct ubifs_scan_node' objects)
+ *
+ * This function returns zero if the list of data nodes is sorted correctly,
+ * and %-EINVAL if not.
+ */
+int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
+{
+ struct list_head *cur;
+ struct ubifs_scan_node *sa, *sb;
+
+ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ return 0;
+
+ for (cur = head->next; cur->next != head; cur = cur->next) {
+ ino_t inuma, inumb;
+ uint32_t blka, blkb;
+
+ cond_resched();
+ sa = container_of(cur, struct ubifs_scan_node, list);
+ sb = container_of(cur->next, struct ubifs_scan_node, list);
+
+ if (sa->type != UBIFS_DATA_NODE) {
+ ubifs_err("bad node type %d", sa->type);
+ dbg_dump_node(c, sa->node);
+ return -EINVAL;
+ }
+ if (sb->type != UBIFS_DATA_NODE) {
+ ubifs_err("bad node type %d", sb->type);
+ dbg_dump_node(c, sb->node);
+ return -EINVAL;
+ }
+
+ inuma = key_inum(c, &sa->key);
+ inumb = key_inum(c, &sb->key);
+
+ if (inuma < inumb)
+ continue;
+ if (inuma > inumb) {
+ ubifs_err("larger inum %lu goes before inum %lu",
+ (unsigned long)inuma, (unsigned long)inumb);
+ goto error_dump;
+ }
+
+ blka = key_block(c, &sa->key);
+ blkb = key_block(c, &sb->key);
+
+ if (blka > blkb) {
+ ubifs_err("larger block %u goes before %u", blka, blkb);
+ goto error_dump;
+ }
+ if (blka == blkb) {
+ ubifs_err("two data nodes for the same block");
+ goto error_dump;
+ }
+ }
+
+ return 0;
+
+error_dump:
+ dbg_dump_node(c, sa->node);
+ dbg_dump_node(c, sb->node);
+ return -EINVAL;
+}
+
+/**
+ * dbg_check_nondata_nodes_order - check that list of data nodes is sorted.
+ * @c: UBIFS file-system description object
+ * @head: the list of nodes ('struct ubifs_scan_node' objects)
+ *
+ * This function returns zero if the list of non-data nodes is sorted correctly,
+ * and %-EINVAL if not.
+ */
+int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
+{
+ struct list_head *cur;
+ struct ubifs_scan_node *sa, *sb;
+
+ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ return 0;
+
+ for (cur = head->next; cur->next != head; cur = cur->next) {
+ ino_t inuma, inumb;
+ uint32_t hasha, hashb;
+
+ cond_resched();
+ sa = container_of(cur, struct ubifs_scan_node, list);
+ sb = container_of(cur->next, struct ubifs_scan_node, list);
+
+ if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
+ sa->type != UBIFS_XENT_NODE) {
+ ubifs_err("bad node type %d", sa->type);
+ dbg_dump_node(c, sa->node);
+ return -EINVAL;
+ }
+ if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
+ sa->type != UBIFS_XENT_NODE) {
+ ubifs_err("bad node type %d", sb->type);
+ dbg_dump_node(c, sb->node);
+ return -EINVAL;
+ }
+
+ if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
+ ubifs_err("non-inode node goes before inode node");
+ goto error_dump;
+ }
+
+ if (sa->type == UBIFS_INO_NODE && sb->type != UBIFS_INO_NODE)
+ continue;
+
+ if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
+ /* Inode nodes are sorted in descending size order */
+ if (sa->len < sb->len) {
+ ubifs_err("smaller inode node goes first");
+ goto error_dump;
+ }
+ continue;
+ }
+
+ /*
+ * This is either a dentry or xentry, which should be sorted in
+ * ascending (parent ino, hash) order.
+ */
+ inuma = key_inum(c, &sa->key);
+ inumb = key_inum(c, &sb->key);
+
+ if (inuma < inumb)
+ continue;
+ if (inuma > inumb) {
+ ubifs_err("larger inum %lu goes before inum %lu",
+ (unsigned long)inuma, (unsigned long)inumb);
+ goto error_dump;
+ }
+
+ hasha = key_block(c, &sa->key);
+ hashb = key_block(c, &sb->key);
+
+ if (hasha > hashb) {
+ ubifs_err("larger hash %u goes before %u", hasha, hashb);
+ goto error_dump;
+ }
+ }
+
+ return 0;
+
+error_dump:
+ ubifs_msg("dumping first node");
+ dbg_dump_node(c, sa->node);
+ ubifs_msg("dumping second node");
+ dbg_dump_node(c, sb->node);
+ return -EINVAL;
+ return 0;
+}
+
static int invocation_cnt;
int dbg_force_in_the_gaps(void)
@@ -2531,6 +2781,7 @@ static const struct file_operations dfs_fops = {
.open = open_debugfs_file,
.write = write_debugfs_file,
.owner = THIS_MODULE,
+ .llseek = default_llseek,
};
/**
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index c1cd73b..69ebe47 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -271,6 +271,7 @@ void ubifs_debugging_exit(struct ubifs_info *c);
/* Dump functions */
const char *dbg_ntype(int type);
const char *dbg_cstate(int cmt_state);
+const char *dbg_jhead(int jhead);
const char *dbg_get_key_dump(const struct ubifs_info *c,
const union ubifs_key *key);
void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
@@ -321,6 +322,10 @@ void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
int dbg_check_lprops(struct ubifs_info *c);
int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
int row, int col);
+int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
+ loff_t size);
+int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
+int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
/* Force the use of in-the-gaps method for testing */
@@ -425,6 +430,7 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
#define dbg_ntype(type) ""
#define dbg_cstate(cmt_state) ""
+#define dbg_jhead(jhead) ""
#define dbg_get_key_dump(c, key) ({})
#define dbg_dump_inode(c, inode) ({})
#define dbg_dump_node(c, node) ({})
@@ -460,6 +466,9 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
#define dbg_check_heap(c, heap, cat, add_pos) ({})
#define dbg_check_lprops(c) 0
#define dbg_check_lpt_nodes(c, cnode, row, col) 0
+#define dbg_check_inode_size(c, inode, size) 0
+#define dbg_check_data_nodes_order(c, head) 0
+#define dbg_check_nondata_nodes_order(c, head) 0
#define dbg_force_in_the_gaps_enabled 0
#define dbg_force_in_the_gaps() 0
#define dbg_failure_mode 0
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 552fb01..14f64b6 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -104,14 +104,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
*/
inode->i_flags |= (S_NOCMTIME);
- inode->i_uid = current_fsuid();
- if (dir->i_mode & S_ISGID) {
- inode->i_gid = dir->i_gid;
- if (S_ISDIR(mode))
- mode |= S_ISGID;
- } else
- inode->i_gid = current_fsgid();
- inode->i_mode = mode;
+ inode_init_owner(inode, dir, mode);
inode->i_mtime = inode->i_atime = inode->i_ctime =
ubifs_current_time(inode);
inode->i_mapping->nrpages = 0;
@@ -557,7 +550,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
lock_2_inodes(dir, inode);
inc_nlink(inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
inode->i_ctime = ubifs_current_time(inode);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
@@ -1120,7 +1113,7 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (release)
ubifs_release_budget(c, &ino_req);
if (IS_SYNC(old_inode))
- err = old_inode->i_sb->s_op->write_inode(old_inode, 1);
+ err = old_inode->i_sb->s_op->write_inode(old_inode, NULL);
return err;
out_cancel:
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 6d34dc7..d77db7e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -21,39 +21,38 @@
*/
/*
- * This file implements VFS file and inode operations of regular files, device
+ * This file implements VFS file and inode operations for regular files, device
* nodes and symlinks as well as address space operations.
*
- * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the
- * page is dirty and is used for budgeting purposes - dirty pages should not be
- * budgeted. The PG_checked flag is set if full budgeting is required for the
- * page e.g., when it corresponds to a file hole or it is just beyond the file
- * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to
- * fail in this function, and the budget is released in 'ubifs_write_end()'. So
- * the PG_private and PG_checked flags carry the information about how the page
- * was budgeted, to make it possible to release the budget properly.
+ * UBIFS uses 2 page flags: @PG_private and @PG_checked. @PG_private is set if
+ * the page is dirty and is used for optimization purposes - dirty pages are
+ * not budgeted so the flag shows that 'ubifs_write_end()' should not release
+ * the budget for this page. The @PG_checked flag is set if full budgeting is
+ * required for the page e.g., when it corresponds to a file hole or it is
+ * beyond the file size. The budgeting is done in 'ubifs_write_begin()', because
+ * it is OK to fail in this function, and the budget is released in
+ * 'ubifs_write_end()'. So the @PG_private and @PG_checked flags carry
+ * information about how the page was budgeted, to make it possible to release
+ * the budget properly.
*
- * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations
- * we implement. However, this is not true for '->writepage()', which might be
- * called with 'i_mutex' unlocked. For example, when pdflush is performing
- * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the
- * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is
- * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim
- * path'. So, in '->writepage()' we are only guaranteed that the page is
- * locked.
+ * A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
+ * implement. However, this is not true for 'ubifs_writepage()', which may be
+ * called with @i_mutex unlocked. For example, when pdflush is doing background
+ * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal"
+ * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the
+ * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()'
+ * we are only guaranteed that the page is locked.
*
- * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g.,
- * readahead path does not have it locked ("sys_read -> generic_file_aio_read
- * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is
- * not set as well. However, UBIFS disables readahead.
- *
- * This, for example means that there might be 2 concurrent '->writepage()'
- * calls for the same inode, but different inode dirty pages.
+ * Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
+ * read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
+ * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
+ * set as well. However, UBIFS disables readahead.
*/
#include "ubifs.h"
#include <linux/mount.h>
#include <linux/namei.h>
+#include <linux/slab.h>
static int read_block(struct inode *inode, void *addr, unsigned int block,
struct ubifs_data_node *dn)
@@ -434,8 +433,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
struct page *page;
ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
+ ubifs_assert(!c->ro_media && !c->ro_mount);
- if (unlikely(c->ro_media))
+ if (unlikely(c->ro_error))
return -EROFS;
/* Try out the fast-path part first */
@@ -449,9 +449,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
/*
* We change whole page so no need to load it. But we
* have to set the @PG_checked flag to make the further
- * code the page is new. This might be not true, but it
- * is better to budget more that to read the page from
- * the media.
+ * code know that the page is new. This might be not
+ * true, but it is better to budget more than to read
+ * the page from the media.
*/
SetPageChecked(page);
skipped_read = 1;
@@ -497,8 +497,8 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
}
/*
- * Whee, we aquired budgeting quickly - without involving
- * garbage-collection, committing or forceing write-back. We return
+ * Whee, we acquired budgeting quickly - without involving
+ * garbage-collection, committing or forcing write-back. We return
* with @ui->ui_mutex locked if we are appending pages, and unlocked
* otherwise. This is an optimization (slightly hacky though).
*/
@@ -562,7 +562,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
/*
* Return 0 to force VFS to repeat the whole operation, or the
- * error code if 'do_readpage()' failes.
+ * error code if 'do_readpage()' fails.
*/
copied = do_readpage(page);
goto out;
@@ -968,12 +968,16 @@ static int do_writepage(struct page *page, int len)
* the page locked, and it locks @ui_mutex. However, write-back does take inode
* @i_mutex, which means other VFS operations may be run on this inode at the
* same time. And the problematic one is truncation to smaller size, from where
- * we have to call 'vmtruncate()', which first changes @inode->i_size, then
+ * we have to call 'truncate_setsize()', which first changes @inode->i_size, then
* drops the truncated pages. And while dropping the pages, it takes the page
- * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with
+ * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with
* @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
* means that @inode->i_size is changed while @ui_mutex is unlocked.
*
+ * XXX(truncate): with the new truncate sequence this is not true anymore,
+ * and the calls to truncate_setsize can be move around freely. They should
+ * be moved to the very end of the truncate sequence.
+ *
* But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
* inode size. How do we do this if @inode->i_size may became smaller while we
* are in the middle of 'ubifs_writepage()'? The UBIFS solution is the
@@ -1013,7 +1017,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
/* Is the page fully inside @i_size? */
if (page->index < end_index) {
if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
- err = inode->i_sb->s_op->write_inode(inode, 1);
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
goto out_unlock;
/*
@@ -1041,7 +1045,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
kunmap_atomic(kaddr, KM_USER0);
if (i_size > synced_i_size) {
- err = inode->i_sb->s_op->write_inode(inode, 1);
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
goto out_unlock;
}
@@ -1126,9 +1130,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
budgeted = 0;
}
- err = vmtruncate(inode, new_size);
- if (err)
- goto out_budg;
+ truncate_setsize(inode, new_size);
if (offset) {
pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
@@ -1175,11 +1177,11 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
ui->ui_size = inode->i_size;
/* Truncation changes inode [mc]time */
inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
- /* The other attributes may be changed at the same time as well */
+ /* Other attributes may be changed at the same time as well */
do_attr_changes(inode, attr);
-
err = ubifs_jnl_truncate(c, inode, old_size, new_size);
mutex_unlock(&ui->ui_mutex);
+
out_budg:
if (budgeted)
ubifs_release_budget(c, &req);
@@ -1215,16 +1217,14 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
if (attr->ia_valid & ATTR_SIZE) {
dbg_gen("size %lld -> %lld", inode->i_size, new_size);
- err = vmtruncate(inode, new_size);
- if (err)
- goto out;
+ truncate_setsize(inode, new_size);
}
mutex_lock(&ui->ui_mutex);
if (attr->ia_valid & ATTR_SIZE) {
/* Truncation changes inode [mc]time */
inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
- /* 'vmtruncate()' changed @i_size, update @ui_size */
+ /* 'truncate_setsize()' changed @i_size, update @ui_size */
ui->ui_size = inode->i_size;
}
@@ -1244,11 +1244,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
if (release)
ubifs_release_budget(c, &req);
if (IS_SYNC(inode))
- err = inode->i_sb->s_op->write_inode(inode, 1);
- return err;
-
-out:
- ubifs_release_budget(c, &req);
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
return err;
}
@@ -1305,9 +1301,9 @@ static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
return NULL;
}
-int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int ubifs_fsync(struct file *file, int datasync)
{
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file->f_mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
int err;
@@ -1318,7 +1314,7 @@ int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
* the inode unless this is a 'datasync()' call.
*/
if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
- err = inode->i_sb->s_op->write_inode(inode, 1);
+ err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
return err;
}
@@ -1391,7 +1387,6 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
int err;
- ssize_t ret;
struct inode *inode = iocb->ki_filp->f_mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -1399,17 +1394,7 @@ static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err)
return err;
- ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
- if (ret < 0)
- return ret;
-
- if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) {
- err = ubifs_sync_wbufs_by_inode(c, inode);
- if (err)
- return err;
- }
-
- return ret;
+ return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
static int ubifs_set_page_dirty(struct page *page)
@@ -1455,9 +1440,9 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vm
dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
i_size_read(inode));
- ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
+ ubifs_assert(!c->ro_media && !c->ro_mount);
- if (unlikely(c->ro_media))
+ if (unlikely(c->ro_error))
return VM_FAULT_SIGBUS; /* -EROFS */
/*
@@ -1536,7 +1521,7 @@ out_unlock:
return err;
}
-static struct vm_operations_struct ubifs_file_vm_ops = {
+static const struct vm_operations_struct ubifs_file_vm_ops = {
.fault = filemap_fault,
.page_mkwrite = ubifs_vm_page_mkwrite,
};
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index f0f5f15..151f108 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -53,7 +53,9 @@
* good, and GC takes extra care when moving them.
*/
+#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/list_sort.h>
#include "ubifs.h"
/*
@@ -108,101 +110,6 @@ static int switch_gc_head(struct ubifs_info *c)
}
/**
- * list_sort - sort a list.
- * @priv: private data, passed to @cmp
- * @head: the list to sort
- * @cmp: the elements comparison function
- *
- * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
- * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
- * in ascending order.
- *
- * The comparison function @cmp is supposed to return a negative value if @a is
- * than @b, and a positive value if @a is greater than @b. If @a and @b are
- * equivalent, then it does not matter what this function returns.
- */
-static void list_sort(void *priv, struct list_head *head,
- int (*cmp)(void *priv, struct list_head *a,
- struct list_head *b))
-{
- struct list_head *p, *q, *e, *list, *tail, *oldhead;
- int insize, nmerges, psize, qsize, i;
-
- if (list_empty(head))
- return;
-
- list = head->next;
- list_del(head);
- insize = 1;
- for (;;) {
- p = oldhead = list;
- list = tail = NULL;
- nmerges = 0;
-
- while (p) {
- nmerges++;
- q = p;
- psize = 0;
- for (i = 0; i < insize; i++) {
- psize++;
- q = q->next == oldhead ? NULL : q->next;
- if (!q)
- break;
- }
-
- qsize = insize;
- while (psize > 0 || (qsize > 0 && q)) {
- if (!psize) {
- e = q;
- q = q->next;
- qsize--;
- if (q == oldhead)
- q = NULL;
- } else if (!qsize || !q) {
- e = p;
- p = p->next;
- psize--;
- if (p == oldhead)
- p = NULL;
- } else if (cmp(priv, p, q) <= 0) {
- e = p;
- p = p->next;
- psize--;
- if (p == oldhead)
- p = NULL;
- } else {
- e = q;
- q = q->next;
- qsize--;
- if (q == oldhead)
- q = NULL;
- }
- if (tail)
- tail->next = e;
- else
- list = e;
- e->prev = tail;
- tail = e;
- }
- p = q;
- }
-
- tail->next = list;
- list->prev = tail;
-
- if (nmerges <= 1)
- break;
-
- insize *= 2;
- }
-
- head->next = list;
- head->prev = list->prev;
- list->prev->next = head;
- list->prev = head;
-}
-
-/**
* data_nodes_cmp - compare 2 data nodes.
* @priv: UBIFS file-system description object
* @a: first data node
@@ -218,10 +125,16 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
struct ubifs_scan_node *sa, *sb;
cond_resched();
+ if (a == b)
+ return 0;
+
sa = list_entry(a, struct ubifs_scan_node, list);
sb = list_entry(b, struct ubifs_scan_node, list);
+
ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY);
ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY);
+ ubifs_assert(sa->type == UBIFS_DATA_NODE);
+ ubifs_assert(sb->type == UBIFS_DATA_NODE);
inuma = key_inum(c, &sa->key);
inumb = key_inum(c, &sb->key);
@@ -250,28 +163,40 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
*/
int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
{
- int typea, typeb;
ino_t inuma, inumb;
struct ubifs_info *c = priv;
struct ubifs_scan_node *sa, *sb;
cond_resched();
+ if (a == b)
+ return 0;
+
sa = list_entry(a, struct ubifs_scan_node, list);
sb = list_entry(b, struct ubifs_scan_node, list);
- typea = key_type(c, &sa->key);
- typeb = key_type(c, &sb->key);
- ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY);
+
+ ubifs_assert(key_type(c, &sa->key) != UBIFS_DATA_KEY &&
+ key_type(c, &sb->key) != UBIFS_DATA_KEY);
+ ubifs_assert(sa->type != UBIFS_DATA_NODE &&
+ sb->type != UBIFS_DATA_NODE);
/* Inodes go before directory entries */
- if (typea == UBIFS_INO_KEY) {
- if (typeb == UBIFS_INO_KEY)
+ if (sa->type == UBIFS_INO_NODE) {
+ if (sb->type == UBIFS_INO_NODE)
return sb->len - sa->len;
return -1;
}
- if (typeb == UBIFS_INO_KEY)
+ if (sb->type == UBIFS_INO_NODE)
return 1;
- ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY);
+ ubifs_assert(key_type(c, &sa->key) == UBIFS_DENT_KEY ||
+ key_type(c, &sa->key) == UBIFS_XENT_KEY);
+ ubifs_assert(key_type(c, &sb->key) == UBIFS_DENT_KEY ||
+ key_type(c, &sb->key) == UBIFS_XENT_KEY);
+ ubifs_assert(sa->type == UBIFS_DENT_NODE ||
+ sa->type == UBIFS_XENT_NODE);
+ ubifs_assert(sb->type == UBIFS_DENT_NODE ||
+ sb->type == UBIFS_XENT_NODE);
+
inuma = key_inum(c, &sa->key);
inumb = key_inum(c, &sb->key);
@@ -317,17 +242,33 @@ int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
struct list_head *nondata, int *min)
{
+ int err;
struct ubifs_scan_node *snod, *tmp;
*min = INT_MAX;
/* Separate data nodes and non-data nodes */
list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
- int err;
+ ubifs_assert(snod->type == UBIFS_INO_NODE ||
+ snod->type == UBIFS_DATA_NODE ||
+ snod->type == UBIFS_DENT_NODE ||
+ snod->type == UBIFS_XENT_NODE ||
+ snod->type == UBIFS_TRUN_NODE);
+
+ if (snod->type != UBIFS_INO_NODE &&
+ snod->type != UBIFS_DATA_NODE &&
+ snod->type != UBIFS_DENT_NODE &&
+ snod->type != UBIFS_XENT_NODE) {
+ /* Probably truncation node, zap it */
+ list_del(&snod->list);
+ kfree(snod);
+ continue;
+ }
- ubifs_assert(snod->type != UBIFS_IDX_NODE);
- ubifs_assert(snod->type != UBIFS_REF_NODE);
- ubifs_assert(snod->type != UBIFS_CS_NODE);
+ ubifs_assert(key_type(c, &snod->key) == UBIFS_DATA_KEY ||
+ key_type(c, &snod->key) == UBIFS_INO_KEY ||
+ key_type(c, &snod->key) == UBIFS_DENT_KEY ||
+ key_type(c, &snod->key) == UBIFS_XENT_KEY);
err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
snod->offs, 0);
@@ -351,6 +292,13 @@ static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
/* Sort data and non-data nodes */
list_sort(c, &sleb->nodes, &data_nodes_cmp);
list_sort(c, nondata, &nondata_nodes_cmp);
+
+ err = dbg_check_data_nodes_order(c, &sleb->nodes);
+ if (err)
+ return err;
+ err = dbg_check_nondata_nodes_order(c, nondata);
+ if (err)
+ return err;
return 0;
}
@@ -529,7 +477,7 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
* We scan the entire LEB even though we only really need to scan up to
* (c->leb_size - lp->free).
*/
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
@@ -668,13 +616,14 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
ubifs_assert_cmt_locked(c);
+ ubifs_assert(!c->ro_media && !c->ro_mount);
if (ubifs_gc_should_commit(c))
return -EAGAIN;
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
- if (c->ro_media) {
+ if (c->ro_error) {
ret = -EROFS;
goto out_unlock;
}
@@ -770,14 +719,12 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
ret = ubifs_garbage_collect_leb(c, &lp);
if (ret < 0) {
- if (ret == -EAGAIN || ret == -ENOSPC) {
+ if (ret == -EAGAIN) {
/*
- * These codes are not errors, so we have to
- * return the LEB to lprops. But if the
- * 'ubifs_return_leb()' function fails, its
- * failure code is propagated to the caller
- * instead of the original '-EAGAIN' or
- * '-ENOSPC'.
+ * This is not error, so we have to return the
+ * LEB to lprops. But if 'ubifs_return_leb()'
+ * fails, its failure code is propagated to the
+ * caller instead of the original '-EAGAIN'.
*/
err = ubifs_return_leb(c, lp.lnum);
if (err)
@@ -867,8 +814,8 @@ out_unlock:
out:
ubifs_assert(ret < 0);
ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
- ubifs_ro_mode(c, ret);
ubifs_wbuf_sync_nolock(wbuf);
+ ubifs_ro_mode(c, ret);
mutex_unlock(&wbuf->io_mutex);
ubifs_return_leb(c, lp.lnum);
return ret;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index bc58571..d821731 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -51,6 +51,7 @@
*/
#include <linux/crc32.h>
+#include <linux/slab.h>
#include "ubifs.h"
/**
@@ -60,9 +61,10 @@
*/
void ubifs_ro_mode(struct ubifs_info *c, int err)
{
- if (!c->ro_media) {
- c->ro_media = 1;
+ if (!c->ro_error) {
+ c->ro_error = 1;
c->no_chk_data_crc = 0;
+ c->vfs_sb->s_flags |= MS_RDONLY;
ubifs_warn("switched to read-only mode, error %d", err);
dbg_dump_stack();
}
@@ -297,6 +299,7 @@ static enum hrtimer_restart wbuf_timer_callback_nolock(struct hrtimer *timer)
{
struct ubifs_wbuf *wbuf = container_of(timer, struct ubifs_wbuf, timer);
+ dbg_io("jhead %s", dbg_jhead(wbuf->jhead));
wbuf->need_sync = 1;
wbuf->c->need_wbuf_sync = 1;
ubifs_wake_up_bgt(wbuf->c);
@@ -311,8 +314,13 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
ubifs_assert(!hrtimer_active(&wbuf->timer));
- if (!ktime_to_ns(wbuf->softlimit))
+ if (wbuf->no_timer)
return;
+ dbg_io("set timer for jhead %s, %llu-%llu millisecs",
+ dbg_jhead(wbuf->jhead),
+ div_u64(ktime_to_ns(wbuf->softlimit), USEC_PER_SEC),
+ div_u64(ktime_to_ns(wbuf->softlimit) + wbuf->delta,
+ USEC_PER_SEC));
hrtimer_start_range_ns(&wbuf->timer, wbuf->softlimit, wbuf->delta,
HRTIMER_MODE_REL);
}
@@ -323,11 +331,8 @@ static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
*/
static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
{
- /*
- * If the syncer is waiting for the lock (from the background thread's
- * context) and another task is changing write-buffer then the syncing
- * should be canceled.
- */
+ if (wbuf->no_timer)
+ return;
wbuf->need_sync = 0;
hrtimer_cancel(&wbuf->timer);
}
@@ -349,13 +354,13 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
/* Write-buffer is empty or not seeked */
return 0;
- dbg_io("LEB %d:%d, %d bytes",
- wbuf->lnum, wbuf->offs, wbuf->used);
- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
+ dbg_io("LEB %d:%d, %d bytes, jhead %s",
+ wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
ubifs_assert(!(wbuf->avail & 7));
ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
+ ubifs_assert(!c->ro_media && !c->ro_mount);
- if (c->ro_media)
+ if (c->ro_error)
return -EROFS;
ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
@@ -390,7 +395,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
* @offs: logical eraseblock offset to seek to
* @dtype: data type
*
- * This function targets the write buffer to logical eraseblock @lnum:@offs.
+ * This function targets the write-buffer to logical eraseblock @lnum:@offs.
* The write-buffer is synchronized if it is not empty. Returns zero in case of
* success and a negative error code in case of failure.
*/
@@ -399,7 +404,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
{
const struct ubifs_info *c = wbuf->c;
- dbg_io("LEB %d:%d", lnum, offs);
+ dbg_io("LEB %d:%d, jhead %s", lnum, offs, dbg_jhead(wbuf->jhead));
ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
ubifs_assert(offs >= 0 && offs <= c->leb_size);
ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
@@ -435,11 +440,12 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c)
{
int err, i;
+ ubifs_assert(!c->ro_media && !c->ro_mount);
if (!c->need_wbuf_sync)
return 0;
c->need_wbuf_sync = 0;
- if (c->ro_media) {
+ if (c->ro_error) {
err = -EROFS;
goto out_timers;
}
@@ -506,14 +512,15 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
struct ubifs_info *c = wbuf->c;
int err, written, n, aligned_len = ALIGN(len, 8), offs;
- dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len,
- dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum,
- wbuf->offs + wbuf->used);
+ dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
+ dbg_ntype(((struct ubifs_ch *)buf)->node_type),
+ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs + wbuf->used);
ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
+ ubifs_assert(!c->ro_media && !c->ro_mount);
if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
err = -ENOSPC;
@@ -522,7 +529,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
cancel_wbuf_timer_nolock(wbuf);
- if (c->ro_media)
+ if (c->ro_error)
return -EROFS;
if (aligned_len <= wbuf->avail) {
@@ -533,8 +540,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
memcpy(wbuf->buf + wbuf->used, buf, len);
if (aligned_len == wbuf->avail) {
- dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum,
- wbuf->offs);
+ dbg_io("flush jhead %s wbuf to LEB %d:%d",
+ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
wbuf->offs, c->min_io_size,
wbuf->dtype);
@@ -562,7 +569,8 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
* minimal I/O unit. We have to fill and flush write-buffer and switch
* to the next min. I/O unit.
*/
- dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs);
+ dbg_io("flush jhead %s wbuf to LEB %d:%d",
+ dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
c->min_io_size, wbuf->dtype);
@@ -657,8 +665,9 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
buf_len);
ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
+ ubifs_assert(!c->ro_media && !c->ro_mount);
- if (c->ro_media)
+ if (c->ro_error)
return -EROFS;
ubifs_prepare_node(c, buf, len, 1);
@@ -695,7 +704,8 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
int err, rlen, overlap;
struct ubifs_ch *ch = buf;
- dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
+ dbg_io("LEB %d:%d, %s, length %d, jhead %s", lnum, offs,
+ dbg_ntype(type), len, dbg_jhead(wbuf->jhead));
ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
ubifs_assert(!(offs & 7) && offs < c->leb_size);
ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
@@ -808,7 +818,8 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
return 0;
out:
- ubifs_err("bad node at LEB %d:%d", lnum, offs);
+ ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs,
+ ubi_is_mapped(c->ubi, lnum));
dbg_dump_node(c, buf);
dbg_dump_stack();
return -EINVAL;
@@ -819,13 +830,12 @@ out:
* @c: UBIFS file-system description object
* @wbuf: write-buffer to initialize
*
- * This function initializes write buffer. Returns zero in case of success
+ * This function initializes write-buffer. Returns zero in case of success
* %-ENOMEM in case of failure.
*/
int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
{
size_t size;
- ktime_t hardlimit;
wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
if (!wbuf->buf)
@@ -851,22 +861,16 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
hrtimer_init(&wbuf->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
wbuf->timer.function = wbuf_timer_callback_nolock;
- /*
- * Make write-buffer soft limit to be 20% of the hard limit. The
- * write-buffer timer is allowed to expire any time between the soft
- * and hard limits.
- */
- hardlimit = ktime_set(DEFAULT_WBUF_TIMEOUT_SECS, 0);
- wbuf->delta = (DEFAULT_WBUF_TIMEOUT_SECS * NSEC_PER_SEC) * 2 / 10;
- wbuf->softlimit = ktime_sub_ns(hardlimit, wbuf->delta);
- hrtimer_set_expires_range_ns(&wbuf->timer, wbuf->softlimit,
- wbuf->delta);
+ wbuf->softlimit = ktime_set(WBUF_TIMEOUT_SOFTLIMIT, 0);
+ wbuf->delta = WBUF_TIMEOUT_HARDLIMIT - WBUF_TIMEOUT_SOFTLIMIT;
+ wbuf->delta *= 1000000000ULL;
+ ubifs_assert(wbuf->delta <= ULONG_MAX);
return 0;
}
/**
* ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
- * @wbuf: the write-buffer whereto add
+ * @wbuf: the write-buffer where to add
* @inum: the inode number
*
* This function adds an inode number to the inode array of the write-buffer.
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 6db7a6b..8aacd64 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -25,7 +25,6 @@
/* This file implements EXT2-compatible extended attribute ioctl() calls */
#include <linux/compat.h>
-#include <linux/smp_lock.h>
#include <linux/mount.h>
#include "ubifs.h"
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 64b5f3a..914f1bd 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -122,11 +122,12 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
* better to try to allocate space at the ends of eraseblocks. This is
* what the squeeze parameter does.
*/
+ ubifs_assert(!c->ro_media && !c->ro_mount);
squeeze = (jhead == BASEHD);
again:
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
- if (c->ro_media) {
+ if (c->ro_error) {
err = -EROFS;
goto out_unlock;
}
@@ -158,7 +159,7 @@ again:
* some. But the write-buffer mutex has to be unlocked because
* GC also takes it.
*/
- dbg_jnl("no free space jhead %d, run GC", jhead);
+ dbg_jnl("no free space in jhead %s, run GC", dbg_jhead(jhead));
mutex_unlock(&wbuf->io_mutex);
lnum = ubifs_garbage_collect(c, 0);
@@ -173,7 +174,8 @@ again:
* because we dropped @wbuf->io_mutex, so try once
* again.
*/
- dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
+ dbg_jnl("GC couldn't make a free LEB for jhead %s",
+ dbg_jhead(jhead));
if (retries++ < 2) {
dbg_jnl("retry (%d)", retries);
goto again;
@@ -184,7 +186,7 @@ again:
}
mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
- dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
+ dbg_jnl("got LEB %d for jhead %s", lnum, dbg_jhead(jhead));
avail = c->leb_size - wbuf->offs - wbuf->used;
if (wbuf->lnum != -1 && avail >= len) {
@@ -255,7 +257,8 @@ static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
*lnum = c->jheads[jhead].wbuf.lnum;
*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
+ dbg_jnl("jhead %s, LEB %d:%d, len %d",
+ dbg_jhead(jhead), *lnum, *offs, len);
ubifs_prepare_node(c, node, len, 0);
return ubifs_wbuf_write_nolock(wbuf, node, len);
@@ -285,7 +288,8 @@ static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
*lnum = c->jheads[jhead].wbuf.lnum;
*offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
- dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
+ dbg_jnl("jhead %s, LEB %d:%d, len %d",
+ dbg_jhead(jhead), *lnum, *offs, len);
err = ubifs_wbuf_write_nolock(wbuf, buf, len);
if (err)
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 5fa27ea..92a8491 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -229,23 +229,6 @@ static inline void xent_key_init(const struct ubifs_info *c,
}
/**
- * xent_key_init_hash - initialize extended attribute entry key without
- * re-calculating hash function.
- * @c: UBIFS file-system description object
- * @key: key to initialize
- * @inum: host inode number
- * @hash: extended attribute entry name hash
- */
-static inline void xent_key_init_hash(const struct ubifs_info *c,
- union ubifs_key *key, ino_t inum,
- uint32_t hash)
-{
- ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
- key->u32[0] = inum;
- key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
-}
-
-/**
* xent_key_init_flash - initialize on-flash extended attribute entry key.
* @c: UBIFS file-system description object
* @k: key to initialize
@@ -295,22 +278,15 @@ static inline void data_key_init(const struct ubifs_info *c,
}
/**
- * data_key_init_flash - initialize on-flash data key.
+ * highest_data_key - get the highest possible data key for an inode.
* @c: UBIFS file-system description object
- * @k: key to initialize
+ * @key: key to initialize
* @inum: inode number
- * @block: block number
*/
-static inline void data_key_init_flash(const struct ubifs_info *c, void *k,
- ino_t inum, unsigned int block)
+static inline void highest_data_key(const struct ubifs_info *c,
+ union ubifs_key *key, ino_t inum)
{
- union ubifs_key *key = k;
-
- ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
- key->j32[0] = cpu_to_le32(inum);
- key->j32[1] = cpu_to_le32(block |
- (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS));
- memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
+ data_key_init(c, key, inum, UBIFS_S_KEY_BLOCK_MASK);
}
/**
@@ -330,6 +306,20 @@ static inline void trun_key_init(const struct ubifs_info *c,
}
/**
+ * invalid_key_init - initialize invalid node key.
+ * @c: UBIFS file-system description object
+ * @key: key to initialize
+ *
+ * This is a helper function which marks a @key object as invalid.
+ */
+static inline void invalid_key_init(const struct ubifs_info *c,
+ union ubifs_key *key)
+{
+ key->u32[0] = 0xDEADBEAF;
+ key->u32[1] = UBIFS_INVALID_KEY;
+}
+
+/**
* key_type - get key type.
* @c: UBIFS file-system description object
* @key: key to get type of
@@ -554,4 +544,5 @@ static inline unsigned long long key_max_inode_size(const struct ubifs_info *c)
return 0;
}
}
+
#endif /* !__UBIFS_KEY_H__ */
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 56e3377..4d0cb12 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -159,7 +159,7 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
jhead = &c->jheads[bud->jhead];
list_add_tail(&bud->list, &jhead->buds_list);
} else
- ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY));
+ ubifs_assert(c->replaying && c->ro_mount);
/*
* Note, although this is a new bud, we anyway account this space now,
@@ -169,8 +169,8 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
*/
c->bud_bytes += c->leb_size - bud->start;
- dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum,
- bud->start, bud->jhead, c->bud_bytes);
+ dbg_log("LEB %d:%d, jhead %s, bud_bytes %lld", bud->lnum,
+ bud->start, dbg_jhead(bud->jhead), c->bud_bytes);
spin_unlock(&c->buds_lock);
}
@@ -223,8 +223,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
}
mutex_lock(&c->log_mutex);
-
- if (c->ro_media) {
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error) {
err = -EROFS;
goto out_unlock;
}
@@ -355,16 +355,16 @@ static void remove_buds(struct ubifs_info *c)
* heads (non-closed buds).
*/
c->cmt_bud_bytes += wbuf->offs - bud->start;
- dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
+ dbg_log("preserve %d:%d, jhead %s, bud bytes %d, "
"cmt_bud_bytes %lld", bud->lnum, bud->start,
- bud->jhead, wbuf->offs - bud->start,
+ dbg_jhead(bud->jhead), wbuf->offs - bud->start,
c->cmt_bud_bytes);
bud->start = wbuf->offs;
} else {
c->cmt_bud_bytes += c->leb_size - bud->start;
- dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
+ dbg_log("remove %d:%d, jhead %s, bud bytes %d, "
"cmt_bud_bytes %lld", bud->lnum, bud->start,
- bud->jhead, c->leb_size - bud->start,
+ dbg_jhead(bud->jhead), c->leb_size - bud->start,
c->cmt_bud_bytes);
rb_erase(p1, &c->buds);
/*
@@ -429,7 +429,8 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
if (lnum == -1 || offs == c->leb_size)
continue;
- dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
+ dbg_log("add ref to LEB %d:%d for jhead %s",
+ lnum, offs, dbg_jhead(i));
ref = buf + len;
ref->ch.node_type = UBIFS_REF_NODE;
ref->lnum = cpu_to_le32(lnum);
@@ -695,7 +696,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
lnum = c->ltail_lnum;
write_lnum = lnum;
while (1) {
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 0);
if (IS_ERR(sleb)) {
err = PTR_ERR(sleb);
goto out_free;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 4cdd284..4d4ca38 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -281,7 +281,7 @@ void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
case LPROPS_FREE:
if (add_to_lpt_heap(c, lprops, cat))
break;
- /* No more room on heap so make it uncategorized */
+ /* No more room on heap so make it un-categorized */
cat = LPROPS_UNCAT;
/* Fall through */
case LPROPS_UNCAT:
@@ -375,8 +375,8 @@ void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
* @lprops: LEB properties
*
* A LEB may have fallen off of the bottom of a heap, and ended up as
- * uncategorized even though it has enough space for us now. If that is the case
- * this function will put the LEB back onto a heap.
+ * un-categorized even though it has enough space for us now. If that is the
+ * case this function will put the LEB back onto a heap.
*/
void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops)
{
@@ -436,10 +436,10 @@ int ubifs_categorize_lprops(const struct ubifs_info *c,
/**
* change_category - change LEB properties category.
* @c: UBIFS file-system description object
- * @lprops: LEB properties to recategorize
+ * @lprops: LEB properties to re-categorize
*
* LEB properties are categorized to enable fast find operations. When the LEB
- * properties change they must be recategorized.
+ * properties change they must be re-categorized.
*/
static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
{
@@ -461,21 +461,18 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
}
/**
- * calc_dark - calculate LEB dark space size.
+ * ubifs_calc_dark - calculate LEB dark space size.
* @c: the UBIFS file-system description object
* @spc: amount of free and dirty space in the LEB
*
- * This function calculates amount of dark space in an LEB which has @spc bytes
- * of free and dirty space. Returns the calculations result.
+ * This function calculates and returns amount of dark space in an LEB which
+ * has @spc bytes of free and dirty space.
*
- * Dark space is the space which is not always usable - it depends on which
- * nodes are written in which order. E.g., if an LEB has only 512 free bytes,
- * it is dark space, because it cannot fit a large data node. So UBIFS cannot
- * count on this LEB and treat these 512 bytes as usable because it is not true
- * if, for example, only big chunks of uncompressible data will be written to
- * the FS.
+ * UBIFS is trying to account the space which might not be usable, and this
+ * space is called "dark space". For example, if an LEB has only %512 free
+ * bytes, it is dark space, because it cannot fit a large data node.
*/
-static int calc_dark(struct ubifs_info *c, int spc)
+int ubifs_calc_dark(const struct ubifs_info *c, int spc)
{
ubifs_assert(!(spc & 7));
@@ -518,7 +515,7 @@ static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
* @free: new free space amount
* @dirty: new dirty space amount
* @flags: new flags
- * @idx_gc_cnt: change to the count of idx_gc list
+ * @idx_gc_cnt: change to the count of @idx_gc list
*
* This function changes LEB properties (@free, @dirty or @flag). However, the
* property which has the %LPROPS_NC value is not changed. Returns a pointer to
@@ -535,7 +532,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
{
/*
* This is the only function that is allowed to change lprops, so we
- * discard the const qualifier.
+ * discard the "const" qualifier.
*/
struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp;
@@ -575,7 +572,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
if (old_spc < c->dead_wm)
c->lst.total_dead -= old_spc;
else
- c->lst.total_dark -= calc_dark(c, old_spc);
+ c->lst.total_dark -= ubifs_calc_dark(c, old_spc);
c->lst.total_used -= c->leb_size - old_spc;
}
@@ -616,7 +613,7 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
if (new_spc < c->dead_wm)
c->lst.total_dead += new_spc;
else
- c->lst.total_dark += calc_dark(c, new_spc);
+ c->lst.total_dark += ubifs_calc_dark(c, new_spc);
c->lst.total_used += c->leb_size - new_spc;
}
@@ -1096,7 +1093,7 @@ static int scan_check_cb(struct ubifs_info *c,
}
}
- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
+ sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0);
if (IS_ERR(sleb)) {
/*
* After an unclean unmount, empty and freeable LEBs
@@ -1107,7 +1104,7 @@ static int scan_check_cb(struct ubifs_info *c,
"- continuing checking");
lst->empty_lebs += 1;
lst->total_free += c->leb_size;
- lst->total_dark += calc_dark(c, c->leb_size);
+ lst->total_dark += ubifs_calc_dark(c, c->leb_size);
return LPT_SCAN_CONTINUE;
}
@@ -1117,7 +1114,7 @@ static int scan_check_cb(struct ubifs_info *c,
"- continuing checking");
lst->total_free += lp->free;
lst->total_dirty += lp->dirty;
- lst->total_dark += calc_dark(c, c->leb_size);
+ lst->total_dark += ubifs_calc_dark(c, c->leb_size);
return LPT_SCAN_CONTINUE;
}
data->err = PTR_ERR(sleb);
@@ -1235,7 +1232,7 @@ static int scan_check_cb(struct ubifs_info *c,
if (spc < c->dead_wm)
lst->total_dead += spc;
else
- lst->total_dark += calc_dark(c, spc);
+ lst->total_dark += ubifs_calc_dark(c, spc);
}
ubifs_scan_destroy(sleb);
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index b2792e8..72775d3 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -46,6 +46,7 @@
#include "ubifs.h"
#include <linux/crc16.h>
#include <linux/math64.h>
+#include <linux/slab.h>
/**
* do_calc_lpt_geom - calculate sizes for the LPT area.
@@ -1362,6 +1363,7 @@ static int read_lsave(struct ubifs_info *c)
goto out;
for (i = 0; i < c->lsave_cnt; i++) {
int lnum = c->lsave[i];
+ struct ubifs_lprops *lprops;
/*
* Due to automatic resizing, the values in the lsave table
@@ -1369,7 +1371,11 @@ static int read_lsave(struct ubifs_info *c)
*/
if (lnum >= c->leb_cnt)
continue;
- ubifs_lpt_lookup(c, lnum);
+ lprops = ubifs_lpt_lookup(c, lnum);
+ if (IS_ERR(lprops)) {
+ err = PTR_ERR(lprops);
+ goto out;
+ }
}
out:
vfree(buf);
@@ -1456,13 +1462,13 @@ struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
shft -= UBIFS_LPT_FANOUT_SHIFT;
nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
shft -= UBIFS_LPT_FANOUT_SHIFT;
pnode = ubifs_get_pnode(c, nnode, iip);
if (IS_ERR(pnode))
- return ERR_PTR(PTR_ERR(pnode));
+ return ERR_CAST(pnode);
iip = (i & (UBIFS_LPT_FANOUT - 1));
dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
pnode->lprops[iip].free, pnode->lprops[iip].dirty,
@@ -1585,7 +1591,7 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
nnode = c->nroot;
nnode = dirty_cow_nnode(c, nnode);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
i = lnum - c->main_first;
shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
for (h = 1; h < c->lpt_hght; h++) {
@@ -1593,19 +1599,19 @@ struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
shft -= UBIFS_LPT_FANOUT_SHIFT;
nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
nnode = dirty_cow_nnode(c, nnode);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
shft -= UBIFS_LPT_FANOUT_SHIFT;
pnode = ubifs_get_pnode(c, nnode, iip);
if (IS_ERR(pnode))
- return ERR_PTR(PTR_ERR(pnode));
+ return ERR_CAST(pnode);
pnode = dirty_cow_pnode(c, pnode);
if (IS_ERR(pnode))
- return ERR_PTR(PTR_ERR(pnode));
+ return ERR_CAST(pnode);
iip = (i & (UBIFS_LPT_FANOUT - 1));
dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
pnode->lprops[iip].free, pnode->lprops[iip].dirty,
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 8cbfb82..5c90dec 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -26,6 +26,7 @@
*/
#include <linux/crc16.h>
+#include <linux/slab.h>
#include "ubifs.h"
/**
@@ -645,7 +646,7 @@ static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i)
shft -= UBIFS_LPT_FANOUT_SHIFT;
nnode = ubifs_get_nnode(c, nnode, iip);
if (IS_ERR(nnode))
- return ERR_PTR(PTR_ERR(nnode));
+ return ERR_CAST(nnode);
}
iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
return ubifs_get_pnode(c, nnode, iip);
@@ -704,6 +705,9 @@ static int make_tree_dirty(struct ubifs_info *c)
struct ubifs_pnode *pnode;
pnode = pnode_lookup(c, 0);
+ if (IS_ERR(pnode))
+ return PTR_ERR(pnode);
+
while (pnode) {
do_make_pnode_dirty(c, pnode);
pnode = next_pnode_to_dirty(c, pnode);
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index a88f338..21f47af 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -29,7 +29,8 @@
* @c: UBIFS file-system description object
*
* This function scans the master node LEBs and search for the latest master
- * node. Returns zero in case of success and a negative error code in case of
+ * node. Returns zero in case of success, %-EUCLEAN if there master area is
+ * corrupted and requires recovery, and a negative error code in case of
* failure.
*/
static int scan_for_master(struct ubifs_info *c)
@@ -40,7 +41,7 @@ static int scan_for_master(struct ubifs_info *c)
lnum = UBIFS_MST_LNUM;
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
nodes_cnt = sleb->nodes_cnt;
@@ -48,7 +49,7 @@ static int scan_for_master(struct ubifs_info *c)
snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
list);
if (snod->type != UBIFS_MST_NODE)
- goto out;
+ goto out_dump;
memcpy(c->mst_node, snod->node, snod->len);
offs = snod->offs;
}
@@ -56,7 +57,7 @@ static int scan_for_master(struct ubifs_info *c)
lnum += 1;
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
if (sleb->nodes_cnt != nodes_cnt)
@@ -65,7 +66,7 @@ static int scan_for_master(struct ubifs_info *c)
goto out;
snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
if (snod->type != UBIFS_MST_NODE)
- goto out;
+ goto out_dump;
if (snod->offs != offs)
goto out;
if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
@@ -78,6 +79,12 @@ static int scan_for_master(struct ubifs_info *c)
out:
ubifs_scan_destroy(sleb);
+ return -EUCLEAN;
+
+out_dump:
+ ubifs_err("unexpected node type %d master LEB %d:%d",
+ snod->type, lnum, snod->offs);
+ ubifs_scan_destroy(sleb);
return -EINVAL;
}
@@ -256,7 +263,8 @@ int ubifs_read_master(struct ubifs_info *c)
err = scan_for_master(c);
if (err) {
- err = ubifs_recover_master_node(c);
+ if (err == -EUCLEAN)
+ err = ubifs_recover_master_node(c);
if (err)
/*
* Note, we do not free 'c->mst_node' here because the
@@ -353,7 +361,8 @@ int ubifs_write_master(struct ubifs_info *c)
{
int err, lnum, offs, len;
- if (c->ro_media)
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
return -EROFS;
lnum = UBIFS_MST_LNUM;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4fa81d8..c3de04d 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -132,7 +132,8 @@ static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
{
int err;
- if (c->ro_media)
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
return -EROFS;
err = ubi_leb_unmap(c->ubi, lnum);
if (err) {
@@ -159,7 +160,8 @@ static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
{
int err;
- if (c->ro_media)
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
return -EROFS;
err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
if (err) {
@@ -186,7 +188,8 @@ static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
{
int err;
- if (c->ro_media)
+ ubifs_assert(!c->ro_media && !c->ro_mount);
+ if (c->ro_error)
return -EROFS;
err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
if (err) {
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 152a7b3..82009c7 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -670,9 +670,10 @@ static int kill_orphans(struct ubifs_info *c)
struct ubifs_scan_leb *sleb;
dbg_rcvry("LEB %d", lnum);
- sleb = ubifs_scan(c, lnum, 0, c->sbuf);
+ sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
if (IS_ERR(sleb)) {
- sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
+ if (PTR_ERR(sleb) == -EUCLEAN)
+ sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
if (IS_ERR(sleb)) {
err = PTR_ERR(sleb);
break;
@@ -899,7 +900,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
struct ubifs_scan_leb *sleb;
- sleb = ubifs_scan(c, lnum, 0, c->dbg->buf);
+ sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0);
if (IS_ERR(sleb)) {
err = PTR_ERR(sleb);
break;
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 8056052..77e9b87 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -23,14 +23,15 @@
/*
* This file implements functions needed to recover from unclean un-mounts.
* When UBIFS is mounted, it checks a flag on the master node to determine if
- * an un-mount was completed sucessfully. If not, the process of mounting
- * incorparates additional checking and fixing of on-flash data structures.
+ * an un-mount was completed successfully. If not, the process of mounting
+ * incorporates additional checking and fixing of on-flash data structures.
* UBIFS always cleans away all remnants of an unclean un-mount, so that
* errors do not accumulate. However UBIFS defers recovery if it is mounted
* read-only, and the flash is not modified in that case.
*/
#include <linux/crc32.h>
+#include <linux/slab.h>
#include "ubifs.h"
/**
@@ -53,6 +54,25 @@ static int is_empty(void *buf, int len)
}
/**
+ * first_non_ff - find offset of the first non-0xff byte.
+ * @buf: buffer to search in
+ * @len: length of buffer
+ *
+ * This function returns offset of the first non-0xff byte in @buf or %-1 if
+ * the buffer contains only 0xff bytes.
+ */
+static int first_non_ff(void *buf, int len)
+{
+ uint8_t *p = buf;
+ int i;
+
+ for (i = 0; i < len; i++)
+ if (*p++ != 0xff)
+ return i;
+ return -1;
+}
+
+/**
* get_master_node - get the last valid master node allowing for corruption.
* @c: UBIFS file-system description object
* @lnum: LEB number
@@ -267,12 +287,12 @@ int ubifs_recover_master_node(struct ubifs_info *c)
mst = mst2;
}
- dbg_rcvry("recovered master node from LEB %d",
+ ubifs_msg("recovered master node from LEB %d",
(mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
- if ((c->vfs_sb->s_flags & MS_RDONLY)) {
+ if (c->ro_mount) {
/* Read-only mode. Keep a copy for switching to rw mode */
c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
if (!c->rcvrd_mst_node) {
@@ -357,11 +377,7 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
empty_offs = ALIGN(offs + 1, c->min_io_size);
check_len = c->leb_size - empty_offs;
p = buf + empty_offs - offs;
-
- for (; check_len > 0; check_len--)
- if (*p++ != 0xff)
- return 0;
- return 1;
+ return is_empty(p, check_len);
}
/**
@@ -453,7 +469,7 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
endpt = snod->offs + snod->len;
}
- if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
+ if (c->ro_mount && !c->remounting_rw) {
/* Add to recovery list */
struct ubifs_unclean_leb *ucleb;
@@ -543,8 +559,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
*
* This function does a scan of a LEB, but caters for errors that might have
* been caused by the unclean unmount from which we are attempting to recover.
- *
- * This function returns %0 on success and a negative error code on failure.
+ * Returns %0 in case of success, %-EUCLEAN if an unrecoverable corruption is
+ * found, and a negative error code in case of failure.
*/
struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
int offs, void *sbuf, int grouped)
@@ -643,7 +659,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
goto corrupted;
default:
dbg_err("unknown");
- goto corrupted;
+ err = -EINVAL;
+ goto error;
}
}
@@ -652,8 +669,13 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
clean_buf(c, &buf, lnum, &offs, &len);
need_clean = 1;
} else {
- ubifs_err("corrupt empty space at LEB %d:%d",
- lnum, offs);
+ int corruption = first_non_ff(buf, len);
+
+ ubifs_err("corrupt empty space LEB %d:%d, corruption "
+ "starts at %d", lnum, offs, corruption);
+ /* Make sure we dump interesting non-0xFF data */
+ offs = corruption;
+ buf += corruption;
goto corrupted;
}
}
@@ -750,7 +772,8 @@ out_free:
* @sbuf: LEB-sized buffer to use
*
* This function does a scan of a LEB, but caters for errors that might have
- * been caused by the unclean unmount from which we are attempting to recover.
+ * been caused by unclean reboots from which we are attempting to recover
+ * (assume that only the last log LEB can be corrupted by an unclean reboot).
*
* This function returns %0 on success and a negative error code on failure.
*/
@@ -769,7 +792,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
* We can only recover at the end of the log, so check that the
* next log LEB is empty or out of date.
*/
- sleb = ubifs_scan(c, next_lnum, 0, sbuf);
+ sleb = ubifs_scan(c, next_lnum, 0, sbuf, 0);
if (IS_ERR(sleb))
return sleb;
if (sleb->nodes_cnt) {
@@ -813,7 +836,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
static int recover_head(const struct ubifs_info *c, int lnum, int offs,
void *sbuf)
{
- int len, err, need_clean = 0;
+ int len, err;
if (c->min_io_size > 1)
len = c->min_io_size;
@@ -827,19 +850,7 @@ static int recover_head(const struct ubifs_info *c, int lnum, int offs,
/* Read at the head location and check it is empty flash */
err = ubi_read(c->ubi, lnum, sbuf, offs, len);
- if (err)
- need_clean = 1;
- else {
- uint8_t *p = sbuf;
-
- while (len--)
- if (*p++ != 0xff) {
- need_clean = 1;
- break;
- }
- }
-
- if (need_clean) {
+ if (err || !is_empty(sbuf, len)) {
dbg_rcvry("cleaning head at %d:%d", lnum, offs);
if (offs == 0)
return ubifs_leb_unmap(c, lnum);
@@ -873,7 +884,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
{
int err;
- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
+ ubifs_assert(!c->ro_mount || c->remounting_rw);
dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
@@ -1053,8 +1064,21 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
}
err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
if (err) {
- if (err == -ENOSPC)
- dbg_err("could not find a dirty LEB");
+ /*
+ * There are no dirty or empty LEBs subject to here being
+ * enough for the index. Try to use
+ * 'ubifs_find_free_leb_for_idx()', which will return any empty
+ * LEBs (ignoring index requirements). If the index then
+ * doesn't have enough LEBs the recovery commit will fail -
+ * which is the same result anyway i.e. recovery fails. So
+ * there is no problem ignoring index requirements and just
+ * grabbing a free LEB since we have already established there
+ * is not a dirty LEB we could have used instead.
+ */
+ if (err == -ENOSPC) {
+ dbg_rcvry("could not find a dirty LEB");
+ goto find_free;
+ }
return err;
}
ubifs_assert(!(lp.flags & LPROPS_INDEX));
@@ -1129,8 +1153,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
find_free:
/*
* There is no GC head LEB or the free space in the GC head LEB is too
- * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
- * GC is not run.
+ * small, or there are not dirty LEBs. Allocate gc_lnum by calling
+ * 'ubifs_find_free_leb_for_idx()' so GC is not run.
*/
lnum = ubifs_find_free_leb_for_idx(c);
if (lnum < 0) {
@@ -1438,7 +1462,7 @@ int ubifs_recover_size(struct ubifs_info *c)
}
}
if (e->exists && e->i_size < e->d_size) {
- if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
+ if (!e->inode && c->ro_mount) {
/* Fix the inode size and pin it in memory */
struct inode *inode;
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 11cc801..eed0fcf 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -506,7 +506,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
if (c->need_recovery)
sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
else
- sleb = ubifs_scan(c, lnum, offs, c->sbuf);
+ sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
@@ -627,8 +627,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
ubifs_assert(sleb->endpt - offs >= used);
ubifs_assert(sleb->endpt % c->min_io_size == 0);
- if (sleb->endpt + c->min_io_size <= c->leb_size &&
- !(c->vfs_sb->s_flags & MS_RDONLY))
+ if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount)
err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
sleb->endpt, UBI_SHORTTERM);
@@ -836,10 +835,16 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
const struct ubifs_cs_node *node;
dbg_mnt("replay log LEB %d:%d", lnum, offs);
- sleb = ubifs_scan(c, lnum, offs, sbuf);
+ sleb = ubifs_scan(c, lnum, offs, sbuf, c->need_recovery);
if (IS_ERR(sleb)) {
- if (c->need_recovery)
- sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
+ if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
+ return PTR_ERR(sleb);
+ /*
+ * Note, the below function will recover this log LEB only if
+ * it is the last, because unclean reboots can possibly corrupt
+ * only the tail of the log.
+ */
+ sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
if (IS_ERR(sleb))
return PTR_ERR(sleb);
}
@@ -850,7 +855,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
}
node = sleb->buf;
-
snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
if (c->cs_sqnum == 0) {
/*
@@ -897,7 +901,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
}
list_for_each_entry(snod, &sleb->nodes, list) {
-
cond_resched();
if (snod->sqnum >= SQNUM_WATERMARK) {
@@ -957,7 +960,7 @@ out:
return err;
out_dump:
- ubifs_err("log error detected while replying the log at LEB %d:%d",
+ ubifs_err("log error detected while replaying the log at LEB %d:%d",
lnum, offs + snod->offs);
dbg_dump_node(c, snod->node);
ubifs_scan_destroy(sleb);
@@ -1010,7 +1013,6 @@ out:
int ubifs_replay_journal(struct ubifs_info *c)
{
int err, i, lnum, offs, free;
- void *sbuf = NULL;
BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
@@ -1025,14 +1027,8 @@ int ubifs_replay_journal(struct ubifs_info *c)
return -EINVAL;
}
- sbuf = vmalloc(c->leb_size);
- if (!sbuf)
- return -ENOMEM;
-
dbg_mnt("start replaying the journal");
-
c->replaying = 1;
-
lnum = c->ltail_lnum = c->lhead_lnum;
offs = c->lhead_offs;
@@ -1045,7 +1041,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
lnum = UBIFS_LOG_LNUM;
offs = 0;
}
- err = replay_log_leb(c, lnum, offs, sbuf);
+ err = replay_log_leb(c, lnum, offs, c->sbuf);
if (err == 1)
/* We hit the end of the log */
break;
@@ -1078,7 +1074,6 @@ int ubifs_replay_journal(struct ubifs_info *c)
out:
destroy_replay_tree(c);
destroy_bud_list(c);
- vfree(sbuf);
c->replaying = 0;
return err;
}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 57085e4..bf31b47 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -27,6 +27,7 @@
*/
#include "ubifs.h"
+#include <linux/slab.h>
#include <linux/random.h>
#include <linux/math64.h>
@@ -541,11 +542,8 @@ int ubifs_read_superblock(struct ubifs_info *c)
* due to the unavailability of time-travelling equipment.
*/
if (c->fmt_version > UBIFS_FORMAT_VERSION) {
- struct super_block *sb = c->vfs_sb;
- int mounting_ro = sb->s_flags & MS_RDONLY;
-
- ubifs_assert(!c->ro_media || mounting_ro);
- if (!mounting_ro ||
+ ubifs_assert(!c->ro_media || c->ro_mount);
+ if (!c->ro_mount ||
c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
ubifs_err("on-flash format version is w%d/r%d, but "
"software only supports up to version "
@@ -623,7 +621,7 @@ int ubifs_read_superblock(struct ubifs_info *c)
c->old_leb_cnt = c->leb_cnt;
if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
- if (c->vfs_sb->s_flags & MS_RDONLY)
+ if (c->ro_mount)
dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
c->old_leb_cnt, c->leb_cnt);
else {
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 0ed8247..3e1ee57 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -108,10 +108,9 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
/* Make the node pads to 8-byte boundary */
if ((node_len + pad_len) & 7) {
- if (!quiet) {
+ if (!quiet)
dbg_err("bad padding length %d - %d",
offs, offs + node_len + pad_len);
- }
return SCANNED_A_BAD_PAD_NODE;
}
@@ -198,7 +197,7 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
struct ubifs_ino_node *ino = buf;
struct ubifs_scan_node *snod;
- snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
+ snod = kmalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
if (!snod)
return -ENOMEM;
@@ -213,13 +212,15 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
case UBIFS_DENT_NODE:
case UBIFS_XENT_NODE:
case UBIFS_DATA_NODE:
- case UBIFS_TRUN_NODE:
/*
* The key is in the same place in all keyed
* nodes.
*/
key_read(c, &ino->key, &snod->key);
break;
+ default:
+ invalid_key_init(c, &snod->key);
+ break;
}
list_add_tail(&snod->list, &sleb->nodes);
sleb->nodes_cnt += 1;
@@ -238,12 +239,12 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
{
int len;
- ubifs_err("corrupted data at LEB %d:%d", lnum, offs);
+ ubifs_err("corruption at LEB %d:%d", lnum, offs);
if (dbg_failure_mode)
return;
len = c->leb_size - offs;
- if (len > 4096)
- len = 4096;
+ if (len > 8192)
+ len = 8192;
dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs);
print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
}
@@ -253,13 +254,19 @@ void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
* @c: UBIFS file-system description object
* @lnum: logical eraseblock number
* @offs: offset to start at (usually zero)
- * @sbuf: scan buffer (must be c->leb_size)
+ * @sbuf: scan buffer (must be of @c->leb_size bytes in size)
+ * @quiet: print no messages
*
* This function scans LEB number @lnum and returns complete information about
- * its contents. Returns an error code in case of failure.
+ * its contents. Returns the scaned information in case of success and,
+ * %-EUCLEAN if the LEB neads recovery, and other negative error codes in case
+ * of failure.
+ *
+ * If @quiet is non-zero, this function does not print large and scary
+ * error messages and flash dumps in case of errors.
*/
struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
- int offs, void *sbuf)
+ int offs, void *sbuf, int quiet)
{
void *buf = sbuf + offs;
int err, len = c->leb_size - offs;
@@ -278,8 +285,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
cond_resched();
- ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
-
+ ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
if (ret > 0) {
/* Padding bytes or a valid padding node */
offs += ret;
@@ -304,7 +310,8 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
goto corrupted;
default:
dbg_err("unknown");
- goto corrupted;
+ err = -EINVAL;
+ goto error;
}
err = ubifs_add_snod(c, sleb, buf, offs);
@@ -317,8 +324,12 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
len -= node_len;
}
- if (offs % c->min_io_size)
- goto corrupted;
+ if (offs % c->min_io_size) {
+ if (!quiet)
+ ubifs_err("empty space starts at non-aligned offset %d",
+ offs);
+ goto corrupted;;
+ }
ubifs_end_scan(c, sleb, lnum, offs);
@@ -327,18 +338,25 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
break;
for (; len; offs++, buf++, len--)
if (*(uint8_t *)buf != 0xff) {
- ubifs_err("corrupt empty space at LEB %d:%d",
- lnum, offs);
+ if (!quiet)
+ ubifs_err("corrupt empty space at LEB %d:%d",
+ lnum, offs);
goto corrupted;
}
return sleb;
corrupted:
- ubifs_scanned_corruption(c, lnum, offs, buf);
+ if (!quiet) {
+ ubifs_scanned_corruption(c, lnum, offs, buf);
+ ubifs_err("LEB %d scanning failed", lnum);
+ }
err = -EUCLEAN;
+ ubifs_scan_destroy(sleb);
+ return ERR_PTR(err);
+
error:
- ubifs_err("LEB %d scanning failed", lnum);
+ ubifs_err("LEB %d scanning failed, error %d", lnum, err);
ubifs_scan_destroy(sleb);
return ERR_PTR(err);
}
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 02feb59..46961c0 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -250,7 +250,7 @@ static int kick_a_thread(void)
dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
- c->ro_media) {
+ c->ro_mount || c->ro_error) {
mutex_unlock(&c->umount_mutex);
continue;
}
@@ -277,7 +277,7 @@ static int kick_a_thread(void)
return 0;
}
-int ubifs_shrinker(int nr, gfp_t gfp_mask)
+int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask)
{
int freed, contention = 0;
long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 79fad43..6e11c29 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -36,7 +36,6 @@
#include <linux/mount.h>
#include <linux/math64.h>
#include <linux/writeback.h>
-#include <linux/smp_lock.h>
#include "ubifs.h"
/*
@@ -273,18 +272,26 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
return &ui->vfs_inode;
};
+static void ubifs_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+ struct ubifs_inode *ui = ubifs_inode(inode);
+ INIT_LIST_HEAD(&inode->i_dentry);
+ kmem_cache_free(ubifs_inode_slab, ui);
+}
+
static void ubifs_destroy_inode(struct inode *inode)
{
struct ubifs_inode *ui = ubifs_inode(inode);
kfree(ui->data);
- kmem_cache_free(ubifs_inode_slab, inode);
+ call_rcu(&inode->i_rcu, ubifs_i_callback);
}
/*
* Note, Linux write-back code calls this without 'i_mutex'.
*/
-static int ubifs_write_inode(struct inode *inode, int wait)
+static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
int err = 0;
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -318,6 +325,8 @@ static int ubifs_write_inode(struct inode *inode, int wait)
if (err)
ubifs_err("can't write inode %lu, error %d",
inode->i_ino, err);
+ else
+ err = dbg_check_inode_size(c, inode, ui->ui_size);
}
ui->dirty = 0;
@@ -326,7 +335,7 @@ static int ubifs_write_inode(struct inode *inode, int wait)
return err;
}
-static void ubifs_delete_inode(struct inode *inode)
+static void ubifs_evict_inode(struct inode *inode)
{
int err;
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -342,9 +351,12 @@ static void ubifs_delete_inode(struct inode *inode)
dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode);
ubifs_assert(!atomic_read(&inode->i_count));
- ubifs_assert(inode->i_nlink == 0);
truncate_inode_pages(&inode->i_data, 0);
+
+ if (inode->i_nlink)
+ goto done;
+
if (is_bad_inode(inode))
goto out;
@@ -366,7 +378,8 @@ out:
c->nospace = c->nospace_rp = 0;
smp_wmb();
}
- clear_inode(inode);
+done:
+ end_writeback(inode);
}
static void ubifs_dirty_inode(struct inode *inode)
@@ -438,12 +451,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
{
int i, err;
struct ubifs_info *c = sb->s_fs_info;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .range_start = 0,
- .range_end = LLONG_MAX,
- .nr_to_write = LONG_MAX,
- };
/*
* Zero @wait is just an advisory thing to help the file system shove
@@ -454,17 +461,6 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
return 0;
/*
- * VFS calls '->sync_fs()' before synchronizing all dirty inodes and
- * pages, so synchronize them first, then commit the journal. Strictly
- * speaking, it is not necessary to commit the journal here,
- * synchronizing write-buffers would be enough. But committing makes
- * UBIFS free space predictions much more accurate, so we want to let
- * the user be able to get more accurate results of 'statfs()' after
- * they synchronize the file system.
- */
- generic_sync_sb_inodes(sb, &wbc);
-
- /*
* Synchronize write buffers, because 'ubifs_run_commit()' does not
* do this if it waits for an already running commit.
*/
@@ -474,6 +470,13 @@ static int ubifs_sync_fs(struct super_block *sb, int wait)
return err;
}
+ /*
+ * Strictly speaking, it is not necessary to commit the journal here,
+ * synchronizing write-buffers would be enough. But committing makes
+ * UBIFS free space predictions much more accurate, so we want to let
+ * the user be able to get more accurate results of 'statfs()' after
+ * they synchronize the file system.
+ */
err = ubifs_run_commit(c);
if (err)
return err;
@@ -797,7 +800,7 @@ static int alloc_wbufs(struct ubifs_info *c)
* does not need to be synchronized by timer.
*/
c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
- c->jheads[GCHD].wbuf.softlimit = ktime_set(0, 0);
+ c->jheads[GCHD].wbuf.no_timer = 1;
return 0;
}
@@ -986,7 +989,7 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options,
switch (token) {
/*
* %Opt_fast_unmount and %Opt_norm_unmount options are ignored.
- * We accepte them in order to be backware-compatible. But this
+ * We accept them in order to be backward-compatible. But this
* should be removed at some point.
*/
case Opt_fast_unmount:
@@ -1142,11 +1145,11 @@ static int check_free_space(struct ubifs_info *c)
*/
static int mount_ubifs(struct ubifs_info *c)
{
- struct super_block *sb = c->vfs_sb;
- int err, mounted_read_only = (sb->s_flags & MS_RDONLY);
+ int err;
long long x;
size_t sz;
+ c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY);
err = init_constants_early(c);
if (err)
return err;
@@ -1159,7 +1162,7 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_free;
- if (c->empty && (mounted_read_only || c->ro_media)) {
+ if (c->empty && (c->ro_mount || c->ro_media)) {
/*
* This UBI volume is empty, and read-only, or the file system
* is mounted read-only - we cannot format it.
@@ -1170,7 +1173,7 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_free;
}
- if (c->ro_media && !mounted_read_only) {
+ if (c->ro_media && !c->ro_mount) {
ubifs_err("cannot mount read-write - read-only media");
err = -EROFS;
goto out_free;
@@ -1190,7 +1193,7 @@ static int mount_ubifs(struct ubifs_info *c)
if (!c->sbuf)
goto out_free;
- if (!mounted_read_only) {
+ if (!c->ro_mount) {
c->ileb_buf = vmalloc(c->leb_size);
if (!c->ileb_buf)
goto out_free;
@@ -1233,7 +1236,7 @@ static int mount_ubifs(struct ubifs_info *c)
}
sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
- if (!mounted_read_only) {
+ if (!c->ro_mount) {
err = alloc_wbufs(c);
if (err)
goto out_cbuf;
@@ -1259,12 +1262,12 @@ static int mount_ubifs(struct ubifs_info *c)
if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
ubifs_msg("recovery needed");
c->need_recovery = 1;
- if (!mounted_read_only) {
+ if (!c->ro_mount) {
err = ubifs_recover_inl_heads(c, c->sbuf);
if (err)
goto out_master;
}
- } else if (!mounted_read_only) {
+ } else if (!c->ro_mount) {
/*
* Set the "dirty" flag so that if we reboot uncleanly we
* will notice this immediately on the next mount.
@@ -1275,7 +1278,7 @@ static int mount_ubifs(struct ubifs_info *c)
goto out_master;
}
- err = ubifs_lpt_init(c, 1, !mounted_read_only);
+ err = ubifs_lpt_init(c, 1, !c->ro_mount);
if (err)
goto out_lpt;
@@ -1287,11 +1290,14 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_journal;
- err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
+ /* Calculate 'min_idx_lebs' after journal replay */
+ c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+
+ err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
if (err)
goto out_orphans;
- if (!mounted_read_only) {
+ if (!c->ro_mount) {
int lnum;
err = check_free_space(c);
@@ -1313,6 +1319,8 @@ static int mount_ubifs(struct ubifs_info *c)
if (err)
goto out_orphans;
err = ubifs_rcvry_gc_commit(c);
+ if (err)
+ goto out_orphans;
} else {
err = take_gc_lnum(c);
if (err)
@@ -1324,7 +1332,7 @@ static int mount_ubifs(struct ubifs_info *c)
*/
err = ubifs_leb_unmap(c, c->gc_lnum);
if (err)
- return err;
+ goto out_orphans;
}
err = dbg_check_lprops(c);
@@ -1351,7 +1359,7 @@ static int mount_ubifs(struct ubifs_info *c)
spin_unlock(&ubifs_infos_lock);
if (c->need_recovery) {
- if (mounted_read_only)
+ if (c->ro_mount)
ubifs_msg("recovery deferred");
else {
c->need_recovery = 0;
@@ -1378,7 +1386,7 @@ static int mount_ubifs(struct ubifs_info *c)
ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
c->vi.ubi_num, c->vi.vol_id, c->vi.name);
- if (mounted_read_only)
+ if (c->ro_mount)
ubifs_msg("mounted read-only");
x = (long long)c->main_lebs * c->leb_size;
ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d "
@@ -1399,12 +1407,7 @@ static int mount_ubifs(struct ubifs_info *c)
c->leb_size, c->leb_size >> 10);
dbg_msg("data journal heads: %d",
c->jhead_cnt - NONDATA_JHEADS_CNT);
- dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X"
- "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
- c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3],
- c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],
- c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],
- c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);
+ dbg_msg("UUID: %pUB", c->uuid);
dbg_msg("big_lpt %d", c->big_lpt);
dbg_msg("log LEBs: %d (%d - %d)",
c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
@@ -1645,7 +1648,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
}
dbg_gen("re-mounted read-write");
- c->vfs_sb->s_flags &= ~MS_RDONLY;
+ c->ro_mount = 0;
c->remounting_rw = 0;
c->always_chk_crc = 0;
err = dbg_check_space_info(c);
@@ -1681,7 +1684,7 @@ static void ubifs_remount_ro(struct ubifs_info *c)
int i, err;
ubifs_assert(!c->need_recovery);
- ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
+ ubifs_assert(!c->ro_mount);
mutex_lock(&c->umount_mutex);
if (c->bgt) {
@@ -1691,10 +1694,8 @@ static void ubifs_remount_ro(struct ubifs_info *c)
dbg_save_space_info(c);
- for (i = 0; i < c->jhead_cnt; i++) {
+ for (i = 0; i < c->jhead_cnt; i++)
ubifs_wbuf_sync(&c->jheads[i].wbuf);
- hrtimer_cancel(&c->jheads[i].wbuf.timer);
- }
c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
@@ -1709,6 +1710,7 @@ static void ubifs_remount_ro(struct ubifs_info *c)
vfree(c->ileb_buf);
c->ileb_buf = NULL;
ubifs_lpt_free(c, 1);
+ c->ro_mount = 1;
err = dbg_check_space_info(c);
if (err)
ubifs_ro_mode(c, err);
@@ -1723,8 +1725,6 @@ static void ubifs_put_super(struct super_block *sb)
ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
c->vi.vol_id);
- lock_kernel();
-
/*
* The following asserts are only valid if there has not been a failure
* of the media. For example, there will be dirty inodes if we failed
@@ -1742,7 +1742,7 @@ static void ubifs_put_super(struct super_block *sb)
* the mutex is locked.
*/
mutex_lock(&c->umount_mutex);
- if (!(c->vfs_sb->s_flags & MS_RDONLY)) {
+ if (!c->ro_mount) {
/*
* First of all kill the background thread to make sure it does
* not interfere with un-mounting and freeing resources.
@@ -1752,25 +1752,22 @@ static void ubifs_put_super(struct super_block *sb)
c->bgt = NULL;
}
- /* Synchronize write-buffers */
- if (c->jheads)
- for (i = 0; i < c->jhead_cnt; i++) {
- ubifs_wbuf_sync(&c->jheads[i].wbuf);
- hrtimer_cancel(&c->jheads[i].wbuf.timer);
- }
-
/*
- * On fatal errors c->ro_media is set to 1, in which case we do
+ * On fatal errors c->ro_error is set to 1, in which case we do
* not write the master node.
*/
- if (!c->ro_media) {
+ if (!c->ro_error) {
+ int err;
+
+ /* Synchronize write-buffers */
+ for (i = 0; i < c->jhead_cnt; i++)
+ ubifs_wbuf_sync(&c->jheads[i].wbuf);
+
/*
* We are being cleanly unmounted which means the
* orphans were killed - indicate this in the master
* node. Also save the reserved GC LEB number.
*/
- int err;
-
c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
@@ -1783,6 +1780,10 @@ static void ubifs_put_super(struct super_block *sb)
*/
ubifs_err("failed to write master node, "
"error %d", err);
+ } else {
+ for (i = 0; i < c->jhead_cnt; i++)
+ /* Make sure write-buffer timers are canceled */
+ hrtimer_cancel(&c->jheads[i].wbuf.timer);
}
}
@@ -1791,8 +1792,6 @@ static void ubifs_put_super(struct super_block *sb)
ubi_close_volume(c->ubi);
mutex_unlock(&c->umount_mutex);
kfree(c);
-
- unlock_kernel();
}
static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1808,22 +1807,21 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
return err;
}
- lock_kernel();
- if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
+ if (c->ro_mount && !(*flags & MS_RDONLY)) {
+ if (c->ro_error) {
+ ubifs_msg("cannot re-mount R/W due to prior errors");
+ return -EROFS;
+ }
if (c->ro_media) {
- ubifs_msg("cannot re-mount due to prior errors");
- unlock_kernel();
+ ubifs_msg("cannot re-mount R/W - UBI volume is R/O");
return -EROFS;
}
err = ubifs_remount_rw(c);
- if (err) {
- unlock_kernel();
+ if (err)
return err;
- }
- } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) {
- if (c->ro_media) {
- ubifs_msg("cannot re-mount due to prior errors");
- unlock_kernel();
+ } else if (!c->ro_mount && (*flags & MS_RDONLY)) {
+ if (c->ro_error) {
+ ubifs_msg("cannot re-mount R/O due to prior errors");
return -EROFS;
}
ubifs_remount_ro(c);
@@ -1838,7 +1836,6 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
}
ubifs_assert(c->lst.taken_empty_lebs > 0);
- unlock_kernel();
return 0;
}
@@ -1847,7 +1844,7 @@ const struct super_operations ubifs_super_operations = {
.destroy_inode = ubifs_destroy_inode,
.put_super = ubifs_put_super,
.write_inode = ubifs_write_inode,
- .delete_inode = ubifs_delete_inode,
+ .evict_inode = ubifs_evict_inode,
.statfs = ubifs_statfs,
.dirty_inode = ubifs_dirty_inode,
.remount_fs = ubifs_remount_fs,
@@ -1860,22 +1857,32 @@ const struct super_operations ubifs_super_operations = {
* @name: UBI volume name
* @mode: UBI volume open mode
*
- * There are several ways to specify UBI volumes when mounting UBIFS:
- * o ubiX_Y - UBI device number X, volume Y;
- * o ubiY - UBI device number 0, volume Y;
+ * The primary method of mounting UBIFS is by specifying the UBI volume
+ * character device node path. However, UBIFS may also be mounted withoug any
+ * character device node using one of the following methods:
+ *
+ * o ubiX_Y - mount UBI device number X, volume Y;
+ * o ubiY - mount UBI device number 0, volume Y;
* o ubiX:NAME - mount UBI device X, volume with name NAME;
* o ubi:NAME - mount UBI device 0, volume with name NAME.
*
* Alternative '!' separator may be used instead of ':' (because some shells
* like busybox may interpret ':' as an NFS host name separator). This function
- * returns ubi volume object in case of success and a negative error code in
- * case of failure.
+ * returns UBI volume description object in case of success and a negative
+ * error code in case of failure.
*/
static struct ubi_volume_desc *open_ubi(const char *name, int mode)
{
+ struct ubi_volume_desc *ubi;
int dev, vol;
char *endptr;
+ /* First, try to open using the device node path method */
+ ubi = ubi_open_volume_path(name, mode);
+ if (!IS_ERR(ubi))
+ return ubi;
+
+ /* Try the "nodev" method */
if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
return ERR_PTR(-EINVAL);
@@ -1970,12 +1977,14 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
*
* Read-ahead will be disabled because @c->bdi.ra_pages is 0.
*/
+ c->bdi.name = "ubifs",
c->bdi.capabilities = BDI_CAP_MAP_COPY;
c->bdi.unplug_io_fn = default_unplug_io_fn;
err = bdi_init(&c->bdi);
if (err)
goto out_close;
- err = bdi_register(&c->bdi, NULL, "ubifs");
+ err = bdi_register(&c->bdi, NULL, "ubifs_%d_%d",
+ c->vi.ubi_num, c->vi.vol_id);
if (err)
goto out_bdi;
@@ -1983,6 +1992,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
if (err)
goto out_bdi;
+ sb->s_bdi = &c->bdi;
sb->s_fs_info = c;
sb->s_magic = UBIFS_SUPER_MAGIC;
sb->s_blocksize = UBIFS_BLOCK_SIZE;
@@ -2036,8 +2046,8 @@ static int sb_test(struct super_block *sb, void *data)
return c->vi.cdev == *dev;
}
-static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
- const char *name, void *data, struct vfsmount *mnt)
+static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
+ const char *name, void *data)
{
struct ubi_volume_desc *ubi;
struct ubi_volume_info vi;
@@ -2053,9 +2063,9 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
*/
ubi = open_ubi(name, UBI_READONLY);
if (IS_ERR(ubi)) {
- ubifs_err("cannot open \"%s\", error %d",
- name, (int)PTR_ERR(ubi));
- return PTR_ERR(ubi);
+ dbg_err("cannot open \"%s\", error %d",
+ name, (int)PTR_ERR(ubi));
+ return ERR_CAST(ubi);
}
ubi_get_volume_info(ubi, &vi);
@@ -2068,9 +2078,11 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
}
if (sb->s_root) {
+ struct ubifs_info *c1 = sb->s_fs_info;
+
/* A new mount point for already mounted UBIFS */
dbg_gen("this ubi volume is already mounted");
- if ((flags ^ sb->s_flags) & MS_RDONLY) {
+ if (!!(flags & MS_RDONLY) != c1->ro_mount) {
err = -EBUSY;
goto out_deact;
}
@@ -2091,20 +2103,19 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
/* 'fill_super()' opens ubi again so we must close it here */
ubi_close_volume(ubi);
- simple_set_mnt(mnt, sb);
- return 0;
+ return dget(sb->s_root);
out_deact:
deactivate_locked_super(sb);
out_close:
ubi_close_volume(ubi);
- return err;
+ return ERR_PTR(err);
}
static struct file_system_type ubifs_fs_type = {
.name = "ubifs",
.owner = THIS_MODULE,
- .get_sb = ubifs_get_sb,
+ .mount = ubifs_mount,
.kill_sb = kill_anon_super,
};
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index f249f7b..ad9cf01 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -31,6 +31,7 @@
*/
#include <linux/crc32.h>
+#include <linux/slab.h>
#include "ubifs.h"
/*
@@ -1159,8 +1160,8 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
* o exact match, i.e. the found zero-level znode contains key @key, then %1
* is returned and slot number of the matched branch is stored in @n;
* o not exact match, which means that zero-level znode does not contain
- * @key, then %0 is returned and slot number of the closed branch is stored
- * in @n;
+ * @key, then %0 is returned and slot number of the closest branch is stored
+ * in @n;
* o @key is so small that it is even less than the lowest key of the
* leftmost zero-level node, then %0 is returned and %0 is stored in @n.
*
@@ -1176,6 +1177,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
unsigned long time = get_seconds();
dbg_tnc("search key %s", DBGKEY(key));
+ ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
znode = c->zroot.znode;
if (unlikely(!znode)) {
@@ -1433,7 +1435,7 @@ static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
* @lnum: LEB number is returned here
* @offs: offset is returned here
*
- * This function look up and reads node with key @key. The caller has to make
+ * This function looks up and reads node with key @key. The caller has to make
* sure the @node buffer is large enough to fit the node. Returns zero in case
* of success, %-ENOENT if the node was not found, and a negative error code in
* case of failure. The node location can be returned in @lnum and @offs.
@@ -2965,7 +2967,7 @@ static struct ubifs_znode *right_znode(struct ubifs_info *c,
*
* This function searches an indexing node by its first key @key and its
* address @lnum:@offs. It looks up the indexing tree by pulling all indexing
- * nodes it traverses to TNC. This function is called fro indexing nodes which
+ * nodes it traverses to TNC. This function is called for indexing nodes which
* were found on the media by scanning, for example when garbage-collecting or
* when doing in-the-gaps commit. This means that the indexing node which is
* looked for does not have to have exactly the same leftmost key @key, because
@@ -2987,6 +2989,8 @@ static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
struct ubifs_znode *znode, *zn;
int n, nn;
+ ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
+
/*
* The arguments have probably been read off flash, so don't assume
* they are valid.
@@ -3268,3 +3272,73 @@ out_unlock:
mutex_unlock(&c->tnc_mutex);
return err;
}
+
+#ifdef CONFIG_UBIFS_FS_DEBUG
+
+/**
+ * dbg_check_inode_size - check if inode size is correct.
+ * @c: UBIFS file-system description object
+ * @inum: inode number
+ * @size: inode size
+ *
+ * This function makes sure that the inode size (@size) is correct and it does
+ * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL
+ * if it has a data page beyond @size, and other negative error code in case of
+ * other errors.
+ */
+int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
+ loff_t size)
+{
+ int err, n;
+ union ubifs_key from_key, to_key, *key;
+ struct ubifs_znode *znode;
+ unsigned int block;
+
+ if (!S_ISREG(inode->i_mode))
+ return 0;
+ if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+ return 0;
+
+ block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
+ data_key_init(c, &from_key, inode->i_ino, block);
+ highest_data_key(c, &to_key, inode->i_ino);
+
+ mutex_lock(&c->tnc_mutex);
+ err = ubifs_lookup_level0(c, &from_key, &znode, &n);
+ if (err < 0)
+ goto out_unlock;
+
+ if (err) {
+ err = -EINVAL;
+ key = &from_key;
+ goto out_dump;
+ }
+
+ err = tnc_next(c, &znode, &n);
+ if (err == -ENOENT) {
+ err = 0;
+ goto out_unlock;
+ }
+ if (err < 0)
+ goto out_unlock;
+
+ ubifs_assert(err == 0);
+ key = &znode->zbranch[n].key;
+ if (!key_in_range(c, key, &from_key, &to_key))
+ goto out_unlock;
+
+out_dump:
+ block = key_block(c, key);
+ ubifs_err("inode %lu has size %lld, but there are data at offset %lld "
+ "(data key %s)", (unsigned long)inode->i_ino, size,
+ ((loff_t)block) << UBIFS_BLOCK_SHIFT, DBGKEY(key));
+ dbg_dump_inode(c, inode);
+ dbg_dump_stack();
+ err = -EINVAL;
+
+out_unlock:
+ mutex_unlock(&c->tnc_mutex);
+ return err;
+}
+
+#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index fde8d12..53288e5 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -245,7 +245,7 @@ static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
* it is more comprehensive and less efficient than is needed for this
* purpose.
*/
- sleb = ubifs_scan(c, lnum, 0, c->ileb_buf);
+ sleb = ubifs_scan(c, lnum, 0, c->ileb_buf, 0);
c->ileb_len = 0;
if (IS_ERR(sleb))
return PTR_ERR(sleb);
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 3eee07e..191ca78 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -135,6 +135,13 @@
/* The key is always at the same position in all keyed nodes */
#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key)
+/* Garbage collector journal head number */
+#define UBIFS_GC_HEAD 0
+/* Base journal head number */
+#define UBIFS_BASE_HEAD 1
+/* Data journal head number */
+#define UBIFS_DATA_HEAD 2
+
/*
* LEB Properties Tree node types.
*
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 1bf01d8..381d6b2 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -28,6 +28,7 @@
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/sched.h>
+#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
@@ -95,8 +96,9 @@
*/
#define BGT_NAME_PATTERN "ubifs_bgt%d_%d"
-/* Default write-buffer synchronization timeout in seconds */
-#define DEFAULT_WBUF_TIMEOUT_SECS 5
+/* Write-buffer synchronization timeout interval in seconds */
+#define WBUF_TIMEOUT_SOFTLIMIT 3
+#define WBUF_TIMEOUT_HARDLIMIT 5
/* Maximum possible inode number (only 32-bit inodes are supported now) */
#define MAX_INUM 0xFFFFFFFF
@@ -104,12 +106,10 @@
/* Number of non-data journal heads */
#define NONDATA_JHEADS_CNT 2
-/* Garbage collector head */
-#define GCHD 0
-/* Base journal head number */
-#define BASEHD 1
-/* First "general purpose" journal head */
-#define DATAHD 2
+/* Shorter names for journal head numbers for internal usage */
+#define GCHD UBIFS_GC_HEAD
+#define BASEHD UBIFS_BASE_HEAD
+#define DATAHD UBIFS_DATA_HEAD
/* 'No change' value for 'ubifs_change_lp()' */
#define LPROPS_NC 0x80000001
@@ -119,8 +119,12 @@
* in TNC. However, when replaying, it is handy to introduce fake "truncation"
* keys for truncation nodes because the code becomes simpler. So we define
* %UBIFS_TRUN_KEY type.
+ *
+ * But otherwise, out of the journal reply scope, the truncation keys are
+ * invalid.
*/
-#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
+#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
+#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT
/*
* How much a directory entry/extended attribute entry adds to the parent/host
@@ -379,7 +383,7 @@ struct ubifs_gced_idx_leb {
* The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
* @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
* make sure @inode->i_size is always changed under @ui_mutex, because it
- * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock
+ * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock
* with 'ubifs_writepage()' (see file.c). All the other inode fields are
* changed under @ui_mutex, so they do not need "shadow" fields. Note, one
* could consider to rework locking and base it on "shadow" fields.
@@ -654,7 +658,8 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
* @delta: hard and soft timeouts delta (the timer expire inteval is @softlimit
* and @softlimit + @delta)
* @timer: write-buffer timer
- * @need_sync: it is set if its timer expired and needs sync
+ * @no_timer: non-zero if this write-buffer does not have a timer
+ * @need_sync: non-zero if the timer expired and the wbuf needs sync'ing
* @next_ino: points to the next position of the following inode number
* @inodes: stores the inode numbers of the nodes which are in wbuf
*
@@ -683,7 +688,8 @@ struct ubifs_wbuf {
ktime_t softlimit;
unsigned long long delta;
struct hrtimer timer;
- int need_sync;
+ unsigned int no_timer:1;
+ unsigned int need_sync:1;
int next_ino;
ino_t *inodes;
};
@@ -1026,6 +1032,8 @@ struct ubifs_debug_info;
* @max_leb_cnt: maximum count of logical eraseblocks
* @old_leb_cnt: count of logical eraseblocks before re-size
* @ro_media: the underlying UBI volume is read-only
+ * @ro_mount: the file-system was mounted as read-only
+ * @ro_error: UBIFS switched to R/O mode because an error happened
*
* @dirty_pg_cnt: number of dirty pages (not used)
* @dirty_zn_cnt: number of dirty znodes
@@ -1166,11 +1174,14 @@ struct ubifs_debug_info;
* @replay_sqnum: sequence number of node currently being replayed
* @need_recovery: file-system needs recovery
* @replaying: set to %1 during journal replay
- * @unclean_leb_list: LEBs to recover when mounting ro to rw
- * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
+ * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W
+ * mode
+ * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted
+ * FS to R/W mode
* @size_tree: inode size information for recovery
- * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
- * @always_chk_crc: always check CRCs (while mounting and remounting rw)
+ * @remounting_rw: set while re-mounting from R/O mode to R/W mode
+ * @always_chk_crc: always check CRCs (while mounting and remounting to R/W
+ * mode)
* @mount_opts: UBIFS-specific mount options
*
* @dbg: debugging-related information
@@ -1266,7 +1277,9 @@ struct ubifs_info {
int leb_cnt;
int max_leb_cnt;
int old_leb_cnt;
- int ro_media;
+ unsigned int ro_media:1;
+ unsigned int ro_mount:1;
+ unsigned int ro_error:1;
atomic_long_t dirty_pg_cnt;
atomic_long_t dirty_zn_cnt;
@@ -1448,7 +1461,7 @@ int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode);
/* scan.c */
struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
- int offs, void *sbuf);
+ int offs, void *sbuf, int quiet);
void ubifs_scan_destroy(struct ubifs_scan_leb *sleb);
int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
int offs, int quiet);
@@ -1573,7 +1586,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
int ubifs_tnc_end_commit(struct ubifs_info *c);
/* shrinker.c */
-int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
+int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask);
/* commit.c */
int ubifs_bg_thread(void *info);
@@ -1673,9 +1686,10 @@ const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c);
const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
+int ubifs_calc_dark(const struct ubifs_info *c, int spc);
/* file.c */
-int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
+int ubifs_fsync(struct file *file, int datasync);
int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
/* dir.c */
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index cfd31e2..c74400f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -55,9 +55,10 @@
* ACL support is not implemented.
*/
+#include "ubifs.h"
+#include <linux/slab.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
-#include "ubifs.h"
/*
* Limit the number of extended attributes per inode so that the total size
@@ -78,9 +79,9 @@ enum {
SECURITY_XATTR,
};
-static struct inode_operations none_inode_operations;
-static struct address_space_operations none_address_operations;
-static struct file_operations none_file_operations;
+static const struct inode_operations none_inode_operations;
+static const struct address_space_operations none_address_operations;
+static const struct file_operations none_file_operations;
/**
* create_xattr - create an extended attribute.