From 9b945d537db86557e5b5820b4b52df94c35b3829 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 10 Oct 2009 22:58:10 +0900 Subject: nilfs2: get rid of BUG_ON use in btree lookup routines The current btree lookup routines make a kernel oops when detected inconsistency in btree blocks. These routines should instead return a proper error code because the inconsistency usually comes from corruption of on-disk metadata. This fixes the issue by converting BUG_ON calls to proper error handlings. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index e25b507..420c9ec 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -444,6 +444,18 @@ nilfs_btree_get_node(const struct nilfs_btree *btree, nilfs_btree_get_nonroot_node(path, level); } +static inline int +nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) +{ + if (unlikely(nilfs_btree_node_get_level(node) != level)) { + dump_stack(); + printk(KERN_CRIT "NILFS: btree level mismatch: %d != %d\n", + nilfs_btree_node_get_level(node), level); + return 1; + } + return 0; +} + static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, struct nilfs_btree_path *path, __u64 key, __u64 *ptrp, int minlevel) @@ -467,7 +479,8 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); - BUG_ON(level != nilfs_btree_node_get_level(node)); + if (nilfs_btree_bad_node(node, level)) + return -EINVAL; if (!found) found = nilfs_btree_node_lookup(node, key, &index); else @@ -512,7 +525,8 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); - BUG_ON(level != nilfs_btree_node_get_level(node)); + if (nilfs_btree_bad_node(node, level)) + return -EINVAL; index = nilfs_btree_node_get_nchildren(node) - 1; ptr = nilfs_btree_node_get_ptr(btree, node, index); path[level].bp_index = index; -- cgit v0.10.2 From 6600b9dd8e0d4a60c610f216b78d992a598bc52a Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Mon, 9 Nov 2009 19:10:11 +0900 Subject: nilfs2: move definition of struct nilfs_btree_node This is a trivial patch to expose struct nilfs_fs_btree_node. The struct should be exposed outside of kernel, for it is disk format. Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 0e72bbb..4b82d84 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -34,28 +34,6 @@ struct nilfs_btree; struct nilfs_btree_path; /** - * struct nilfs_btree_node - B-tree node - * @bn_flags: flags - * @bn_level: level - * @bn_nchildren: number of children - * @bn_pad: padding - */ -struct nilfs_btree_node { - __u8 bn_flags; - __u8 bn_level; - __le16 bn_nchildren; - __le32 bn_pad; -}; - -/* flags */ -#define NILFS_BTREE_NODE_ROOT 0x01 - -/* level */ -#define NILFS_BTREE_LEVEL_DATA 0 -#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) -#define NILFS_BTREE_LEVEL_MAX 14 - -/** * struct nilfs_btree - B-tree structure * @bt_bmap: bmap base structure */ diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index ce52040..72289d2 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -403,6 +403,28 @@ struct nilfs_segment_summary { #define NILFS_SS_GC 0x0010 /* segment written for cleaner operation */ /** + * struct nilfs_btree_node - B-tree node + * @bn_flags: flags + * @bn_level: level + * @bn_nchildren: number of children + * @bn_pad: padding + */ +struct nilfs_btree_node { + __u8 bn_flags; + __u8 bn_level; + __le16 bn_nchildren; + __le32 bn_pad; +}; + +/* flags */ +#define NILFS_BTREE_NODE_ROOT 0x01 + +/* level */ +#define NILFS_BTREE_LEVEL_DATA 0 +#define NILFS_BTREE_LEVEL_NODE_MIN (NILFS_BTREE_LEVEL_DATA + 1) +#define NILFS_BTREE_LEVEL_MAX 14 + +/** * struct nilfs_palloc_group_desc - block group descriptor * @pg_nfrees: number of free entries in block group */ -- cgit v0.10.2 From 91f1953bf3243a4215b57d8e2f317a7035924de7 Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Thu, 12 Nov 2009 14:07:26 +0900 Subject: nilfs2: Using nobarrier option instead of barrier=off Since most of fs using nofoobar style option, modified barrier=off option as nobarrier. Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index 01539f4..cbd8779 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -49,8 +49,7 @@ Mount options NILFS2 supports the following mount options: (*) == default -barrier=on(*) This enables/disables barriers. barrier=off disables - it, barrier=on enables it. +nobarrier Disables barriers. errors=continue(*) Keep going on a filesystem error. errors=remount-ro Remount the filesystem read-only on an error. errors=panic Panic and halt the machine if an error occurs. diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 644e667..02dcbb0 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -490,7 +490,7 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) struct nilfs_sb_info *sbi = NILFS_SB(sb); if (!nilfs_test_opt(sbi, BARRIER)) - seq_printf(seq, ",barrier=off"); + seq_printf(seq, ",nobarrier"); if (nilfs_test_opt(sbi, SNAPSHOT)) seq_printf(seq, ",cp=%llu", (unsigned long long int)sbi->s_snapshot_cno); @@ -568,7 +568,7 @@ static const struct export_operations nilfs_export_ops = { enum { Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_barrier, Opt_snapshot, Opt_order, + Opt_nobarrier, Opt_snapshot, Opt_order, Opt_err, }; @@ -576,7 +576,7 @@ static match_table_t tokens = { {Opt_err_cont, "errors=continue"}, {Opt_err_panic, "errors=panic"}, {Opt_err_ro, "errors=remount-ro"}, - {Opt_barrier, "barrier=%s"}, + {Opt_nobarrier, "nobarrier"}, {Opt_snapshot, "cp=%u"}, {Opt_order, "order=%s"}, {Opt_err, NULL} @@ -612,13 +612,8 @@ static int parse_options(char *options, struct super_block *sb) token = match_token(p, tokens, args); switch (token) { - case Opt_barrier: - if (match_bool(&args[0], &option)) - return 0; - if (option) - nilfs_set_opt(sbi, BARRIER); - else - nilfs_clear_opt(sbi, BARRIER); + case Opt_nobarrier: + nilfs_clear_opt(sbi, BARRIER); break; case Opt_order: if (strcmp(args[0].from, "relaxed") == 0) -- cgit v0.10.2 From e2073e78575e3690ea3cce67b11b7b1de8e85fd3 Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Thu, 12 Nov 2009 14:07:27 +0900 Subject: nilfs2: cleanup unused match_bool function match_bool function is not used anymore. Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 02dcbb0..b6837f4 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -582,19 +582,6 @@ static match_table_t tokens = { {Opt_err, NULL} }; -static int match_bool(substring_t *s, int *result) -{ - int len = s->to - s->from; - - if (strncmp(s->from, "on", len) == 0) - *result = 1; - else if (strncmp(s->from, "off", len) == 0) - *result = 0; - else - return 1; - return 0; -} - static int parse_options(char *options, struct super_block *sb) { struct nilfs_sb_info *sbi = NILFS_SB(sb); -- cgit v0.10.2 From a49762fd119d191dcbb2f638a2dbc2ed53f4e2bb Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 11 Nov 2009 01:28:48 +0900 Subject: nilfs2: remove buffer locking in nilfs_mark_inode_dirty This lock is eliminable because inodes on the buffer can be updated independently. Although a log writer also fills in bmap data on the on-disk inodes, this update is exclusively done by a log writer lock. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 2a0a5a3..412b25a 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -525,7 +525,6 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); - /* The buffer is guarded with lock_buffer() by the caller */ if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); @@ -745,9 +744,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) "failed to reget inode block.\n"); return err; } - lock_buffer(ibh); nilfs_update_inode(inode, ibh); - unlock_buffer(ibh); nilfs_mdt_mark_buffer_dirty(ibh); nilfs_mdt_mark_dirty(sbi->s_ifile); brelse(ibh); -- cgit v0.10.2 From 30db4e6c3d51a89e4923e525303f714e6508bbd0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 11 Nov 2009 22:37:59 +0900 Subject: nilfs2: remove buffer locking from btree code lock_buffer() and unlock_buffer() uses in btree.c are eliminable because btree functions gain buffer heads through nilfs_btnode_get(), which never returns an on-the-fly buffer. Although nilfs_clear_dirty_page() and nilfs_copy_back_pages() in nilfs_commit_gcdat_inode() juggle btree node buffers of DAT, this is safe because these operations are protected by a log writer lock or the metadata file semaphore of DAT. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 420c9ec..c9aab29 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -652,13 +652,11 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree, { if (level < nilfs_btree_height(btree) - 1) { do { - lock_buffer(path[level].bp_bh); nilfs_btree_node_set_key( nilfs_btree_get_nonroot_node(path, level), path[level].bp_index, key); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); } while ((path[level].bp_index == 0) && (++level < nilfs_btree_height(btree) - 1)); } @@ -677,13 +675,11 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree, struct nilfs_btree_node *node; if (level < nilfs_btree_height(btree) - 1) { - lock_buffer(path[level].bp_bh); node = nilfs_btree_get_nonroot_node(path, level); nilfs_btree_node_insert(btree, node, *keyp, *ptrp, path[level].bp_index); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, @@ -703,9 +699,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, struct nilfs_btree_node *node, *left; int nchildren, lnchildren, n, move; - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -726,9 +719,6 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -754,9 +744,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, struct nilfs_btree_node *node, *right; int nchildren, rnchildren, n, move; - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -777,9 +764,6 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(right, 0)); @@ -808,9 +792,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree, __u64 newptr; int nchildren, n, move; - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -829,9 +810,6 @@ static void nilfs_btree_split(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - newkey = nilfs_btree_node_get_key(right, 0); newptr = path[level].bp_newreq.bpr_ptr; @@ -866,8 +844,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree, struct nilfs_btree_node *root, *child; int n; - lock_buffer(path[level].bp_sib_bh); - root = nilfs_btree_get_root(btree); child = nilfs_btree_get_sib_node(path, level); @@ -879,8 +855,6 @@ static void nilfs_btree_grow(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_sib_bh); - path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; @@ -1037,11 +1011,9 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, stats->bs_nblocks++; - lock_buffer(bh); nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, 0, level, 0, NULL, NULL); - unlock_buffer(bh); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_split; } @@ -1066,10 +1038,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, if (ret < 0) goto err_out_curr_node; - lock_buffer(bh); nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, 0, level, 0, NULL, NULL); - unlock_buffer(bh); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_grow; @@ -1168,13 +1138,11 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree, struct nilfs_btree_node *node; if (level < nilfs_btree_height(btree) - 1) { - lock_buffer(path[level].bp_bh); node = nilfs_btree_get_nonroot_node(path, level); nilfs_btree_node_delete(btree, node, keyp, ptrp, path[level].bp_index); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -1194,9 +1162,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -1211,9 +1176,6 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); @@ -1231,9 +1193,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); @@ -1248,9 +1207,6 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(right, 0)); @@ -1269,9 +1225,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); @@ -1282,9 +1235,6 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; @@ -1300,9 +1250,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); - lock_buffer(path[level].bp_sib_bh); - node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); @@ -1313,9 +1260,6 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree, if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); - unlock_buffer(path[level].bp_bh); - unlock_buffer(path[level].bp_sib_bh); - nilfs_btnode_delete(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level + 1].bp_index++; @@ -1330,7 +1274,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree, nilfs_btree_do_delete(btree, path, level, keyp, ptrp); - lock_buffer(path[level].bp_bh); root = nilfs_btree_get_root(btree); child = nilfs_btree_get_nonroot_node(path, level); @@ -1338,7 +1281,6 @@ static void nilfs_btree_shrink(struct nilfs_btree *btree, nilfs_btree_node_set_level(root, level); n = nilfs_btree_node_get_nchildren(child); nilfs_btree_node_move_left(btree, root, child, n); - unlock_buffer(path[level].bp_bh); nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = NULL; @@ -1713,7 +1655,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat); /* create child node at level 1 */ - lock_buffer(bh); node = (struct nilfs_btree_node *)bh->b_data; nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs); nilfs_btree_node_insert(btree, node, @@ -1723,7 +1664,6 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, if (!nilfs_bmap_dirty(bmap)) nilfs_bmap_set_dirty(bmap); - unlock_buffer(bh); brelse(bh); /* create root node at level 2 */ -- cgit v0.10.2 From 09bf4aae0a3c471b721c43e7bdb6132200d907b2 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 5 Nov 2009 15:53:27 +0900 Subject: nilfs2: stop marking metadata inode dirty within btree operations Since metadata file routines mark the inode dirty after they successfully changed bmap objects, nilfs_mdt_mark_dirty() calls in nilfs_bmap_add_blocks() and nilfs_bmap_sub_blocks() are redundant. This removes these overlapping calls from the bmap routines. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 08834df..afc060a 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -402,18 +402,14 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n) { inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); - if (NILFS_MDT(bmap->b_inode)) - nilfs_mdt_mark_dirty(bmap->b_inode); - else + if (!NILFS_MDT(bmap->b_inode)) mark_inode_dirty(bmap->b_inode); } void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) { inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); - if (NILFS_MDT(bmap->b_inode)) - nilfs_mdt_mark_dirty(bmap->b_inode); - else + if (!NILFS_MDT(bmap->b_inode)) mark_inode_dirty(bmap->b_inode); } -- cgit v0.10.2 From 9cb4e0d2b99e8b0e5e269d898ae6ab1967647c5a Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 6 Nov 2009 01:00:48 +0900 Subject: nilfs2: move out mark_inode_dirty calls from bmap routines Previously, nilfs_bmap_add_blocks() and nilfs_bmap_sub_blocks() called mark_inode_dirty() after they changed the number of data blocks. This moves these calls outside bmap outermost functions like nilfs_bmap_insert() or nilfs_bmap_truncate(). This will mitigate overhead for truncate or delete operation since they repeatedly remove set of blocks. Nearly 10 percent improvement was observed for removal of a large file: # dd if=/dev/zero of=/test/aaa bs=1M count=512 # time rm /test/aaa real 2.968s -> 2.705s Further optimization may be possible by eliminating these mark_inode_dirty() uses though I avoid mixing separate changes here. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index afc060a..f4a14ea 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -402,15 +402,11 @@ int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *bmap) void nilfs_bmap_add_blocks(const struct nilfs_bmap *bmap, int n) { inode_add_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); - if (!NILFS_MDT(bmap->b_inode)) - mark_inode_dirty(bmap->b_inode); } void nilfs_bmap_sub_blocks(const struct nilfs_bmap *bmap, int n) { inode_sub_bytes(bmap->b_inode, (1 << bmap->b_inode->i_blkbits) * n); - if (!NILFS_MDT(bmap->b_inode)) - mark_inode_dirty(bmap->b_inode); } __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap, diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 412b25a..a16c179 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -97,6 +97,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, nilfs_transaction_abort(inode->i_sb); goto out; } + mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); @@ -598,6 +599,7 @@ void nilfs_truncate(struct inode *inode) if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); + mark_inode_dirty(inode); nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. @@ -622,6 +624,7 @@ void nilfs_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); nilfs_truncate_bmap(ii, 0); + mark_inode_dirty(inode); nilfs_free_inode(inode); /* nilfs_free_inode() marks inode buffer dirty */ if (IS_SYNC(inode)) -- cgit v0.10.2 From 5731e191f254af9135ad843119804a500528ecf3 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 12 Nov 2009 22:42:04 +0900 Subject: nilfs2: add size option of private object to metadata file allocator This adds an optional "object size" argument to nilfs_mdt_new_common() function; the argument specifies the size of private object attached to a newly allocated metadata file inode. This will afford space to keep local variables for meta data files. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index e6de0a2..32b04da 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -212,9 +212,10 @@ void nilfs_destroy_gccache(struct the_nilfs *nilfs) static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, __u64 cno) { - struct inode *inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS); + struct inode *inode; struct nilfs_inode_info *ii; + inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS, 0); if (!inode) return NULL; diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index f632611..62074e8 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -445,9 +445,17 @@ static const struct file_operations def_mdt_fops; * longer than those of the super block structs; they may continue for * several consecutive mounts/umounts. This would need discussions. */ +/** + * nilfs_mdt_new_common - allocate a pseudo inode for metadata file + * @nilfs: nilfs object + * @sb: super block instance the metadata file belongs to + * @ino: inode number + * @gfp_mask: gfp mask for data pages + * @objsz: size of the private object attached to inode->i_private + */ struct inode * nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, gfp_t gfp_mask) + ino_t ino, gfp_t gfp_mask, size_t objsz) { struct inode *inode = nilfs_alloc_inode_common(nilfs); @@ -455,8 +463,9 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, return NULL; else { struct address_space * const mapping = &inode->i_data; - struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS); + struct nilfs_mdt_info *mi; + mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); if (!mi) { nilfs_destroy_inode(inode); return NULL; @@ -513,11 +522,11 @@ nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, } struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino) + ino_t ino, size_t objsz) { - struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, - NILFS_MDT_GFP); + struct inode *inode; + inode = nilfs_mdt_new_common(nilfs, sb, ino, NILFS_MDT_GFP, objsz); if (!inode) return NULL; diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index 4315997..3eb40e8 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -74,9 +74,10 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long); int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); int nilfs_mdt_fetch_dirty(struct inode *); -struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t); +struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, + size_t); struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, - ino_t, gfp_t); + ino_t, gfp_t, size_t); void nilfs_mdt_destroy(struct inode *); void nilfs_mdt_clear(struct inode *); void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index b6837f4..77f2e47 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -363,7 +363,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) list_add(&sbi->s_list, &nilfs->ns_supers); up_write(&nilfs->ns_super_sem); - sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO); + sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO, 0); if (!sbi->s_ifile) return -ENOMEM; diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index ad391a8..a80bbb7 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -187,19 +187,19 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, inode_size = nilfs->ns_inode_size; err = -ENOMEM; - nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); + nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, 0); if (unlikely(!nilfs->ns_dat)) goto failed; - nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO); + nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, 0); if (unlikely(!nilfs->ns_gc_dat)) goto failed_dat; - nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO); + nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0); if (unlikely(!nilfs->ns_cpfile)) goto failed_gc_dat; - nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO); + nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO, 0); if (unlikely(!nilfs->ns_sufile)) goto failed_cpfile; -- cgit v0.10.2 From 79739565e15f2adbc482207a0800fc127c84d1a0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 12 Nov 2009 23:56:43 +0900 Subject: nilfs2: separate constructor of metadata files This will displace nilfs_mdt_new() constructor with individual metadata file constructors like nilfs_dat_new(), new_sufile_new(), nilfs_cpfile_new(), and nilfs_ifile_new(). This makes it possible for each metadata file to have own intialization code. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 3f5d5d0..2aaefae 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -926,3 +926,19 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) up_read(&NILFS_MDT(cpfile)->mi_sem); return ret; } + +/** + * nilfs_cpfile_new - create cpfile + * @nilfs: nilfs object + * @cpsize: size of a checkpoint entry + */ +struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize) +{ + struct inode *cpfile; + + cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0); + if (cpfile) + nilfs_mdt_set_entry_size(cpfile, cpsize, + sizeof(struct nilfs_cpfile_header)); + return cpfile; +} diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index debea89..8ff2b4f 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -40,4 +40,6 @@ int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, size_t); +struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize); + #endif /* _NILFS_CPFILE_H */ diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 1ff8e15..239a422 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -425,3 +425,26 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, return nvi; } + +/** + * nilfs_dat_new - create dat file + * @nilfs: nilfs object + * @entry_size: size of a dat entry + */ +struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size) +{ + static struct lock_class_key dat_lock_key; + struct inode *dat; + int err; + + dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, 0); + if (dat) { + err = nilfs_palloc_init_blockgroup(dat, entry_size); + if (unlikely(err)) { + nilfs_mdt_destroy(dat); + return NULL; + } + lockdep_set_class(&NILFS_MDT(dat)->mi_sem, &dat_lock_key); + } + return dat; +} diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index 406070d..98f3306 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -53,4 +53,6 @@ int nilfs_dat_freev(struct inode *, __u64 *, size_t); int nilfs_dat_move(struct inode *, __u64, sector_t); ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); +struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size); + #endif /* _NILFS_DAT_H */ diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index de86401..c1c1fd3 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -148,3 +148,24 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, } return err; } + +/** + * nilfs_ifile_new - create inode file + * @sbi: nilfs_sb_info struct + * @inode_size: size of an inode + */ +struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size) +{ + struct inode *ifile; + int err; + + ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO, 0); + if (ifile) { + err = nilfs_palloc_init_blockgroup(ifile, inode_size); + if (unlikely(err)) { + nilfs_mdt_destroy(ifile); + return NULL; + } + } + return ifile; +} diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index ecc3ba76..cbca32e 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -49,4 +49,6 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); int nilfs_ifile_delete_inode(struct inode *, ino_t); int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); +struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size); + #endif /* _NILFS_IFILE_H */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 37994d4..6fb707a 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -657,3 +657,19 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, up_read(&NILFS_MDT(sufile)->mi_sem); return ret; } + +/** + * nilfs_sufile_new - create sufile + * @nilfs: nilfs object + * @susize: size of a segment usage entry + */ +struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize) +{ + struct inode *sufile; + + sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO, 0); + if (sufile) + nilfs_mdt_set_entry_size(sufile, susize, + sizeof(struct nilfs_sufile_header)); + return sufile; +} diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 0e99e5c..50d3191 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -62,6 +62,8 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); +struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize); + /** * nilfs_sufile_scrap - make a segment garbage * @sufile: inode of segment usage file diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 77f2e47..05ae52a 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -363,14 +363,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) list_add(&sbi->s_list, &nilfs->ns_supers); up_write(&nilfs->ns_super_sem); - sbi->s_ifile = nilfs_mdt_new(nilfs, sbi->s_super, NILFS_IFILE_INO, 0); + sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); if (!sbi->s_ifile) return -ENOMEM; - err = nilfs_palloc_init_blockgroup(sbi->s_ifile, nilfs->ns_inode_size); - if (unlikely(err)) - goto failed; - down_read(&nilfs->ns_segctor_sem); err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, &bh_cp); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index a80bbb7..d4a731f 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -166,7 +166,6 @@ void put_nilfs(struct the_nilfs *nilfs) static int nilfs_load_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, sector_t sr_block) { - static struct lock_class_key dat_lock_key; struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; struct nilfs_super_block **sbp = nilfs->ns_sbp; @@ -187,38 +186,23 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, inode_size = nilfs->ns_inode_size; err = -ENOMEM; - nilfs->ns_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, 0); + nilfs->ns_dat = nilfs_dat_new(nilfs, dat_entry_size); if (unlikely(!nilfs->ns_dat)) goto failed; - nilfs->ns_gc_dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, 0); + nilfs->ns_gc_dat = nilfs_dat_new(nilfs, dat_entry_size); if (unlikely(!nilfs->ns_gc_dat)) goto failed_dat; - nilfs->ns_cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0); + nilfs->ns_cpfile = nilfs_cpfile_new(nilfs, checkpoint_size); if (unlikely(!nilfs->ns_cpfile)) goto failed_gc_dat; - nilfs->ns_sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO, 0); + nilfs->ns_sufile = nilfs_sufile_new(nilfs, segment_usage_size); if (unlikely(!nilfs->ns_sufile)) goto failed_cpfile; - err = nilfs_palloc_init_blockgroup(nilfs->ns_dat, dat_entry_size); - if (unlikely(err)) - goto failed_sufile; - - err = nilfs_palloc_init_blockgroup(nilfs->ns_gc_dat, dat_entry_size); - if (unlikely(err)) - goto failed_sufile; - - lockdep_set_class(&NILFS_MDT(nilfs->ns_dat)->mi_sem, &dat_lock_key); - lockdep_set_class(&NILFS_MDT(nilfs->ns_gc_dat)->mi_sem, &dat_lock_key); - nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); - nilfs_mdt_set_entry_size(nilfs->ns_cpfile, checkpoint_size, - sizeof(struct nilfs_cpfile_header)); - nilfs_mdt_set_entry_size(nilfs->ns_sufile, segment_usage_size, - sizeof(struct nilfs_sufile_header)); err = nilfs_mdt_read_inode_direct( nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size)); -- cgit v0.10.2 From 8707df38478c8e0958b706f0ea1cdf99d00a9469 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 01:36:56 +0900 Subject: nilfs2: separate read method of meta data files on super root block Will displace nilfs_mdt_read_inode_direct function with an individual read method: nilfs_dat_read, nilfs_sufile_read, nilfs_cpfile_read. This provides the opportunity to initialize local variables of each metadata file after reading the inode. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 2aaefae..d5ad54e 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -928,6 +928,16 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) } /** + * nilfs_cpfile_read - read cpfile inode + * @cpfile: cpfile inode + * @raw_inode: on-disk cpfile inode + */ +int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode) +{ + return nilfs_read_inode_common(cpfile, raw_inode); +} + +/** * nilfs_cpfile_new - create cpfile * @nilfs: nilfs object * @cpsize: size of a checkpoint entry diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index 8ff2b4f..bc0809e 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -40,6 +40,7 @@ int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, size_t); +int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode); struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize); #endif /* _NILFS_CPFILE_H */ diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 239a422..eff3169 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -427,6 +427,16 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, } /** + * nilfs_dat_read - read dat inode + * @dat: dat inode + * @raw_inode: on-disk dat inode + */ +int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode) +{ + return nilfs_read_inode_common(dat, raw_inode); +} + +/** * nilfs_dat_new - create dat file * @nilfs: nilfs object * @entry_size: size of a dat entry diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index 98f3306..d31c3aa 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -53,6 +53,7 @@ int nilfs_dat_freev(struct inode *, __u64 *, size_t); int nilfs_dat_move(struct inode *, __u64, sector_t); ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); +int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode); struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size); #endif /* _NILFS_DAT_H */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 6fb707a..b344f27 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -659,6 +659,16 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, } /** + * nilfs_sufile_read - read sufile inode + * @sufile: sufile inode + * @raw_inode: on-disk sufile inode + */ +int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode) +{ + return nilfs_read_inode_common(sufile, raw_inode); +} + +/** * nilfs_sufile_new - create sufile * @nilfs: nilfs object * @susize: size of a segment usage entry diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 50d3191..b303a80 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -62,6 +62,7 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); +int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode); struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize); /** diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index d4a731f..bc7760c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -204,18 +204,18 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); - err = nilfs_mdt_read_inode_direct( - nilfs->ns_dat, bh_sr, NILFS_SR_DAT_OFFSET(inode_size)); + err = nilfs_dat_read(nilfs->ns_dat, (void *)bh_sr->b_data + + NILFS_SR_DAT_OFFSET(inode_size)); if (unlikely(err)) goto failed_sufile; - err = nilfs_mdt_read_inode_direct( - nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(inode_size)); + err = nilfs_cpfile_read(nilfs->ns_cpfile, (void *)bh_sr->b_data + + NILFS_SR_CPFILE_OFFSET(inode_size)); if (unlikely(err)) goto failed_sufile; - err = nilfs_mdt_read_inode_direct( - nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(inode_size)); + err = nilfs_sufile_read(nilfs->ns_sufile, (void *)bh_sr->b_data + + NILFS_SR_SUFILE_OFFSET(inode_size)); if (unlikely(err)) goto failed_sufile; -- cgit v0.10.2 From 3961f0e2775f84a8f81b0dcddb0b356ebfe0696b Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 01:55:02 +0900 Subject: nilfs2: eliminate inlines to directly read/write inode of metadata files Removes two inline functions: nilfs_mdt_read_inode_direct() and nilfs_mdt_write_inode_direct(). Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index 3eb40e8..cd2903a 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -105,21 +105,4 @@ static inline __u64 nilfs_mdt_cno(struct inode *inode) #define nilfs_mdt_bgl_lock(inode, bg) \ (&NILFS_MDT(inode)->mi_bgl->locks[(bg) & (NR_BG_LOCKS-1)].lock) - -static inline int -nilfs_mdt_read_inode_direct(struct inode *inode, struct buffer_head *bh, - unsigned n) -{ - return nilfs_read_inode_common( - inode, (struct nilfs_inode *)(bh->b_data + n)); -} - -static inline void -nilfs_mdt_write_inode_direct(struct inode *inode, struct buffer_head *bh, - unsigned n) -{ - nilfs_write_inode_common( - inode, (struct nilfs_inode *)(bh->b_data + n), 1); -} - #endif /* _NILFS_MDT_H */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 6eff66a..d21179b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -974,12 +974,12 @@ static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci, nilfs->ns_nongc_ctime : sci->sc_seg_ctime); raw_sr->sr_flags = 0; - nilfs_mdt_write_inode_direct( - nilfs_dat_inode(nilfs), bh_sr, NILFS_SR_DAT_OFFSET(isz)); - nilfs_mdt_write_inode_direct( - nilfs->ns_cpfile, bh_sr, NILFS_SR_CPFILE_OFFSET(isz)); - nilfs_mdt_write_inode_direct( - nilfs->ns_sufile, bh_sr, NILFS_SR_SUFILE_OFFSET(isz)); + nilfs_write_inode_common(nilfs_dat_inode(nilfs), (void *)raw_sr + + NILFS_SR_DAT_OFFSET(isz), 1); + nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr + + NILFS_SR_CPFILE_OFFSET(isz), 1); + nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr + + NILFS_SR_SUFILE_OFFSET(isz), 1); } static void nilfs_redirty_inodes(struct list_head *head) -- cgit v0.10.2 From fd66c0d5c377ee8146909d0eb9258539e4b0f293 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 02:25:41 +0900 Subject: nilfs2: hide nilfs_mdt_clear calls in nilfs_mdt_destroy This will hide a function call of nilfs_mdt_clear() in nilfs_mdt_destroy(). This ensures nilfs_mdt_destroy() to do cleanup jobs included in nilfs_mdt_clear(). Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 32b04da..67d2099 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -266,7 +266,6 @@ struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno) */ void nilfs_clear_gcinode(struct inode *inode) { - nilfs_mdt_clear(inode); nilfs_mdt_destroy(inode); } diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 62074e8..3028e8f 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -553,14 +553,15 @@ void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) &NILFS_I(orig)->i_btnode_cache; } -void nilfs_mdt_clear(struct inode *inode) +static void nilfs_mdt_clear(struct inode *inode) { struct nilfs_inode_info *ii = NILFS_I(inode); invalidate_mapping_pages(inode->i_mapping, 0, -1); truncate_inode_pages(inode->i_mapping, 0); - nilfs_bmap_clear(ii->i_bmap); + if (test_bit(NILFS_I_BMAP, &ii->i_state)) + nilfs_bmap_clear(ii->i_bmap); nilfs_btnode_cache_clear(&ii->i_btnode_cache); } @@ -568,6 +569,8 @@ void nilfs_mdt_destroy(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + nilfs_mdt_clear(inode); + kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ kfree(mdi); nilfs_destroy_inode(inode); diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index cd2903a..c396b6c 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -79,7 +79,6 @@ struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, ino_t, gfp_t, size_t); void nilfs_mdt_destroy(struct inode *); -void nilfs_mdt_clear(struct inode *); void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); void nilfs_mdt_set_shadow(struct inode *, struct inode *); diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 6dc8359..bcd386d 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -770,14 +770,8 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, nilfs_finish_roll_forward(nilfs, sbi, ri); } - nilfs_detach_checkpoint(sbi); - return 0; - failed: nilfs_detach_checkpoint(sbi); - nilfs_mdt_clear(nilfs->ns_cpfile); - nilfs_mdt_clear(nilfs->ns_sufile); - nilfs_mdt_clear(nilfs->ns_dat); return err; } diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 05ae52a..f526169 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -407,7 +407,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) { struct the_nilfs *nilfs = sbi->s_nilfs; - nilfs_mdt_clear(sbi->s_ifile); nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; down_write(&nilfs->ns_super_sem); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index bc7760c..75095ed 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -146,13 +146,9 @@ void put_nilfs(struct the_nilfs *nilfs) might_sleep(); if (nilfs_loaded(nilfs)) { - nilfs_mdt_clear(nilfs->ns_sufile); nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_clear(nilfs->ns_cpfile); nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_clear(nilfs->ns_dat); nilfs_mdt_destroy(nilfs->ns_dat); - /* XXX: how and when to clear nilfs->ns_gc_dat? */ nilfs_mdt_destroy(nilfs->ns_gc_dat); } if (nilfs_init(nilfs)) { -- cgit v0.10.2 From 7b16c8a211c87d465c48ea324928f8057590b853 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 03:10:21 +0900 Subject: nilfs2: unfold nilfs_sufile_block_get_header function This unfolds the nilfs_sufile_block_get_header() function for simplicity. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index b344f27..8834472 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -62,14 +62,6 @@ nilfs_sufile_segment_usages_in_block(const struct inode *sufile, __u64 curr, max - curr + 1); } -static inline struct nilfs_sufile_header * -nilfs_sufile_block_get_header(const struct inode *sufile, - struct buffer_head *bh, - void *kaddr) -{ - return kaddr + bh_offset(bh); -} - static struct nilfs_segment_usage * nilfs_sufile_block_get_segment_usage(const struct inode *sufile, __u64 segnum, struct buffer_head *bh, void *kaddr) @@ -270,7 +262,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) if (ret < 0) goto out_sem; kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + header = kaddr + bh_offset(header_bh); ncleansegs = le64_to_cpu(header->sh_ncleansegs); last_alloc = le64_to_cpu(header->sh_last_alloc); kunmap_atomic(kaddr, KM_USER0); @@ -302,8 +294,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) kunmap_atomic(kaddr, KM_USER0); kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header( - sufile, header_bh, kaddr); + header = kaddr + bh_offset(header_bh); le64_add_cpu(&header->sh_ncleansegs, -1); le64_add_cpu(&header->sh_ndirtysegs, 1); header->sh_last_alloc = cpu_to_le64(segnum); @@ -515,7 +506,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) goto out_sem; kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = nilfs_sufile_block_get_header(sufile, header_bh, kaddr); + header = kaddr + bh_offset(header_bh); sustat->ss_nsegs = nilfs_sufile_get_nsegments(sufile); sustat->ss_ncleansegs = le64_to_cpu(header->sh_ncleansegs); sustat->ss_ndirtysegs = le64_to_cpu(header->sh_ndirtysegs); -- cgit v0.10.2 From aa474a220180d997caafcee372770d6ed6bf798a Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 03:41:55 +0900 Subject: nilfs2: add local variable to cache the number of clean segments This makes it possible for sufile to get the number of clean segments faster. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 8834472..e9b4cec 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -31,6 +31,16 @@ #include "sufile.h" +struct nilfs_sufile_info { + struct nilfs_mdt_info mi; + unsigned long ncleansegs; +}; + +static inline struct nilfs_sufile_info *NILFS_SUI(struct inode *sufile) +{ + return (struct nilfs_sufile_info *)NILFS_MDT(sufile); +} + static inline unsigned long nilfs_sufile_segment_usages_per_block(const struct inode *sufile) { @@ -300,6 +310,7 @@ int nilfs_sufile_alloc(struct inode *sufile, __u64 *segnump) header->sh_last_alloc = cpu_to_le64(segnum); kunmap_atomic(kaddr, KM_USER0); + NILFS_SUI(sufile)->ncleansegs--; nilfs_mdt_mark_buffer_dirty(header_bh); nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); @@ -342,6 +353,8 @@ void nilfs_sufile_do_cancel_free(struct inode *sufile, __u64 segnum, kunmap_atomic(kaddr, KM_USER0); nilfs_sufile_mod_counter(header_bh, -1, 1); + NILFS_SUI(sufile)->ncleansegs--; + nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -371,6 +384,8 @@ void nilfs_sufile_do_scrap(struct inode *sufile, __u64 segnum, kunmap_atomic(kaddr, KM_USER0); nilfs_sufile_mod_counter(header_bh, clean ? (u64)-1 : 0, dirty ? 0 : 1); + NILFS_SUI(sufile)->ncleansegs -= clean; + nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -400,6 +415,8 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_sufile_mod_counter(header_bh, 1, sudirty ? (u64)-1 : 0); + NILFS_SUI(sufile)->ncleansegs++; + nilfs_mdt_mark_dirty(sufile); } @@ -541,13 +558,8 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) */ int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) { - struct nilfs_sustat sustat; - int ret; - - ret = nilfs_sufile_get_stat(sufile, &sustat); - if (ret == 0) - *nsegsp = sustat.ss_ncleansegs; - return ret; + *nsegsp = NILFS_SUI(sufile)->ncleansegs; + return 0; } void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, @@ -568,8 +580,10 @@ void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, nilfs_segment_usage_set_error(su); kunmap_atomic(kaddr, KM_USER0); - if (suclean) + if (suclean) { nilfs_sufile_mod_counter(header_bh, -1, 0); + NILFS_SUI(sufile)->ncleansegs--; + } nilfs_mdt_mark_buffer_dirty(su_bh); nilfs_mdt_mark_dirty(sufile); } @@ -656,7 +670,25 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, */ int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode) { - return nilfs_read_inode_common(sufile, raw_inode); + struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + struct buffer_head *header_bh; + struct nilfs_sufile_header *header; + void *kaddr; + int ret; + + ret = nilfs_read_inode_common(sufile, raw_inode); + if (ret < 0) + return ret; + + ret = nilfs_sufile_get_header_block(sufile, &header_bh); + if (!ret) { + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = kaddr + bh_offset(header_bh); + sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); + kunmap_atomic(kaddr, KM_USER0); + brelse(header_bh); + } + return ret; } /** @@ -668,7 +700,8 @@ struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize) { struct inode *sufile; - sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO, 0); + sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO, + sizeof(struct nilfs_sufile_info)); if (sufile) nilfs_mdt_set_entry_size(sufile, susize, sizeof(struct nilfs_sufile_header)); -- cgit v0.10.2 From ef7d4757a5b7b07a3a0d30d3ba6b587e574b28b9 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 08:45:32 +0900 Subject: nilfs2: simplify nilfs_sufile_get_ncleansegs function Previously, this function took an status code to return possible error codes. The ("nilfs2: add local variable to cache the number of clean segments") patch removed the possibility to return errors. So, this simplifies the function definition to make it directly return the number of clean segments. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index e9b4cec..5f18aca 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -112,6 +112,15 @@ static void nilfs_sufile_mod_counter(struct buffer_head *header_bh, } /** + * nilfs_sufile_get_ncleansegs - return the number of clean segments + * @sufile: inode of segment usage file + */ +unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile) +{ + return NILFS_SUI(sufile)->ncleansegs; +} + +/** * nilfs_sufile_updatev - modify multiple segment usages at a time * @sufile: inode of segment usage file * @segnumv: array of segment numbers @@ -540,28 +549,6 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) return ret; } -/** - * nilfs_sufile_get_ncleansegs - get the number of clean segments - * @sufile: inode of segment usage file - * @nsegsp: pointer to the number of clean segments - * - * Description: nilfs_sufile_get_ncleansegs() acquires the number of clean - * segments. - * - * Return Value: On success, 0 is returned and the number of clean segments is - * stored in the place pointed by @nsegsp. On error, one of the following - * negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - */ -int nilfs_sufile_get_ncleansegs(struct inode *sufile, unsigned long *nsegsp) -{ - *nsegsp = NILFS_SUI(sufile)->ncleansegs; - return 0; -} - void nilfs_sufile_do_set_error(struct inode *sufile, __u64 segnum, struct buffer_head *header_bh, struct buffer_head *su_bh) diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index b303a80..c339ad5 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -34,6 +34,8 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; } +unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); + int nilfs_sufile_alloc(struct inode *, __u64 *); int nilfs_sufile_get_segment_usage(struct inode *, __u64, struct nilfs_segment_usage **, @@ -41,7 +43,6 @@ int nilfs_sufile_get_segment_usage(struct inode *, __u64, void nilfs_sufile_put_segment_usage(struct inode *, __u64, struct buffer_head *); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); -int nilfs_sufile_get_ncleansegs(struct inode *, unsigned long *); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, size_t); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 75095ed..4d4d35c 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -612,30 +612,23 @@ int nilfs_count_free_blocks(struct the_nilfs *nilfs, sector_t *nblocks) { struct inode *dat = nilfs_dat_inode(nilfs); unsigned long ncleansegs; - int err; down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - err = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile, &ncleansegs); + ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - if (likely(!err)) - *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; - return err; + *nblocks = (sector_t)ncleansegs * nilfs->ns_blocks_per_segment; + return 0; } int nilfs_near_disk_full(struct the_nilfs *nilfs) { - struct inode *sufile = nilfs->ns_sufile; unsigned long ncleansegs, nincsegs; - int ret; - ret = nilfs_sufile_get_ncleansegs(sufile, &ncleansegs); - if (likely(!ret)) { - nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / - nilfs->ns_blocks_per_segment + 1; - if (ncleansegs <= nilfs->ns_nrsvsegs + nincsegs) - ret++; - } - return ret; + ncleansegs = nilfs_sufile_get_ncleansegs(nilfs->ns_sufile); + nincsegs = atomic_read(&nilfs->ns_ndirtyblks) / + nilfs->ns_blocks_per_segment + 1; + + return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs; } /** -- cgit v0.10.2 From b34a65069caa56b691ebab5ae0b8e54d16406d16 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 14 Nov 2009 00:09:47 +0900 Subject: nilfs2: avoid readahead on metadata file for create mode This turns off readhead action of metadata file if nilfs_mdt_get_block function was called with a create flag. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 3028e8f..948b1f8 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -186,7 +186,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, } static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, - struct buffer_head **out_bh) + int readahead, struct buffer_head **out_bh) { struct buffer_head *first_bh, *bh; unsigned long blkoff; @@ -200,16 +200,18 @@ static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, if (unlikely(err)) goto failed; - blkoff = block + 1; - for (i = 0; i < nr_ra_blocks; i++, blkoff++) { - err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); - if (likely(!err || err == -EEXIST)) - brelse(bh); - else if (err != -EBUSY) - break; /* abort readahead if bmap lookup failed */ - - if (!buffer_locked(first_bh)) - goto out_no_wait; + if (readahead) { + blkoff = block + 1; + for (i = 0; i < nr_ra_blocks; i++, blkoff++) { + err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); + if (likely(!err || err == -EEXIST)) + brelse(bh); + else if (err != -EBUSY) + break; + /* abort readahead if bmap lookup failed */ + if (!buffer_locked(first_bh)) + goto out_no_wait; + } } wait_on_buffer(first_bh); @@ -263,7 +265,7 @@ int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, /* Should be rewritten with merging nilfs_mdt_read_block() */ retry: - ret = nilfs_mdt_read_block(inode, blkoff, out_bh); + ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh); if (!create || ret != -ENOENT) return ret; @@ -371,7 +373,7 @@ int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) struct buffer_head *bh; int err; - err = nilfs_mdt_read_block(inode, block, &bh); + err = nilfs_mdt_read_block(inode, block, 0, &bh); if (unlikely(err)) return err; nilfs_mark_buffer_dirty(bh); -- cgit v0.10.2 From d501d7368937740e8d06671a4bfe4e236ed25bd0 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 16:04:11 +0900 Subject: nilfs2: separate function for creating new btree node block Adds a separate routine for creating a btree node block. This is a preparation to reduce the depth of function calls during submitting btree node buffer. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 84c2538..fc3e4bd 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -68,6 +68,32 @@ void nilfs_btnode_cache_clear(struct address_space *btnc) truncate_inode_pages(btnc, 0); } +struct buffer_head * +nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) +{ + struct inode *inode = NILFS_BTNC_I(btnc); + struct buffer_head *bh; + + bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); + if (unlikely(!bh)) + return NULL; + + if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || + buffer_dirty(bh))) { + brelse(bh); + BUG(); + } + memset(bh->b_data, 0, 1 << inode->i_blkbits); + bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; + bh->b_blocknr = blocknr; + set_buffer_mapped(bh); + set_buffer_uptodate(bh); + + unlock_page(bh->b_page); + page_cache_release(bh->b_page); + return bh; +} + int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, sector_t pblocknr, struct buffer_head **pbh, int newblk) diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 3e22751..c53644c 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -40,6 +40,8 @@ struct nilfs_btnode_chkey_ctxt { void nilfs_btnode_cache_init_once(struct address_space *); void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); void nilfs_btnode_cache_clear(struct address_space *); +struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, + __u64 blocknr); int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, struct buffer_head **, int); int nilfs_btnode_get(struct address_space *, __u64, sector_t, -- cgit v0.10.2 From 45f4910bc0bb904bcf53aa04ee1b807abe1387a6 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 16:25:19 +0900 Subject: nilfs2: use nilfs_btnode_create_block function This displaces nilfs_btnode_get() use to create new btree node block with nilfs_btnode_create_block. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index fc3e4bd..7086a2a 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -270,12 +270,13 @@ retry: unlock_page(obh->b_page); } - err = nilfs_btnode_get(btnc, newkey, 0, &nbh, 1); - if (likely(!err)) { - BUG_ON(nbh == obh); - ctxt->newbh = nbh; - } - return err; + nbh = nilfs_btnode_create_block(btnc, newkey); + if (!nbh) + return -ENOMEM; + + BUG_ON(nbh == obh); + ctxt->newbh = nbh; + return 0; failed_unlock: unlock_page(obh->b_page); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index c9aab29..7b0cc4f 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -122,12 +122,15 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, { struct address_space *btnc = &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; - int ret; + struct buffer_head *bh; - ret = nilfs_btnode_get(btnc, ptr, 0, bhp, 1); - if (!ret) - set_buffer_nilfs_volatile(*bhp); - return ret; + bh = nilfs_btnode_create_block(btnc, ptr); + if (!bh) + return -ENOMEM; + + set_buffer_nilfs_volatile(bh); + *bhp = bh; + return 0; } static inline int -- cgit v0.10.2 From 75f65edfcc4a19d14fc8ab860846fad070c8db49 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 16:30:41 +0900 Subject: nilfs2: remove newblk argument from nilfs_btnode_submit_block This removes the obsolete argument from nilfs_btnode_submit_block(). This will complete separating a create function of btree node. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 7086a2a..59658f0 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -95,8 +95,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, struct buffer_head **pbh, - int newblk) + sector_t pblocknr, struct buffer_head **pbh) { struct buffer_head *bh; struct inode *inode = NILFS_BTNC_I(btnc); @@ -107,19 +106,6 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, return -ENOMEM; err = -EEXIST; /* internal code */ - if (newblk) { - if (unlikely(buffer_mapped(bh) || buffer_uptodate(bh) || - buffer_dirty(bh))) { - brelse(bh); - BUG(); - } - memset(bh->b_data, 0, 1 << inode->i_blkbits); - bh->b_bdev = NILFS_I_NILFS(inode)->ns_bdev; - bh->b_blocknr = blocknr; - set_buffer_mapped(bh); - set_buffer_uptodate(bh); - goto found; - } if (buffer_uptodate(bh) || buffer_dirty(bh)) goto found; @@ -162,12 +148,12 @@ out_locked: } int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, struct buffer_head **pbh, int newblk) + sector_t pblocknr, struct buffer_head **pbh) { struct buffer_head *bh; int err; - err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh, newblk); + err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh); if (err == -EEXIST) /* internal code (cache hit) */ return 0; if (unlikely(err)) diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index c53644c..3d5cf08 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -43,9 +43,9 @@ void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, - struct buffer_head **, int); + struct buffer_head **); int nilfs_btnode_get(struct address_space *, __u64, sector_t, - struct buffer_head **, int); + struct buffer_head **); void nilfs_btnode_delete(struct buffer_head *); int nilfs_btnode_prepare_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 67d2099..e16a666 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -149,7 +149,7 @@ int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) { int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, - vbn ? : pbn, pbn, out_bh, 0); + vbn ? : pbn, pbn, out_bh); if (ret == -EEXIST) /* internal code (cache hit) */ ret = 0; return ret; -- cgit v0.10.2 From 1376e931b75f954057b1547ba25fcba594cef804 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 13 Nov 2009 16:49:09 +0900 Subject: nilfs2: eliminate nilfs_btnode_get function This removes the obsolete nilfs_btnode_get() function and makes nilfs_btree_get_block() directly call nilfs_btnode_submit_block(). This expansion will provide better opportunity for code optimization. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 59658f0..471e269 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -147,27 +147,6 @@ out_locked: return err; } -int nilfs_btnode_get(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, struct buffer_head **pbh) -{ - struct buffer_head *bh; - int err; - - err = nilfs_btnode_submit_block(btnc, blocknr, pblocknr, pbh); - if (err == -EEXIST) /* internal code (cache hit) */ - return 0; - if (unlikely(err)) - return err; - - bh = *pbh; - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) { - brelse(bh); - return -EIO; - } - return 0; -} - /** * nilfs_btnode_delete - delete B-tree node buffer * @bh: buffer to be deleted diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 3d5cf08..07da83f 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -44,8 +44,6 @@ struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, struct buffer_head **); -int nilfs_btnode_get(struct address_space *, __u64, sector_t, - struct buffer_head **); void nilfs_btnode_delete(struct buffer_head *); int nilfs_btnode_prepare_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 7b0cc4f..139b113 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -114,7 +114,18 @@ static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr, { struct address_space *btnc = &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; - return nilfs_btnode_get(btnc, ptr, 0, bhp, 0); + int err; + + err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp); + if (err) + return err == -EEXIST ? 0 : err; + + wait_on_buffer(*bhp); + if (!buffer_uptodate(*bhp)) { + brelse(*bhp); + return -EIO; + } + return 0; } static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, -- cgit v0.10.2 From 141bbdba9c2c1592d56b019277774099a5412aea Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 14 Nov 2009 13:48:06 +0900 Subject: nilfs2: unfold nilfs_palloc_block_get_bitmap function This expands a trivial address calculation in the function into its every callsite. This expansion improves readability of the callers. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index d69e6ae..7e2b3ce 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -176,13 +176,6 @@ nilfs_palloc_block_get_group_desc(const struct inode *inode, group % nilfs_palloc_groups_per_desc_block(inode); } -static unsigned char * -nilfs_palloc_block_get_bitmap(const struct inode *inode, - const struct buffer_head *bh, void *kaddr) -{ - return (unsigned char *)(kaddr + bh_offset(bh)); -} - void *nilfs_palloc_block_get_entry(const struct inode *inode, __u64 nr, const struct buffer_head *bh, void *kaddr) { @@ -289,8 +282,7 @@ int nilfs_palloc_prepare_alloc_entry(struct inode *inode, if (ret < 0) goto out_desc; bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap( - inode, bitmap_bh, bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(bitmap_bh); pos = nilfs_palloc_find_available_slot( inode, group, group_offset, bitmap, entries_per_group); @@ -351,8 +343,7 @@ void nilfs_palloc_commit_free_entry(struct inode *inode, desc = nilfs_palloc_block_get_group_desc(inode, group, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, - bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) @@ -385,8 +376,7 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode, desc = nilfs_palloc_block_get_group_desc(inode, group, req->pr_desc_bh, desc_kaddr); bitmap_kaddr = kmap(req->pr_bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap(inode, req->pr_bitmap_bh, - bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(req->pr_bitmap_bh); if (!nilfs_clear_bit_atomic(nilfs_mdt_bgl_lock(inode, group), group_offset, bitmap)) printk(KERN_WARNING "%s: entry numer %llu already freed\n", @@ -472,8 +462,7 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) desc = nilfs_palloc_block_get_group_desc( inode, group, desc_bh, desc_kaddr); bitmap_kaddr = kmap(bitmap_bh->b_page); - bitmap = nilfs_palloc_block_get_bitmap( - inode, bitmap_bh, bitmap_kaddr); + bitmap = bitmap_kaddr + bh_offset(bitmap_bh); for (j = i, n = 0; (j < nitems) && nilfs_palloc_group_is_in(inode, group, entry_nrs[j]); -- cgit v0.10.2 From db38d5ad323362bfca118b52fe5906f97a69fb45 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 14 Nov 2009 15:54:27 +0900 Subject: nilfs2: add cache framework for persistent object allocator This adds setup and cleanup routines of the persistent object allocator cache. According to ftrace analyses, accessing buffers of the DAT file suffers indispensable overhead many times. To mitigate the overhead, This introduce cache framework for the persistent object allocator (palloc) which the DAT file and ifile are using. struct nilfs_palloc_cache represents the cache object per metadata file using palloc. The cache is initialized through nilfs_palloc_setup_cache() and destroyed by nilfs_palloc_destroy_cache(); callers of the former function will be added to individual allocators of DAT and ifile on successive patches. nilfs_palloc_destroy_cache() will be called from nilfs_mdt_destroy() if the cache is attached to a metadata file. A companion function nilfs_palloc_clear_cache() is provided to allow releasing buffer head references independently with the cleanup task. This adjunctive function will be used before invalidating pages of metadata file with the cache. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index 7e2b3ce..c56300d 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -491,3 +491,30 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems) } return 0; } + +void nilfs_palloc_setup_cache(struct inode *inode, + struct nilfs_palloc_cache *cache) +{ + NILFS_MDT(inode)->mi_palloc_cache = cache; + spin_lock_init(&cache->lock); +} + +void nilfs_palloc_clear_cache(struct inode *inode) +{ + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + spin_lock(&cache->lock); + brelse(cache->prev_desc.bh); + brelse(cache->prev_bitmap.bh); + brelse(cache->prev_entry.bh); + cache->prev_desc.bh = NULL; + cache->prev_bitmap.bh = NULL; + cache->prev_entry.bh = NULL; + spin_unlock(&cache->lock); +} + +void nilfs_palloc_destroy_cache(struct inode *inode) +{ + nilfs_palloc_clear_cache(inode); + NILFS_MDT(inode)->mi_palloc_cache = NULL; +} diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h index 4ace5475..f4543ac 100644 --- a/fs/nilfs2/alloc.h +++ b/fs/nilfs2/alloc.h @@ -69,4 +69,25 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t); #define nilfs_clear_bit_atomic ext2_clear_bit_atomic #define nilfs_find_next_zero_bit ext2_find_next_zero_bit +/* + * persistent object allocator cache + */ + +struct nilfs_bh_assoc { + unsigned long blkoff; + struct buffer_head *bh; +}; + +struct nilfs_palloc_cache { + spinlock_t lock; + struct nilfs_bh_assoc prev_desc; + struct nilfs_bh_assoc prev_bitmap; + struct nilfs_bh_assoc prev_entry; +}; + +void nilfs_palloc_setup_cache(struct inode *inode, + struct nilfs_palloc_cache *cache); +void nilfs_palloc_clear_cache(struct inode *inode); +void nilfs_palloc_destroy_cache(struct inode *inode); + #endif /* _NILFS_ALLOC_H */ diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 948b1f8..06713ff 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -571,6 +571,8 @@ void nilfs_mdt_destroy(struct inode *inode) { struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + if (mdi->mi_palloc_cache) + nilfs_palloc_destroy_cache(inode); nilfs_mdt_clear(inode); kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index c396b6c..6c4bbb0 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -36,6 +36,7 @@ * @mi_entry_size: size of an entry * @mi_first_entry_offset: offset to the first entry * @mi_entries_per_block: number of entries in a block + * @mi_palloc_cache: persistent object allocator cache * @mi_blocks_per_group: number of blocks in a group * @mi_blocks_per_desc_block: number of blocks per descriptor block */ @@ -46,6 +47,7 @@ struct nilfs_mdt_info { unsigned mi_entry_size; unsigned mi_first_entry_offset; unsigned long mi_entries_per_block; + struct nilfs_palloc_cache *mi_palloc_cache; unsigned long mi_blocks_per_group; unsigned long mi_blocks_per_desc_block; }; -- cgit v0.10.2 From 8908b2f70b795299d21706b5a97676cfe7f9056a Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 14 Nov 2009 16:35:01 +0900 Subject: nilfs2: add palloc cache to dat This adds the palloc cache to DAT file. The palloc cache is allocated on the extended region of nilfs_mdt_info struct. The struct nilfs_dat_info defines the extended on memory structure of DAT. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index eff3169..187dd07 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -33,6 +33,16 @@ #define NILFS_CNO_MIN ((__u64)1) #define NILFS_CNO_MAX (~(__u64)0) +struct nilfs_dat_info { + struct nilfs_mdt_info mi; + struct nilfs_palloc_cache palloc_cache; +}; + +static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) +{ + return (struct nilfs_dat_info *)NILFS_MDT(dat); +} + static int nilfs_dat_prepare_entry(struct inode *dat, struct nilfs_palloc_req *req, int create) { @@ -445,16 +455,20 @@ struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size) { static struct lock_class_key dat_lock_key; struct inode *dat; + struct nilfs_dat_info *di; int err; - dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, 0); + dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, sizeof(*di)); if (dat) { err = nilfs_palloc_init_blockgroup(dat, entry_size); if (unlikely(err)) { nilfs_mdt_destroy(dat); return NULL; } - lockdep_set_class(&NILFS_MDT(dat)->mi_sem, &dat_lock_key); + + di = NILFS_DAT_I(dat); + lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); + nilfs_palloc_setup_cache(dat, &di->palloc_cache); } return dat; } -- cgit v0.10.2 From c3ea56c80081b826df4a0ac797432179cf5b7cd2 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 15 Nov 2009 20:13:21 +0900 Subject: nilfs2: flush palloc cache before manipulating data pages of GC dat Data pages in gcdat metadata file (i.e. the secondary DAT for GC), are cleared or even moved back to the normal DAT when a shot of garbage collection was done. Buffer heads held by the palloc cache of gcdat must be cleared before these page cache manipulation. This adds nilfs_palloc_clear_cache() to ensure this. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c index 93383c5..dd5f7e0 100644 --- a/fs/nilfs2/gcdat.c +++ b/fs/nilfs2/gcdat.c @@ -61,6 +61,8 @@ void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs) nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap); + nilfs_palloc_clear_cache(dat); + nilfs_palloc_clear_cache(gcdat); nilfs_clear_dirty_pages(mapping); nilfs_copy_back_pages(mapping, gmapping); /* note: mdt dirty flags should be cleared by segctor. */ @@ -79,6 +81,7 @@ void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs) gcdat->i_state = I_CLEAR; gii->i_flags = 0; + nilfs_palloc_clear_cache(gcdat); truncate_inode_pages(gcdat->i_mapping, 0); truncate_inode_pages(&gii->i_btnode_cache, 0); } -- cgit v0.10.2 From 49fa7a590208b439cc74f2cafdb15568abb3f8d1 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 14 Nov 2009 16:44:22 +0900 Subject: nilfs2: add palloc cache to ifile This adds the palloc cache to ifile. The palloc cache is allocated on the extended region of nilfs_mdt_info struct. The struct nilfs_ifile_info defines the extended on memory structure of ifile. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index c1c1fd3..922d9dd 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -29,6 +29,17 @@ #include "alloc.h" #include "ifile.h" + +struct nilfs_ifile_info { + struct nilfs_mdt_info mi; + struct nilfs_palloc_cache palloc_cache; +}; + +static inline struct nilfs_ifile_info *NILFS_IFILE_I(struct inode *ifile) +{ + return (struct nilfs_ifile_info *)NILFS_MDT(ifile); +} + /** * nilfs_ifile_create_inode - create a new disk inode * @ifile: ifile inode @@ -159,13 +170,16 @@ struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size) struct inode *ifile; int err; - ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO, 0); + ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO, + sizeof(struct nilfs_ifile_info)); if (ifile) { err = nilfs_palloc_init_blockgroup(ifile, inode_size); if (unlikely(err)) { nilfs_mdt_destroy(ifile); return NULL; } + nilfs_palloc_setup_cache(ifile, + &NILFS_IFILE_I(ifile)->palloc_cache); } return ifile; } -- cgit v0.10.2 From 70622a2091647840013c1e982e56a8808768847e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sat, 14 Nov 2009 18:40:27 +0900 Subject: nilfs2: insert cache operation in palloc get block routines This implements cache operation in get block routines of palloc code: nilfs_palloc_get_desc_block(), nilfs_palloc_get_bitmap_block(), and nilfs_palloc_get_entry_block(). This will complete the palloc cache. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c index c56300d..3f959f1 100644 --- a/fs/nilfs2/alloc.c +++ b/fs/nilfs2/alloc.c @@ -142,29 +142,75 @@ static void nilfs_palloc_desc_block_init(struct inode *inode, } } +static int nilfs_palloc_get_block(struct inode *inode, unsigned long blkoff, + int create, + void (*init_block)(struct inode *, + struct buffer_head *, + void *), + struct buffer_head **bhp, + struct nilfs_bh_assoc *prev, + spinlock_t *lock) +{ + int ret; + + spin_lock(lock); + if (prev->bh && blkoff == prev->blkoff) { + get_bh(prev->bh); + *bhp = prev->bh; + spin_unlock(lock); + return 0; + } + spin_unlock(lock); + + ret = nilfs_mdt_get_block(inode, blkoff, create, init_block, bhp); + if (!ret) { + spin_lock(lock); + /* + * The following code must be safe for change of the + * cache contents during the get block call. + */ + brelse(prev->bh); + get_bh(*bhp); + prev->bh = *bhp; + prev->blkoff = blkoff; + spin_unlock(lock); + } + return ret; +} + static int nilfs_palloc_get_desc_block(struct inode *inode, unsigned long group, int create, struct buffer_head **bhp) { - return nilfs_mdt_get_block(inode, - nilfs_palloc_desc_blkoff(inode, group), - create, nilfs_palloc_desc_block_init, bhp); + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_get_block(inode, + nilfs_palloc_desc_blkoff(inode, group), + create, nilfs_palloc_desc_block_init, + bhp, &cache->prev_desc, &cache->lock); } static int nilfs_palloc_get_bitmap_block(struct inode *inode, unsigned long group, int create, struct buffer_head **bhp) { - return nilfs_mdt_get_block(inode, - nilfs_palloc_bitmap_blkoff(inode, group), - create, NULL, bhp); + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_get_block(inode, + nilfs_palloc_bitmap_blkoff(inode, group), + create, NULL, bhp, + &cache->prev_bitmap, &cache->lock); } int nilfs_palloc_get_entry_block(struct inode *inode, __u64 nr, int create, struct buffer_head **bhp) { - return nilfs_mdt_get_block(inode, nilfs_palloc_entry_blkoff(inode, nr), - create, NULL, bhp); + struct nilfs_palloc_cache *cache = NILFS_MDT(inode)->mi_palloc_cache; + + return nilfs_palloc_get_block(inode, + nilfs_palloc_entry_blkoff(inode, nr), + create, NULL, bhp, + &cache->prev_entry, &cache->lock); } static struct nilfs_palloc_group_desc * -- cgit v0.10.2 From 61a189e9c62359cd12b2aa3bd6ab9cffa6cf2745 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 18 Nov 2009 17:25:12 +0900 Subject: nilfs2: move routine marking segment usage dirty into sufile This adds nilfs_sufile_mark_dirty() function in sufile to replace nilfs_touch_segusage() function in log writer code. This is a preparation for the further cleanup which will move out low level sufile operations in the log writer. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index d21179b..3ae4a38 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1273,21 +1273,6 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) return err; } -static int nilfs_touch_segusage(struct inode *sufile, __u64 segnum) -{ - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; - int err; - - err = nilfs_sufile_get_segment_usage(sufile, segnum, &raw_su, &bh_su); - if (unlikely(err)) - return err; - nilfs_mdt_mark_buffer_dirty(bh_su); - nilfs_mdt_mark_dirty(sufile); - nilfs_sufile_put_segment_usage(sufile, segnum, bh_su); - return 0; -} - static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { @@ -1312,7 +1297,7 @@ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, } sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; - err = nilfs_touch_segusage(nilfs->ns_sufile, segbuf->sb_segnum); + err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum); if (unlikely(err)) return err; @@ -1352,7 +1337,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, * not be dirty. The following call ensures that the buffer is dirty * and will pin the buffer on memory until the sufile is written. */ - err = nilfs_touch_segusage(sufile, prev->sb_nextnum); + err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum); if (unlikely(err)) return err; diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 5f18aca..d560f88 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -502,6 +502,25 @@ void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum, } /** + * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty + * @sufile: inode of segment usage file + * @segnum: segment number + */ +int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) +{ + struct buffer_head *bh; + int ret; + + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); + if (!ret) { + nilfs_mdt_mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); + brelse(bh); + } + return ret; +} + +/** * nilfs_sufile_get_stat - get segment usage statistics * @sufile: inode of segment usage file * @stat: pointer to a structure of segment usage statistics diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index c339ad5..4146a65 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -42,6 +42,7 @@ int nilfs_sufile_get_segment_usage(struct inode *, __u64, struct buffer_head **); void nilfs_sufile_put_segment_usage(struct inode *, __u64, struct buffer_head *); +int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, size_t); -- cgit v0.10.2 From 071ec54dd730307ee0e703a105872b9a1c6fd2aa Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 18 Nov 2009 18:23:34 +0900 Subject: nilfs2: move routine to set segment usage into sufile This adds nilfs_sufile_set_segment_usage() function in sufile to replace direct access to the sufile metadata in log writer code. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 3ae4a38..097f9c4 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1457,21 +1457,16 @@ static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, struct inode *sufile) { struct nilfs_segment_buffer *segbuf; - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; unsigned long live_blocks; int ret; list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, - &raw_su, &bh_su); - WARN_ON(ret); /* always succeed because bh_su is dirty */ live_blocks = segbuf->sb_sum.nblocks + (segbuf->sb_pseg_start - segbuf->sb_fseg_start); - raw_su->su_lastmod = cpu_to_le64(sci->sc_seg_ctime); - raw_su->su_nblocks = cpu_to_le32(live_blocks); - nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, - bh_su); + ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, + live_blocks, + sci->sc_seg_ctime); + WARN_ON(ret); /* always succeed because the segusage is dirty */ } } @@ -1479,25 +1474,18 @@ static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, struct inode *sufile) { struct nilfs_segment_buffer *segbuf; - struct buffer_head *bh_su; - struct nilfs_segment_usage *raw_su; int ret; segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); - ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, - &raw_su, &bh_su); - WARN_ON(ret); /* always succeed because bh_su is dirty */ - raw_su->su_nblocks = cpu_to_le32(segbuf->sb_pseg_start - - segbuf->sb_fseg_start); - nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, bh_su); + ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, + segbuf->sb_pseg_start - + segbuf->sb_fseg_start, 0); + WARN_ON(ret); /* always succeed because the segusage is dirty */ list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { - ret = nilfs_sufile_get_segment_usage(sufile, segbuf->sb_segnum, - &raw_su, &bh_su); + ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, + 0, 0); WARN_ON(ret); /* always succeed */ - raw_su->su_nblocks = 0; - nilfs_sufile_put_segment_usage(sufile, segbuf->sb_segnum, - bh_su); } } diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index d560f88..3eed998 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -521,6 +521,43 @@ int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum) } /** + * nilfs_sufile_set_segment_usage - set usage of a segment + * @sufile: inode of segment usage file + * @segnum: segment number + * @nblocks: number of live blocks in the segment + * @modtime: modification time (option) + */ +int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, + unsigned long nblocks, time_t modtime) +{ + struct buffer_head *bh; + struct nilfs_segment_usage *su; + void *kaddr; + int ret; + + down_write(&NILFS_MDT(sufile)->mi_sem); + ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, &bh); + if (ret < 0) + goto out_sem; + + kaddr = kmap_atomic(bh->b_page, KM_USER0); + su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); + WARN_ON(nilfs_segment_usage_error(su)); + if (modtime) + su->su_lastmod = cpu_to_le64(modtime); + su->su_nblocks = cpu_to_le32(nblocks); + kunmap_atomic(kaddr, KM_USER0); + + nilfs_mdt_mark_buffer_dirty(bh); + nilfs_mdt_mark_dirty(sufile); + brelse(bh); + + out_sem: + up_write(&NILFS_MDT(sufile)->mi_sem); + return ret; +} + +/** * nilfs_sufile_get_stat - get segment usage statistics * @sufile: inode of segment usage file * @stat: pointer to a structure of segment usage statistics diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 4146a65..e1186bf 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -43,6 +43,8 @@ int nilfs_sufile_get_segment_usage(struct inode *, __u64, void nilfs_sufile_put_segment_usage(struct inode *, __u64, struct buffer_head *); int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); +int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, + unsigned long nblocks, time_t modtime); int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, size_t); -- cgit v0.10.2 From f021759d74d71bacc73fc3e00d6e3d35e1f2e123 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 18 Nov 2009 18:37:28 +0900 Subject: nilfs2: clean up get/put function of a segment usage This eliminates obsolete nilfs_get_sufile_get_segment_usage() and nilfs_set_sufile_segment_usage() from sufile. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 3eed998..b6c36d0 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -430,78 +430,6 @@ void nilfs_sufile_do_free(struct inode *sufile, __u64 segnum, } /** - * nilfs_sufile_get_segment_usage - get a segment usage - * @sufile: inode of segment usage file - * @segnum: segment number - * @sup: pointer to segment usage - * @bhp: pointer to buffer head - * - * Description: nilfs_sufile_get_segment_usage() acquires the segment usage - * specified by @segnum. - * - * Return Value: On success, 0 is returned, and the segment usage and the - * buffer head of the buffer on which the segment usage is located are stored - * in the place pointed by @sup and @bhp, respectively. On error, one of the - * following negative error codes is returned. - * - * %-EIO - I/O error. - * - * %-ENOMEM - Insufficient amount of memory available. - * - * %-EINVAL - Invalid segment usage number. - */ -int nilfs_sufile_get_segment_usage(struct inode *sufile, __u64 segnum, - struct nilfs_segment_usage **sup, - struct buffer_head **bhp) -{ - struct buffer_head *bh; - struct nilfs_segment_usage *su; - void *kaddr; - int ret; - - /* segnum is 0 origin */ - if (segnum >= nilfs_sufile_get_nsegments(sufile)) - return -EINVAL; - down_write(&NILFS_MDT(sufile)->mi_sem); - ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 1, &bh); - if (ret < 0) - goto out_sem; - kaddr = kmap(bh->b_page); - su = nilfs_sufile_block_get_segment_usage(sufile, segnum, bh, kaddr); - if (nilfs_segment_usage_error(su)) { - kunmap(bh->b_page); - brelse(bh); - ret = -EINVAL; - goto out_sem; - } - - if (sup != NULL) - *sup = su; - *bhp = bh; - - out_sem: - up_write(&NILFS_MDT(sufile)->mi_sem); - return ret; -} - -/** - * nilfs_sufile_put_segment_usage - put a segment usage - * @sufile: inode of segment usage file - * @segnum: segment number - * @bh: buffer head - * - * Description: nilfs_sufile_put_segment_usage() releases the segment usage - * specified by @segnum. @bh must be the buffer head which have been returned - * by a previous call to nilfs_sufile_get_segment_usage() with @segnum. - */ -void nilfs_sufile_put_segment_usage(struct inode *sufile, __u64 segnum, - struct buffer_head *bh) -{ - kunmap(bh->b_page); - brelse(bh); -} - -/** * nilfs_sufile_mark_dirty - mark the buffer having a segment usage dirty * @sufile: inode of segment usage file * @segnum: segment number diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index e1186bf..15163b8 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -37,11 +37,6 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); int nilfs_sufile_alloc(struct inode *, __u64 *); -int nilfs_sufile_get_segment_usage(struct inode *, __u64, - struct nilfs_segment_usage **, - struct buffer_head **); -void nilfs_sufile_put_segment_usage(struct inode *, __u64, - struct buffer_head *); int nilfs_sufile_mark_dirty(struct inode *sufile, __u64 segnum); int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, unsigned long nblocks, time_t modtime); -- cgit v0.10.2 From 050b4142c9f3cb3a213f254bd1a1fa1476800585 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 19 Nov 2009 22:24:48 +0900 Subject: nilfs2: apply readahead for recovery on mount This inserts readahead in the recovery code. The readahead request is issued per segment while searching the latest super root block. This will shorten mount time after unclean unmount. A measurement shows the recovery time was reduced by more than 60 percent: e.g. real 0m11.586s -> 0m3.918s (x 2.96) Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index bcd386d..6d5412e 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -798,6 +798,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, struct nilfs_segsum_info ssi; sector_t pseg_start, pseg_end, sr_pseg_start = 0; sector_t seg_start, seg_end; /* range of full segment (block number) */ + sector_t b, end; u64 seg_seq; __u64 segnum, nextnum = 0; __u64 cno; @@ -813,6 +814,11 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, /* Calculate range of segment */ nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); + /* Read ahead segment */ + b = seg_start; + while (b <= seg_end) + sb_breadahead(sbi->s_super, b++); + for (;;) { /* Load segment summary */ ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi, 1); @@ -835,14 +841,20 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, ri->ri_nextnum = nextnum; empty_seg = 0; + if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) { + /* This will never happen because a superblock + (last_segment) always points to a pseg + having a super root. */ + ret = NILFS_SEG_FAIL_CONSISTENCY; + goto failed; + } + + if (pseg_start == seg_start) { + nilfs_get_segment_range(nilfs, nextnum, &b, &end); + while (b <= end) + sb_breadahead(sbi->s_super, b++); + } if (!NILFS_SEG_HAS_SR(&ssi)) { - if (!scan_newer) { - /* This will never happen because a superblock - (last_segment) always points to a pseg - having a super root. */ - ret = NILFS_SEG_FAIL_CONSISTENCY; - goto failed; - } if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { ri->ri_lsegs_start = pseg_start; ri->ri_lsegs_start_seq = seg_seq; -- cgit v0.10.2 From f50a4c8149cc135921a8a0476bff8e622f59aef9 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 19 Nov 2009 16:58:40 +0900 Subject: nilfs2: move recovery completion into load_nilfs function Although mount recovery of nilfs is integrated in load_nilfs() procedure, the completion of recovery was isolated from the procedure and performed at the end of the fill_super routine. This was somewhat confusing since the recovery is needed for the nilfs object, not for a super block instance. To resolve the inconsistency, this will integrate the recovery completion into load_nilfs(). Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index f526169..990ead4 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -414,22 +414,6 @@ void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) up_write(&nilfs->ns_super_sem); } -static int nilfs_mark_recovery_complete(struct nilfs_sb_info *sbi) -{ - struct the_nilfs *nilfs = sbi->s_nilfs; - int err = 0; - - down_write(&nilfs->ns_sem); - if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { - nilfs->ns_mount_state |= NILFS_VALID_FS; - err = nilfs_commit_super(sbi, 1); - if (likely(!err)) - printk(KERN_INFO "NILFS: recovery complete.\n"); - } - up_write(&nilfs->ns_sem); - return err; -} - static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; @@ -649,9 +633,7 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) int mnt_count = le16_to_cpu(sbp->s_mnt_count); /* nilfs->sem must be locked by the caller. */ - if (!(nilfs->ns_mount_state & NILFS_VALID_FS)) { - printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); - } else if (nilfs->ns_mount_state & NILFS_ERROR_FS) { + if (nilfs->ns_mount_state & NILFS_ERROR_FS) { printk(KERN_WARNING "NILFS warning: mounting fs with errors\n"); #if 0 @@ -759,11 +741,10 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, sb->s_root = NULL; sb->s_time_gran = 1; - if (!nilfs_loaded(nilfs)) { - err = load_nilfs(nilfs, sbi); - if (err) - goto failed_sbi; - } + err = load_nilfs(nilfs, sbi); + if (err) + goto failed_sbi; + cno = nilfs_last_cno(nilfs); if (sb->s_flags & MS_RDONLY) { @@ -831,12 +812,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, up_write(&nilfs->ns_sem); } - err = nilfs_mark_recovery_complete(sbi); - if (unlikely(err)) { - printk(KERN_ERR "NILFS: recovery failed.\n"); - goto failed_root; - } - down_write(&nilfs->ns_super_sem); if (!nilfs_test_opt(sbi, SNAPSHOT)) nilfs->ns_current = sbi; @@ -844,10 +819,6 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, return 0; - failed_root: - dput(sb->s_root); - sb->s_root = NULL; - failed_segctor: nilfs_detach_segment_constructor(sbi); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 4d4d35c..aea2b58 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -262,28 +262,27 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) unsigned int s_flags = sbi->s_super->s_flags; int really_read_only = bdev_read_only(nilfs->ns_bdev); unsigned valid_fs; - int err = 0; + int err; - nilfs_init_recovery_info(&ri); + if (nilfs_loaded(nilfs)) + return 0; down_write(&nilfs->ns_sem); valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); up_write(&nilfs->ns_sem); - if (!valid_fs && (s_flags & MS_RDONLY)) { - printk(KERN_INFO "NILFS: INFO: recovery " - "required for readonly filesystem.\n"); - if (really_read_only) { - printk(KERN_ERR "NILFS: write access " - "unavailable, cannot proceed.\n"); - err = -EROFS; - goto failed; + if (!valid_fs) { + printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); + if (s_flags & MS_RDONLY) { + printk(KERN_INFO "NILFS: INFO: recovery " + "required for readonly filesystem.\n"); + printk(KERN_INFO "NILFS: write access will " + "be enabled during recovery.\n"); } - printk(KERN_INFO "NILFS: write access will " - "be enabled during recovery.\n"); - sbi->s_super->s_flags &= ~MS_RDONLY; } + nilfs_init_recovery_info(&ri); + err = nilfs_search_super_root(nilfs, sbi, &ri); if (unlikely(err)) { printk(KERN_ERR "NILFS: error searching super root.\n"); @@ -296,19 +295,46 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) goto failed; } - if (!valid_fs) { - err = nilfs_recover_logical_segments(nilfs, sbi, &ri); - if (unlikely(err)) { - nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_destroy(nilfs->ns_dat); - goto failed; + if (valid_fs) + goto skip_recovery; + + if (s_flags & MS_RDONLY) { + if (really_read_only) { + printk(KERN_ERR "NILFS: write access " + "unavailable, cannot proceed.\n"); + err = -EROFS; + goto failed_unload; } - if (ri.ri_need_recovery == NILFS_RECOVERY_SR_UPDATED) - sbi->s_super->s_dirt = 1; + sbi->s_super->s_flags &= ~MS_RDONLY; + } + + err = nilfs_recover_logical_segments(nilfs, sbi, &ri); + if (err) + goto failed_unload; + + down_write(&nilfs->ns_sem); + nilfs->ns_mount_state |= NILFS_VALID_FS; + nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); + err = nilfs_commit_super(sbi, 1); + up_write(&nilfs->ns_sem); + + if (err) { + printk(KERN_ERR "NILFS: failed to update super block. " + "recovery unfinished.\n"); + goto failed_unload; } + printk(KERN_INFO "NILFS: recovery complete.\n"); + skip_recovery: set_nilfs_loaded(nilfs); + nilfs_clear_recovery_info(&ri); + sbi->s_super->s_flags = s_flags; + return 0; + + failed_unload: + nilfs_mdt_destroy(nilfs->ns_cpfile); + nilfs_mdt_destroy(nilfs->ns_sufile); + nilfs_mdt_destroy(nilfs->ns_dat); failed: nilfs_clear_recovery_info(&ri); -- cgit v0.10.2 From a057d2c01161444c48b12a60351ae6c7135f6e61 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Thu, 19 Nov 2009 19:58:46 +0900 Subject: nilfs2: add helper to get if volume is in a valid state This adds a helper function, nilfs_valid_fs() which returns if nilfs is in a valid state or not. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index aea2b58..890a8d3 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -261,16 +261,12 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) struct nilfs_recovery_info ri; unsigned int s_flags = sbi->s_super->s_flags; int really_read_only = bdev_read_only(nilfs->ns_bdev); - unsigned valid_fs; + int valid_fs = nilfs_valid_fs(nilfs); int err; if (nilfs_loaded(nilfs)) return 0; - down_write(&nilfs->ns_sem); - valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); - up_write(&nilfs->ns_sem); - if (!valid_fs) { printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); if (s_flags & MS_RDONLY) { diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 20abd55..589786e 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -258,6 +258,16 @@ static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) kfree(sbi); } +static inline int nilfs_valid_fs(struct the_nilfs *nilfs) +{ + unsigned valid_fs; + + down_read(&nilfs->ns_sem); + valid_fs = (nilfs->ns_mount_state & NILFS_VALID_FS); + up_read(&nilfs->ns_sem); + return valid_fs; +} + static inline void nilfs_get_segment_range(struct the_nilfs *nilfs, __u64 segnum, sector_t *seg_start, sector_t *seg_end) -- cgit v0.10.2 From 0234576d041b9b2cc7043691ea61d2c2ca597aaa Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Fri, 20 Nov 2009 03:28:01 +0900 Subject: nilfs2: add norecovery mount option This adds "norecovery" mount option which disables temporal write access to read-only mounts or snapshots during mount/recovery. Without this option, write access will be even performed for those types of mounts; the temporal write access is needed to mount root file system read-only after an unclean shutdown. This option will be helpful when user wants to prevent any write access to the device. Signed-off-by: Ryusuke Konishi Cc: Eric Sandeen diff --git a/Documentation/filesystems/nilfs2.txt b/Documentation/filesystems/nilfs2.txt index cbd8779..4949fca 100644 --- a/Documentation/filesystems/nilfs2.txt +++ b/Documentation/filesystems/nilfs2.txt @@ -70,6 +70,10 @@ order=strict Apply strict in-order semantics that preserves sequence blocks. That means, it is guaranteed that no overtaking of events occurs in the recovered file system after a crash. +norecovery Disable recovery of the filesystem on mount. + This disables every write access on the device for + read-only mounts or snapshots. This option will fail + for r/w mounts on an unclean volume. NILFS2 usage ============ diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 990ead4..5403b3e 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -479,6 +479,8 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) seq_printf(seq, ",errors=panic"); if (nilfs_test_opt(sbi, STRICT_ORDER)) seq_printf(seq, ",order=strict"); + if (nilfs_test_opt(sbi, NORECOVERY)) + seq_printf(seq, ",norecovery"); return 0; } @@ -547,7 +549,7 @@ static const struct export_operations nilfs_export_ops = { enum { Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_nobarrier, Opt_snapshot, Opt_order, + Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, Opt_err, }; @@ -558,6 +560,7 @@ static match_table_t tokens = { {Opt_nobarrier, "nobarrier"}, {Opt_snapshot, "cp=%u"}, {Opt_order, "order=%s"}, + {Opt_norecovery, "norecovery"}, {Opt_err, NULL} }; @@ -608,6 +611,9 @@ static int parse_options(char *options, struct super_block *sb) sbi->s_snapshot_cno = option; nilfs_set_opt(sbi, SNAPSHOT); break; + case Opt_norecovery: + nilfs_set_opt(sbi, NORECOVERY); + break; default: printk(KERN_ERR "NILFS: Unrecognized mount option \"%s\"\n", p); @@ -863,6 +869,14 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (!nilfs_valid_fs(nilfs)) { + printk(KERN_WARNING "NILFS (device %s): couldn't " + "remount because the filesystem is in an " + "incomplete recovery state.\n", sb->s_id); + err = -EINVAL; + goto restore_opts; + } + if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) goto out; if (*flags & MS_RDONLY) { diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 890a8d3..6241e17 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -264,8 +264,14 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) int valid_fs = nilfs_valid_fs(nilfs); int err; - if (nilfs_loaded(nilfs)) - return 0; + if (nilfs_loaded(nilfs)) { + if (valid_fs || + ((s_flags & MS_RDONLY) && nilfs_test_opt(sbi, NORECOVERY))) + return 0; + printk(KERN_ERR "NILFS: the filesystem is in an incomplete " + "recovery state.\n"); + return -EINVAL; + } if (!valid_fs) { printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); @@ -295,6 +301,11 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) goto skip_recovery; if (s_flags & MS_RDONLY) { + if (nilfs_test_opt(sbi, NORECOVERY)) { + printk(KERN_INFO "NILFS: norecovery option specified. " + "skipping roll-forward recovery\n"); + goto skip_recovery; + } if (really_read_only) { printk(KERN_ERR "NILFS: write access " "unavailable, cannot proceed.\n"); @@ -302,6 +313,11 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) goto failed_unload; } sbi->s_super->s_flags &= ~MS_RDONLY; + } else if (nilfs_test_opt(sbi, NORECOVERY)) { + printk(KERN_ERR "NILFS: recovery cancelled because norecovery " + "option was specified for a read/write mount\n"); + err = -EINVAL; + goto failed_unload; } err = nilfs_recover_logical_segments(nilfs, sbi, &ri); diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 72289d2..3fe02cf 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -151,6 +151,8 @@ struct nilfs_super_root { #define NILFS_MOUNT_BARRIER 0x1000 /* Use block barriers */ #define NILFS_MOUNT_STRICT_ORDER 0x2000 /* Apply strict in-order semantics also for data */ +#define NILFS_MOUNT_NORECOVERY 0x4000 /* Disable write access during + mount-time recovery */ /** -- cgit v0.10.2 From 9ca941d4b62e72571948efe5a73c563b4cacc98d Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Fri, 27 Nov 2009 19:41:06 +0900 Subject: nilfs2: delete mark_inode_dirty in nilfs_new_inode It is redundant to call mark_inode_dirty() in nilfs_new_inode() because all caller of nilfs_new_inode() will call mark_inode_dirty() after calling nilfs_new_inode() directly or indirectly in transaction. Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index a16c179..ede0375 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -323,7 +323,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) nilfs_init_acl(), proper cancellation of above jobs should be considered */ - mark_inode_dirty(inode); return inode; failed_acl: diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index ed02e88..01adda8 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -188,7 +188,7 @@ static int nilfs_symlink(struct inode *dir, struct dentry *dentry, goto out_fail; /* mark_inode_dirty(inode); */ - /* nilfs_new_inode() and page_symlink() do this */ + /* page_symlink() do this */ err = nilfs_add_nondir(dentry, inode); out: -- cgit v0.10.2 From 43f8bc262fcfadc7583b2353d2708e6eb77788ff Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Fri, 27 Nov 2009 19:41:07 +0900 Subject: nilfs2: delete mark_inode_dirty from nilfs_set_link Delete mark_inode_dirty() from nilfs_set_link() to reduce redundant mark_inode_dirty() calls in caller of nilfs_set_link(). Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index e097099..4f3fa00 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -459,7 +459,6 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, nilfs_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; /* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ - mark_inode_dirty(dir); } /* diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 01adda8..4616f96 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -397,6 +397,7 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_dir; inode_inc_link_count(old_inode); nilfs_set_link(new_dir, new_de, new_page, old_inode); + mark_inode_dirty(new_dir); new_inode->i_ctime = CURRENT_TIME; if (dir_de) drop_nlink(new_inode); @@ -425,12 +426,13 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_ctime = CURRENT_TIME; nilfs_delete_entry(old_de, old_page); - inode_dec_link_count(old_inode); + drop_nlink(old_inode); if (dir_de) { nilfs_set_link(old_inode, dir_de, dir_page, new_dir); inode_dec_link_count(old_dir); } + mark_inode_dirty(old_inode); err = nilfs_transaction_commit(old_dir->i_sb); return err; -- cgit v0.10.2 From 565de406e7bfa92ffec6315e89857986da657192 Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Fri, 27 Nov 2009 19:41:08 +0900 Subject: nilfs2: expand inode_inc_link_count and inode_dec_link_count This is an intermidiate patch to reduce redandunt mark_inode_dirty() calls by calling inode_inc_link_count() and inode_dec_link_count() functions. Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 4616f96..f952439 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -200,7 +200,8 @@ out: return err; out_fail: - inode_dec_link_count(inode); + drop_nlink(inode); + mark_inode_dirty(inode); iput(inode); goto out; } @@ -245,7 +246,8 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) return err; - inode_inc_link_count(dir); + inc_nlink(dir); + mark_inode_dirty(dir); inode = nilfs_new_inode(dir, S_IFDIR | mode); err = PTR_ERR(inode); @@ -256,7 +258,8 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_fop = &nilfs_dir_operations; inode->i_mapping->a_ops = &nilfs_aops; - inode_inc_link_count(inode); + inc_nlink(inode); + mark_inode_dirty(inode); err = nilfs_make_empty(inode, dir); if (err) @@ -276,11 +279,14 @@ out: return err; out_fail: - inode_dec_link_count(inode); - inode_dec_link_count(inode); + drop_nlink(inode); + mark_inode_dirty(inode); + drop_nlink(inode); + mark_inode_dirty(inode); iput(inode); out_dir: - inode_dec_link_count(dir); + drop_nlink(dir); + mark_inode_dirty(dir); goto out; } @@ -317,7 +323,8 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) goto out; inode->i_ctime = dir->i_ctime; - inode_dec_link_count(inode); + drop_nlink(inode); + mark_inode_dirty(inode); err = 0; out: if (!err) @@ -343,8 +350,10 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) err = nilfs_unlink(dir, dentry); if (!err) { inode->i_size = 0; - inode_dec_link_count(inode); - inode_dec_link_count(dir); + drop_nlink(inode); + mark_inode_dirty(inode); + drop_nlink(dir); + mark_inode_dirty(dir); } } if (!err) @@ -395,33 +404,38 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, new_de = nilfs_find_entry(new_dir, new_dentry, &new_page); if (!new_de) goto out_dir; - inode_inc_link_count(old_inode); + inc_nlink(old_inode); + mark_inode_dirty(old_inode); nilfs_set_link(new_dir, new_de, new_page, old_inode); mark_inode_dirty(new_dir); new_inode->i_ctime = CURRENT_TIME; if (dir_de) drop_nlink(new_inode); - inode_dec_link_count(new_inode); + drop_nlink(new_inode); + mark_inode_dirty(new_inode); } else { if (dir_de) { err = -EMLINK; if (new_dir->i_nlink >= NILFS_LINK_MAX) goto out_dir; } - inode_inc_link_count(old_inode); + inc_nlink(old_inode); + mark_inode_dirty(old_inode); err = nilfs_add_link(new_dentry, old_inode); if (err) { - inode_dec_link_count(old_inode); + drop_nlink(old_inode); + mark_inode_dirty(old_inode); goto out_dir; } - if (dir_de) - inode_inc_link_count(new_dir); + if (dir_de) { + inc_nlink(new_dir); + mark_inode_dirty(new_dir); + } } /* * Like most other Unix systems, set the ctime for inodes on a * rename. - * inode_dec_link_count() will mark the inode dirty. */ old_inode->i_ctime = CURRENT_TIME; @@ -430,7 +444,8 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (dir_de) { nilfs_set_link(old_inode, dir_de, dir_page, new_dir); - inode_dec_link_count(old_dir); + drop_nlink(old_dir); + mark_inode_dirty(old_dir); } mark_inode_dirty(old_inode); -- cgit v0.10.2 From 17491472769abbf4dac694d96c65eed5a7e1c81c Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Fri, 27 Nov 2009 19:41:09 +0900 Subject: nilfs2: delete redundant mark_inode_dirty delete redundant mark_inode_dirty() calls Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index f952439..35f59da 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -247,7 +247,6 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) return err; inc_nlink(dir); - mark_inode_dirty(dir); inode = nilfs_new_inode(dir, S_IFDIR | mode); err = PTR_ERR(inode); @@ -280,7 +279,6 @@ out: out_fail: drop_nlink(inode); - mark_inode_dirty(inode); drop_nlink(inode); mark_inode_dirty(inode); iput(inode); @@ -405,7 +403,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (!new_de) goto out_dir; inc_nlink(old_inode); - mark_inode_dirty(old_inode); nilfs_set_link(new_dir, new_de, new_page, old_inode); mark_inode_dirty(new_dir); new_inode->i_ctime = CURRENT_TIME; @@ -420,7 +417,6 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_dir; } inc_nlink(old_inode); - mark_inode_dirty(old_inode); err = nilfs_add_link(new_dentry, old_inode); if (err) { drop_nlink(old_inode); -- cgit v0.10.2 From 4cd76c3c930993cf70657775bb521cad006e37b4 Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Fri, 27 Nov 2009 19:41:10 +0900 Subject: nilfs2: split nilfs_unlink as nilfs_do_unlink and nilfs_unlink Split nilfs_unlink() to reduce nested transaction and duplicate mark_inode_dirty() calls when calling nilfs_unlink() from nilfs_rmdir(). nilfs_do_unlink() is an actual unlink functionality which is not in transaction and does not call mark_inode_dirty() for dentry argument. nilfs_unlink() is a wrapper function for do_nilfs_unlink() with transaction and mark_inode_dirty() for dentry argument. Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 35f59da..d92e839 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -288,18 +288,13 @@ out_dir: goto out; } -static int nilfs_unlink(struct inode *dir, struct dentry *dentry) +static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode; struct nilfs_dir_entry *de; struct page *page; - struct nilfs_transaction_info ti; int err; - err = nilfs_transaction_begin(dir->i_sb, &ti, 0); - if (err) - return err; - err = -ENOENT; de = nilfs_find_entry(dir, dentry, &page); if (!de) @@ -322,12 +317,26 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) inode->i_ctime = dir->i_ctime; drop_nlink(inode); - mark_inode_dirty(inode); err = 0; out: - if (!err) + return err; +} + +static int nilfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct nilfs_transaction_info ti; + int err; + + err = nilfs_transaction_begin(dir->i_sb, &ti, 0); + if (err) + return err; + + err = nilfs_do_unlink(dir, dentry); + + if (!err) { + mark_inode_dirty(dentry->d_inode); err = nilfs_transaction_commit(dir->i_sb); - else + } else nilfs_transaction_abort(dir->i_sb); return err; @@ -345,7 +354,7 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) err = -ENOTEMPTY; if (nilfs_empty_dir(inode)) { - err = nilfs_unlink(dir, dentry); + err = nilfs_do_unlink(dir, dentry); if (!err) { inode->i_size = 0; drop_nlink(inode); -- cgit v0.10.2 From 2093abf9cbcec3cb1409a67d8bce51854595b1d5 Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Fri, 27 Nov 2009 19:41:11 +0900 Subject: nilfs2: change return type of nilfs_commit_chunk change return type of nilfs_commit_chunk() as void from int, for nilfs_set_file_dirty() usually does not return error. This is an intermediate patch to reduce mark_inode_dirty() in nilfs_commit_chunk(). Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 4f3fa00..173530d 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -99,9 +99,9 @@ static int nilfs_prepare_chunk(struct page *page, NULL, nilfs_get_block); } -static int nilfs_commit_chunk(struct page *page, - struct address_space *mapping, - unsigned from, unsigned to) +static void nilfs_commit_chunk(struct page *page, + struct address_space *mapping, + unsigned from, unsigned to) { struct inode *dir = mapping->host; struct nilfs_sb_info *sbi = NILFS_SB(dir->i_sb); @@ -119,8 +119,8 @@ static int nilfs_commit_chunk(struct page *page, if (IS_DIRSYNC(dir)) nilfs_set_transaction_flag(NILFS_TI_SYNC); err = nilfs_set_file_dirty(sbi, dir, nr_dirty); + WARN_ON(err); /* do not happen */ unlock_page(page); - return err; } static void nilfs_check_page(struct page *page) @@ -455,7 +455,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, BUG_ON(err); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); - err = nilfs_commit_chunk(page, mapping, from, to); + nilfs_commit_chunk(page, mapping, from, to); nilfs_put_page(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; /* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ @@ -547,7 +547,7 @@ got_it: memcpy(de->name, name, namelen); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); - err = nilfs_commit_chunk(page, page->mapping, from, to); + nilfs_commit_chunk(page, page->mapping, from, to); dir->i_mtime = dir->i_ctime = CURRENT_TIME; /* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ mark_inode_dirty(dir); @@ -594,7 +594,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) if (pde) pde->rec_len = cpu_to_le16(to - from); dir->inode = 0; - err = nilfs_commit_chunk(page, mapping, from, to); + nilfs_commit_chunk(page, mapping, from, to); inode->i_ctime = inode->i_mtime = CURRENT_TIME; /* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */ mark_inode_dirty(inode); @@ -639,7 +639,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) memcpy(de->name, "..\0", 4); nilfs_set_de_type(de, inode); kunmap_atomic(kaddr, KM_USER0); - err = nilfs_commit_chunk(page, mapping, 0, chunk_size); + nilfs_commit_chunk(page, mapping, 0, chunk_size); fail: page_cache_release(page); return err; -- cgit v0.10.2 From 58d55471cb1911f7493aba7bf3b6b87ca91e4314 Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Fri, 27 Nov 2009 19:41:12 +0900 Subject: nilfs2: delete mark_inode_dirty in nilfs_commit_chunk Delete mark_inode_dirty() in nilfs_commit_chunk(), for callers of nilfs_commit_chunk() will call equivalent mark_inode_dirty() after calling nilfs_commit_chunk(). Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 173530d..693539b 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -112,10 +112,8 @@ static void nilfs_commit_chunk(struct page *page, nr_dirty = nilfs_page_count_clean_buffers(page, from, to); copied = block_write_end(NULL, mapping, pos, len, len, page, NULL); - if (pos + copied > dir->i_size) { + if (pos + copied > dir->i_size) i_size_write(dir, pos + copied); - mark_inode_dirty(dir); - } if (IS_DIRSYNC(dir)) nilfs_set_transaction_flag(NILFS_TI_SYNC); err = nilfs_set_file_dirty(sbi, dir, nr_dirty); diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index d92e839..d6aa8f4 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -258,7 +258,6 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) inode->i_mapping->a_ops = &nilfs_aops; inc_nlink(inode); - mark_inode_dirty(inode); err = nilfs_make_empty(inode, dir); if (err) @@ -268,6 +267,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; + mark_inode_dirty(inode); d_instantiate(dentry, inode); out: if (!err) -- cgit v0.10.2 From 3534573b58fd7576d3dc8dd66a9973592ac08b2d Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Fri, 27 Nov 2009 19:41:13 +0900 Subject: nilfs2: delete mark_inode_dirty in nilfs_delete_entry Delete mark_inode_dirty() in nilfs_delete_entry() to reduce duplicate mark_inode_dirty() calls both in nilfs_rename() and nilfs_delete_entry(). Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 693539b..1d9a4e4 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -595,7 +595,6 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) nilfs_commit_chunk(page, mapping, from, to); inode->i_ctime = inode->i_mtime = CURRENT_TIME; /* NILFS_I(inode)->i_flags &= ~NILFS_BTREE_FL; */ - mark_inode_dirty(inode); out: nilfs_put_page(page); return err; diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index d6aa8f4..4237722b5 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -334,6 +334,7 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) err = nilfs_do_unlink(dir, dentry); if (!err) { + mark_inode_dirty(dir); mark_inode_dirty(dentry->d_inode); err = nilfs_transaction_commit(dir->i_sb); } else @@ -450,8 +451,8 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, if (dir_de) { nilfs_set_link(old_inode, dir_de, dir_page, new_dir); drop_nlink(old_dir); - mark_inode_dirty(old_dir); } + mark_inode_dirty(old_dir); mark_inode_dirty(old_inode); err = nilfs_transaction_commit(old_dir->i_sb); -- cgit v0.10.2 From abdb318b79d387a723af5db2aa79f812cefd0797 Mon Sep 17 00:00:00 2001 From: Jiro SEKIBA Date: Fri, 27 Nov 2009 19:41:14 +0900 Subject: nilfs2: replace mark_inode_dirty as nilfs_mark_inode_dirty Replace mark_inode_dirty() as nilfs_mark_inode_dirty() to reduce deep function calls. Signed-off-by: Jiro SEKIBA Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 1d9a4e4..76d803e 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -548,7 +548,7 @@ got_it: nilfs_commit_chunk(page, page->mapping, from, to); dir->i_mtime = dir->i_ctime = CURRENT_TIME; /* NILFS_I(dir)->i_flags &= ~NILFS_BTREE_FL; */ - mark_inode_dirty(dir); + nilfs_mark_inode_dirty(dir); /* OFFSET_CACHE */ out_put: nilfs_put_page(page); diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index ede0375..7868cc1 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -97,7 +97,7 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff, nilfs_transaction_abort(inode->i_sb); goto out; } - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ /* Error handling should be detailed */ set_buffer_new(bh_result); @@ -598,7 +598,7 @@ void nilfs_truncate(struct inode *inode) if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); nilfs_set_file_dirty(NILFS_SB(sb), inode, 0); nilfs_transaction_commit(sb); /* May construct a logical segment and may fail in sync mode. @@ -623,7 +623,7 @@ void nilfs_delete_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); nilfs_truncate_bmap(ii, 0); - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); nilfs_free_inode(inode); /* nilfs_free_inode() marks inode buffer dirty */ if (IS_SYNC(inode)) diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 4237722b5..07ba838 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -120,7 +120,7 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, int mode, inode->i_op = &nilfs_file_inode_operations; inode->i_fop = &nilfs_file_operations; inode->i_mapping->a_ops = &nilfs_aops; - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } if (!err) @@ -148,7 +148,7 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); err = nilfs_add_nondir(dentry, inode); } if (!err) @@ -201,7 +201,7 @@ out: out_fail: drop_nlink(inode); - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); iput(inode); goto out; } @@ -267,7 +267,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) if (err) goto out_fail; - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); d_instantiate(dentry, inode); out: if (!err) @@ -280,11 +280,11 @@ out: out_fail: drop_nlink(inode); drop_nlink(inode); - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); iput(inode); out_dir: drop_nlink(dir); - mark_inode_dirty(dir); + nilfs_mark_inode_dirty(dir); goto out; } @@ -334,8 +334,8 @@ static int nilfs_unlink(struct inode *dir, struct dentry *dentry) err = nilfs_do_unlink(dir, dentry); if (!err) { - mark_inode_dirty(dir); - mark_inode_dirty(dentry->d_inode); + nilfs_mark_inode_dirty(dir); + nilfs_mark_inode_dirty(dentry->d_inode); err = nilfs_transaction_commit(dir->i_sb); } else nilfs_transaction_abort(dir->i_sb); @@ -359,9 +359,9 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry) if (!err) { inode->i_size = 0; drop_nlink(inode); - mark_inode_dirty(inode); + nilfs_mark_inode_dirty(inode); drop_nlink(dir); - mark_inode_dirty(dir); + nilfs_mark_inode_dirty(dir); } } if (!err) @@ -414,12 +414,12 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, goto out_dir; inc_nlink(old_inode); nilfs_set_link(new_dir, new_de, new_page, old_inode); - mark_inode_dirty(new_dir); + nilfs_mark_inode_dirty(new_dir); new_inode->i_ctime = CURRENT_TIME; if (dir_de) drop_nlink(new_inode); drop_nlink(new_inode); - mark_inode_dirty(new_inode); + nilfs_mark_inode_dirty(new_inode); } else { if (dir_de) { err = -EMLINK; @@ -430,12 +430,12 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, err = nilfs_add_link(new_dentry, old_inode); if (err) { drop_nlink(old_inode); - mark_inode_dirty(old_inode); + nilfs_mark_inode_dirty(old_inode); goto out_dir; } if (dir_de) { inc_nlink(new_dir); - mark_inode_dirty(new_dir); + nilfs_mark_inode_dirty(new_dir); } } @@ -452,8 +452,8 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry, nilfs_set_link(old_inode, dir_de, dir_page, new_dir); drop_nlink(old_dir); } - mark_inode_dirty(old_dir); - mark_inode_dirty(old_inode); + nilfs_mark_inode_dirty(old_dir); + nilfs_mark_inode_dirty(old_inode); err = nilfs_transaction_commit(old_dir->i_sb); return err; -- cgit v0.10.2 From 0935db747739782fc779eb58529610c12db88ea2 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 29 Nov 2009 02:39:11 +0900 Subject: nilfs2: use list_splice_tail or list_splice_tail_init This applies list_splice_tail (or list_splice_tail_init) operation instead of list_splice (or list_splice_init, respectively) to append a new list to tail of an existing list. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 139b113..7cdd98b 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -2018,7 +2018,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < NILFS_BTREE_LEVEL_MAX; level++) - list_splice(&lists[level], listp->prev); + list_splice_tail(&lists[level], listp); } static int nilfs_btree_assign_p(struct nilfs_btree *btree, diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index 6d5412e..c9c96c7 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -925,7 +925,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, super_root_found: /* Updating pointers relating to the latest checkpoint */ - list_splice(&segments, ri->ri_used_segments.prev); + list_splice_tail(&segments, &ri->ri_used_segments); nilfs->ns_last_pseg = sr_pseg_start; nilfs->ns_last_seq = nilfs->ns_seg_seq; nilfs->ns_last_cno = ri->ri_cno; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 097f9c4..79acf4d 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1363,7 +1363,7 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, list_add_tail(&segbuf->sb_list, &list); prev = segbuf; } - list_splice(&list, sci->sc_segbufs.prev); + list_splice_tail(&list, &sci->sc_segbufs); return 0; failed_segbuf: @@ -2532,7 +2532,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, sci->sc_freesegs = kbufs[4]; sci->sc_nfreesegs = argv[4].v_nmembs; - list_splice_init(&nilfs->ns_gc_inodes, sci->sc_gc_inodes.prev); + list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes); for (;;) { nilfs_segctor_accept(sci, &req); -- cgit v0.10.2 From 5f1586d0dd8f6eeecf6c0d35cbca6291afd6f1cc Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 29 Nov 2009 19:14:17 +0900 Subject: nilfs2: do not return io error for bio allocation failure Previously, log writer had possibility to set an io error flag on segments even in case of memory allocation failure. This fixes the issue. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index e6d9e37..c71b689 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -374,7 +374,7 @@ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, struct nilfs_write_info *wi) { struct buffer_head *bh; - int res, rw = WRITE; + int res = 0, rw = WRITE; list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { res = nilfs_submit_bh(wi, bh, rw); @@ -395,17 +395,10 @@ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, */ rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); res = nilfs_submit_seg_bio(wi, rw); - if (unlikely(res)) - goto failed_bio; } - res = 0; - out: - return res; - failed_bio: - atomic_inc(&wi->err); - goto out; + return res; } /** -- cgit v0.10.2 From 9284ad2a9016ad631460caf8fd01fc21d84f118c Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Wed, 25 Nov 2009 01:04:21 +0900 Subject: nilfs2: relocate io status variables to segment buffer This moves io status variables in nilfs_write_info struct to nilfs_segment_buffer struct. This is a preparation to hide nilfs_write_info in segment buffer code. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index c71b689..d86056c 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -63,6 +63,11 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *sb) INIT_LIST_HEAD(&segbuf->sb_list); INIT_LIST_HEAD(&segbuf->sb_segsum_buffers); INIT_LIST_HEAD(&segbuf->sb_payload_buffers); + + init_completion(&segbuf->sb_bio_event); + atomic_set(&segbuf->sb_err, 0); + segbuf->sb_nbio = 0; + return segbuf; } @@ -132,8 +137,6 @@ int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags, segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary); segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0; segbuf->sb_sum.ctime = ctime; - - segbuf->sb_io_error = 0; return 0; } @@ -247,7 +250,7 @@ void nilfs_release_buffers(struct list_head *list) static void nilfs_end_bio_write(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - struct nilfs_write_info *wi = bio->bi_private; + struct nilfs_segment_buffer *segbuf = bio->bi_private; if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); @@ -256,21 +259,22 @@ static void nilfs_end_bio_write(struct bio *bio, int err) } if (!uptodate) - atomic_inc(&wi->err); + atomic_inc(&segbuf->sb_err); bio_put(bio); - complete(&wi->bio_event); + complete(&segbuf->sb_bio_event); } -static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) +static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi, int mode) { struct bio *bio = wi->bio; int err; - if (wi->nbio > 0 && bdi_write_congested(wi->bdi)) { - wait_for_completion(&wi->bio_event); - wi->nbio--; - if (unlikely(atomic_read(&wi->err))) { + if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->bdi)) { + wait_for_completion(&segbuf->sb_bio_event); + segbuf->sb_nbio--; + if (unlikely(atomic_read(&segbuf->sb_err))) { bio_put(bio); err = -EIO; goto failed; @@ -278,7 +282,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) } bio->bi_end_io = nilfs_end_bio_write; - bio->bi_private = wi; + bio->bi_private = segbuf; bio_get(bio); submit_bio(mode, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) { @@ -286,7 +290,7 @@ static int nilfs_submit_seg_bio(struct nilfs_write_info *wi, int mode) err = -EOPNOTSUPP; goto failed; } - wi->nbio++; + segbuf->sb_nbio++; bio_put(bio); wi->bio = NULL; @@ -336,15 +340,12 @@ void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev); wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end = 0; - wi->nbio = 0; wi->blocknr = segbuf->sb_pseg_start; - - atomic_set(&wi->err, 0); - init_completion(&wi->bio_event); } -static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, - int mode) +static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi, + struct buffer_head *bh, int mode) { int len, err; @@ -363,7 +364,7 @@ static int nilfs_submit_bh(struct nilfs_write_info *wi, struct buffer_head *bh, return 0; } /* bio is FULL */ - err = nilfs_submit_seg_bio(wi, mode); + err = nilfs_segbuf_submit_bio(segbuf, wi, mode); /* never submit current bh */ if (likely(!err)) goto repeat; @@ -377,13 +378,13 @@ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, int res = 0, rw = WRITE; list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - res = nilfs_submit_bh(wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, wi, bh, rw); if (unlikely(res)) goto failed_bio; } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - res = nilfs_submit_bh(wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, wi, bh, rw); if (unlikely(res)) goto failed_bio; } @@ -394,7 +395,7 @@ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * submission. */ rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); - res = nilfs_submit_seg_bio(wi, rw); + res = nilfs_segbuf_submit_bio(segbuf, wi, rw); } failed_bio: @@ -403,29 +404,27 @@ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, /** * nilfs_segbuf_wait - wait for completion of requested BIOs - * @wi: nilfs_write_info + * @segbuf: segment buffer * * Return Value: On Success, 0 is returned. On Error, one of the following * negative error code is returned. * * %-EIO - I/O error */ -int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi) +int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf) { int err = 0; - if (!wi->nbio) + if (!segbuf->sb_nbio) return 0; do { - wait_for_completion(&wi->bio_event); - } while (--wi->nbio > 0); + wait_for_completion(&segbuf->sb_bio_event); + } while (--segbuf->sb_nbio > 0); - if (unlikely(atomic_read(&wi->err) > 0)) { + if (unlikely(atomic_read(&segbuf->sb_err) > 0)) { printk(KERN_ERR "NILFS: IO error writing segment\n"); err = -EIO; - segbuf->sb_io_error = 1; } return err; } diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 0c3076f..bd076cc 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -77,7 +77,9 @@ struct nilfs_segsum_info { * @sb_rest_blocks: Number of residual blocks in the current segment * @sb_segsum_buffers: List of buffers for segment summaries * @sb_payload_buffers: List of buffers for segment payload - * @sb_io_error: I/O error status + * @sb_nbio: Number of flying bio requests + * @sb_err: I/O error status + * @sb_bio_event: Completion event of log writing */ struct nilfs_segment_buffer { struct super_block *sb_super; @@ -96,7 +98,9 @@ struct nilfs_segment_buffer { struct list_head sb_payload_buffers; /* including super root */ /* io status */ - int sb_io_error; + int sb_nbio; + atomic_t sb_err; + struct completion sb_bio_event; }; #define NILFS_LIST_SEGBUF(head) \ @@ -177,11 +181,6 @@ struct nilfs_write_info { int nr_vecs; sector_t blocknr; - int nbio; - atomic_t err; - struct completion bio_event; - /* completion event of segment write */ - /* * The following fields must be set explicitly */ @@ -195,7 +194,6 @@ void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *, struct nilfs_write_info *); int nilfs_segbuf_write(struct nilfs_segment_buffer *, struct nilfs_write_info *); -int nilfs_segbuf_wait(struct nilfs_segment_buffer *, - struct nilfs_write_info *); +int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf); #endif /* _NILFS_SEGBUF_H */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 79acf4d..cb004eb 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1382,14 +1382,14 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct nilfs_segment_buffer *segbuf; - int ret, done = 0; + int ret; segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); if (nilfs->ns_nextnum != segbuf->sb_nextnum) { ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); WARN_ON(ret); /* never fails */ } - if (segbuf->sb_io_error) { + if (atomic_read(&segbuf->sb_err)) { /* Case 1: The first segment failed */ if (segbuf->sb_pseg_start != segbuf->sb_fseg_start) /* Case 1a: Partial segment appended into an existing @@ -1398,19 +1398,16 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, segbuf->sb_fseg_end); else /* Case 1b: New full segment */ set_nilfs_discontinued(nilfs); - done++; } list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); WARN_ON(ret); /* never fails */ - if (!done && segbuf->sb_io_error) { - if (segbuf->sb_segnum != nilfs->ns_nextnum) - /* Case 2: extended segment (!= next) failed */ - nilfs_sufile_set_error(nilfs->ns_sufile, - segbuf->sb_segnum); - done++; - } + if (atomic_read(&segbuf->sb_err) && + segbuf->sb_segnum != nilfs->ns_nextnum) + /* Case 2: extended segment (!= next) failed */ + nilfs_sufile_set_error(nilfs->ns_sufile, + segbuf->sb_segnum); } } @@ -1801,7 +1798,7 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci, nilfs_segbuf_prepare_write(segbuf, &wi); err = nilfs_segbuf_write(segbuf, &wi); - res = nilfs_segbuf_wait(segbuf, &wi); + res = nilfs_segbuf_wait(segbuf); err = err ? : res; if (err) return err; -- cgit v0.10.2 From 9c965bac169f786cc6cca8ff81d3b636e923c960 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 29 Nov 2009 01:17:31 +0900 Subject: nilfs2: hide nilfs_write_info struct in segment buffer code Hides nilfs_write_info struct and nilfs_segbuf_prepare_write function in segbuf.c to simplify the interface of nilfs_segbuf_write function. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index d86056c..636590c 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -24,10 +24,22 @@ #include #include #include +#include #include "page.h" #include "segbuf.h" +struct nilfs_write_info { + struct the_nilfs *nilfs; + struct bio *bio; + int start, end; /* The region to be submitted */ + int rest_blocks; + int max_pages; + int nr_vecs; + sector_t blocknr; +}; + + static struct kmem_cache *nilfs_segbuf_cachep; static void nilfs_segbuf_init_once(void *obj) @@ -271,7 +283,7 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, struct bio *bio = wi->bio; int err; - if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->bdi)) { + if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->nilfs->ns_bdi)) { wait_for_completion(&segbuf->sb_bio_event); segbuf->sb_nbio--; if (unlikely(atomic_read(&segbuf->sb_err))) { @@ -305,17 +317,15 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, } /** - * nilfs_alloc_seg_bio - allocate a bio for writing segment. - * @sb: super block - * @start: beginning disk block number of this BIO. + * nilfs_alloc_seg_bio - allocate a new bio for writing log + * @nilfs: nilfs object + * @start: start block number of the bio * @nr_vecs: request size of page vector. * - * alloc_seg_bio() allocates a new BIO structure and initialize it. - * * Return Value: On success, pointer to the struct bio is returned. * On error, NULL is returned. */ -static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, +static struct bio *nilfs_alloc_seg_bio(struct the_nilfs *nilfs, sector_t start, int nr_vecs) { struct bio *bio; @@ -326,18 +336,18 @@ static struct bio *nilfs_alloc_seg_bio(struct super_block *sb, sector_t start, bio = bio_alloc(GFP_NOIO, nr_vecs); } if (likely(bio)) { - bio->bi_bdev = sb->s_bdev; - bio->bi_sector = (sector_t)start << (sb->s_blocksize_bits - 9); + bio->bi_bdev = nilfs->ns_bdev; + bio->bi_sector = start << (nilfs->ns_blocksize_bits - 9); } return bio; } -void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi) +static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf, + struct nilfs_write_info *wi) { wi->bio = NULL; wi->rest_blocks = segbuf->sb_sum.nblocks; - wi->max_pages = bio_get_nr_vecs(wi->sb->s_bdev); + wi->max_pages = bio_get_nr_vecs(wi->nilfs->ns_bdev); wi->nr_vecs = min(wi->max_pages, wi->rest_blocks); wi->start = wi->end = 0; wi->blocknr = segbuf->sb_pseg_start; @@ -352,7 +362,7 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, BUG_ON(wi->nr_vecs <= 0); repeat: if (!wi->bio) { - wi->bio = nilfs_alloc_seg_bio(wi->sb, wi->blocknr + wi->end, + wi->bio = nilfs_alloc_seg_bio(wi->nilfs, wi->blocknr + wi->end, wi->nr_vecs); if (unlikely(!wi->bio)) return -ENOMEM; @@ -371,31 +381,47 @@ static int nilfs_segbuf_submit_bh(struct nilfs_segment_buffer *segbuf, return err; } +/** + * nilfs_segbuf_write - submit write requests of a log + * @segbuf: buffer storing a log to be written + * @nilfs: nilfs object + * + * Return Value: On Success, 0 is returned. On Error, one of the following + * negative error code is returned. + * + * %-EIO - I/O error + * + * %-ENOMEM - Insufficient memory available. + */ int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, - struct nilfs_write_info *wi) + struct the_nilfs *nilfs) { + struct nilfs_write_info wi; struct buffer_head *bh; int res = 0, rw = WRITE; + wi.nilfs = nilfs; + nilfs_segbuf_prepare_write(segbuf, &wi); + list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { - res = nilfs_segbuf_submit_bh(segbuf, wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); if (unlikely(res)) goto failed_bio; } list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - res = nilfs_segbuf_submit_bh(segbuf, wi, bh, rw); + res = nilfs_segbuf_submit_bh(segbuf, &wi, bh, rw); if (unlikely(res)) goto failed_bio; } - if (wi->bio) { + if (wi.bio) { /* * Last BIO is always sent through the following * submission. */ rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); - res = nilfs_segbuf_submit_bio(segbuf, wi, rw); + res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); } failed_bio: diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index bd076cc..241a00d 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -27,7 +27,6 @@ #include #include #include -#include /** * struct nilfs_segsum_info - On-memory segment summary @@ -173,27 +172,8 @@ static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) nilfs_release_buffers(&segbuf->sb_payload_buffers); } -struct nilfs_write_info { - struct bio *bio; - int start, end; /* The region to be submitted */ - int rest_blocks; - int max_pages; - int nr_vecs; - sector_t blocknr; - - /* - * The following fields must be set explicitly - */ - struct super_block *sb; - struct backing_dev_info *bdi; /* backing dev info */ - struct buffer_head *bh_sr; -}; - - -void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *, - struct nilfs_write_info *); -int nilfs_segbuf_write(struct nilfs_segment_buffer *, - struct nilfs_write_info *); +int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, + struct the_nilfs *nilfs); int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf); #endif /* _NILFS_SEGBUF_H */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index cb004eb..4422cda 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1784,19 +1784,13 @@ static int nilfs_segctor_prepare_write(struct nilfs_sc_info *sci, } static int nilfs_segctor_write(struct nilfs_sc_info *sci, - struct backing_dev_info *bdi) + struct the_nilfs *nilfs) { struct nilfs_segment_buffer *segbuf; - struct nilfs_write_info wi; int err, res; - wi.sb = sci->sc_super; - wi.bh_sr = sci->sc_super_root; - wi.bdi = bdi; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - nilfs_segbuf_prepare_write(segbuf, &wi); - err = nilfs_segbuf_write(segbuf, &wi); + err = nilfs_segbuf_write(segbuf, nilfs); res = nilfs_segbuf_wait(segbuf); err = err ? : res; @@ -2170,7 +2164,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); - err = nilfs_segctor_write(sci, nilfs->ns_bdi); + err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) goto failed_to_write; -- cgit v0.10.2 From e29df395bc6d2d0c89b3d8a5939a24b1b43c2fb6 Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 29 Nov 2009 16:51:16 +0900 Subject: nilfs2: add iterator for segment buffers This adds a few iterator functions for segment buffers to make it easy to handle multiple series of logs. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 636590c..d856d62 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -234,7 +234,7 @@ void nilfs_segbuf_fill_in_data_crc(struct nilfs_segment_buffer *segbuf, raw_sum->ss_datasum = cpu_to_le32(crc); } -void nilfs_release_buffers(struct list_head *list) +static void nilfs_release_buffers(struct list_head *list) { struct buffer_head *bh, *n; @@ -256,6 +256,49 @@ void nilfs_release_buffers(struct list_head *list) } } +static void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) +{ + nilfs_release_buffers(&segbuf->sb_segsum_buffers); + nilfs_release_buffers(&segbuf->sb_payload_buffers); +} + +/* + * Iterators for segment buffers + */ +void nilfs_clear_logs(struct list_head *logs) +{ + struct nilfs_segment_buffer *segbuf; + + list_for_each_entry(segbuf, logs, sb_list) + nilfs_segbuf_clear(segbuf); +} + +void nilfs_truncate_logs(struct list_head *logs, + struct nilfs_segment_buffer *last) +{ + struct nilfs_segment_buffer *n, *segbuf; + + segbuf = list_prepare_entry(last, logs, sb_list); + list_for_each_entry_safe_continue(segbuf, n, logs, sb_list) { + list_del_init(&segbuf->sb_list); + nilfs_segbuf_clear(segbuf); + nilfs_segbuf_free(segbuf); + } +} + +int nilfs_wait_on_logs(struct list_head *logs) +{ + struct nilfs_segment_buffer *segbuf; + int err; + + list_for_each_entry(segbuf, logs, sb_list) { + err = nilfs_segbuf_wait(segbuf); + if (err) + return err; + } + return 0; +} + /* * BIO operations */ diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 241a00d..7fbaf5e 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -164,16 +164,18 @@ nilfs_segbuf_add_file_buffer(struct nilfs_segment_buffer *segbuf, segbuf->sb_sum.nfileblk++; } -void nilfs_release_buffers(struct list_head *); - -static inline void nilfs_segbuf_clear(struct nilfs_segment_buffer *segbuf) -{ - nilfs_release_buffers(&segbuf->sb_segsum_buffers); - nilfs_release_buffers(&segbuf->sb_payload_buffers); -} - int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, struct the_nilfs *nilfs); int nilfs_segbuf_wait(struct nilfs_segment_buffer *segbuf); +void nilfs_clear_logs(struct list_head *logs); +void nilfs_truncate_logs(struct list_head *logs, + struct nilfs_segment_buffer *last); +int nilfs_wait_on_logs(struct list_head *logs); + +static inline void nilfs_destroy_logs(struct list_head *logs) +{ + nilfs_truncate_logs(logs, NULL); +} + #endif /* _NILFS_SEGBUF_H */ diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 4422cda..689deb9 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1276,7 +1276,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { - struct nilfs_segment_buffer *segbuf, *n; + struct nilfs_segment_buffer *segbuf; __u64 nextnum; int err; @@ -1313,18 +1313,14 @@ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); /* truncating segment buffers */ - list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, - sb_list) { - list_del_init(&segbuf->sb_list); - nilfs_segbuf_free(segbuf); - } + nilfs_truncate_logs(&sci->sc_segbufs, segbuf); return 0; } static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, struct the_nilfs *nilfs, int nadd) { - struct nilfs_segment_buffer *segbuf, *prev, *n; + struct nilfs_segment_buffer *segbuf, *prev; struct inode *sufile = nilfs->ns_sufile; __u64 nextnextnum; LIST_HEAD(list); @@ -1369,12 +1365,11 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, failed_segbuf: nilfs_segbuf_free(segbuf); failed: - list_for_each_entry_safe(segbuf, n, &list, sb_list) { + list_for_each_entry(segbuf, &list, sb_list) { ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); WARN_ON(ret); /* never fails */ - list_del_init(&segbuf->sb_list); - nilfs_segbuf_free(segbuf); } + nilfs_destroy_logs(&list); return err; } @@ -1411,27 +1406,6 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, } } -static void nilfs_segctor_clear_segment_buffers(struct nilfs_sc_info *sci) -{ - struct nilfs_segment_buffer *segbuf; - - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) - nilfs_segbuf_clear(segbuf); - sci->sc_super_root = NULL; -} - -static void nilfs_segctor_destroy_segment_buffers(struct nilfs_sc_info *sci) -{ - struct nilfs_segment_buffer *segbuf; - - while (!list_empty(&sci->sc_segbufs)) { - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); - list_del_init(&segbuf->sb_list); - nilfs_segbuf_free(segbuf); - } - /* sci->sc_curseg = NULL; */ -} - static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs, int err) { @@ -1447,7 +1421,8 @@ static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, WARN_ON(ret); /* do not happen */ } } - nilfs_segctor_clear_segment_buffers(sci); + nilfs_clear_logs(&sci->sc_segbufs); + sci->sc_super_root = NULL; } static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, @@ -1490,17 +1465,15 @@ static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci, struct nilfs_segment_buffer *last, struct inode *sufile) { - struct nilfs_segment_buffer *segbuf = last, *n; + struct nilfs_segment_buffer *segbuf = last; int ret; - list_for_each_entry_safe_continue(segbuf, n, &sci->sc_segbufs, - sb_list) { - list_del_init(&segbuf->sb_list); + list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks; ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); WARN_ON(ret); - nilfs_segbuf_free(segbuf); } + nilfs_truncate_logs(&sci->sc_segbufs, last); } @@ -1539,7 +1512,7 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, NULL); WARN_ON(err); /* do not happen */ } - nilfs_segctor_clear_segment_buffers(sci); + nilfs_clear_logs(&sci->sc_segbufs); err = nilfs_segctor_extend_segments(sci, nilfs, nadd); if (unlikely(err)) @@ -2179,7 +2152,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) } while (sci->sc_stage.scnt != NILFS_ST_DONE); out: - nilfs_segctor_destroy_segment_buffers(sci); + nilfs_destroy_logs(&sci->sc_segbufs); nilfs_segctor_check_out_files(sci, sbi); return err; -- cgit v0.10.2 From a694291a6211537189c6080f77f63cdabfc9b63e Mon Sep 17 00:00:00 2001 From: Ryusuke Konishi Date: Sun, 29 Nov 2009 23:03:04 +0900 Subject: nilfs2: separate wait function from nilfs_segctor_write This separates wait function for submitted logs from the write function nilfs_segctor_write(). A new list of segment buffers "sc_write_logs" is added to hold logs under writing, and double buffering is partially applied to hide io latency. At this point, the double buffering is disabled for blocksize < pagesize because page dirty flag is turned off during write and dirty buffers are not properly collected for pages crossing over segments. To receive full benefit of the double buffering, further refinement is needed to move the io wait outside the lock section of log writer. Signed-off-by: Ryusuke Konishi diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index d856d62..645c786 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -100,6 +100,22 @@ void nilfs_segbuf_map(struct nilfs_segment_buffer *segbuf, __u64 segnum, segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; } +/** + * nilfs_segbuf_map_cont - map a new log behind a given log + * @segbuf: new segment buffer + * @prev: segment buffer containing a log to be continued + */ +void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, + struct nilfs_segment_buffer *prev) +{ + segbuf->sb_segnum = prev->sb_segnum; + segbuf->sb_fseg_start = prev->sb_fseg_start; + segbuf->sb_fseg_end = prev->sb_fseg_end; + segbuf->sb_pseg_start = prev->sb_pseg_start + prev->sb_sum.nblocks; + segbuf->sb_rest_blocks = + segbuf->sb_fseg_end - segbuf->sb_pseg_start + 1; +} + void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *segbuf, __u64 nextnum, struct the_nilfs *nilfs) { diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 7fbaf5e..6af1630 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -128,6 +128,8 @@ struct nilfs_segment_buffer *nilfs_segbuf_new(struct super_block *); void nilfs_segbuf_free(struct nilfs_segment_buffer *); void nilfs_segbuf_map(struct nilfs_segment_buffer *, __u64, unsigned long, struct the_nilfs *); +void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf, + struct nilfs_segment_buffer *prev); void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64, struct the_nilfs *); int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t); diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 689deb9..17584c5 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1273,48 +1273,69 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) return err; } +/** + * nilfs_segctor_begin_construction - setup segment buffer to make a new log + * @sci: nilfs_sc_info + * @nilfs: nilfs object + */ static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { - struct nilfs_segment_buffer *segbuf; + struct nilfs_segment_buffer *segbuf, *prev; __u64 nextnum; - int err; + int err, alloc = 0; - if (list_empty(&sci->sc_segbufs)) { - segbuf = nilfs_segbuf_new(sci->sc_super); - if (unlikely(!segbuf)) - return -ENOMEM; - list_add(&segbuf->sb_list, &sci->sc_segbufs); - } else - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + segbuf = nilfs_segbuf_new(sci->sc_super); + if (unlikely(!segbuf)) + return -ENOMEM; - nilfs_segbuf_map(segbuf, nilfs->ns_segnum, nilfs->ns_pseg_offset, - nilfs); + if (list_empty(&sci->sc_write_logs)) { + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, + nilfs->ns_pseg_offset, nilfs); + if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { + nilfs_shift_to_next_segment(nilfs); + nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + } - if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { - nilfs_shift_to_next_segment(nilfs); - nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs); + segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; + nextnum = nilfs->ns_nextnum; + + if (nilfs->ns_segnum == nilfs->ns_nextnum) + /* Start from the head of a new full segment */ + alloc++; + } else { + /* Continue logs */ + prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs); + nilfs_segbuf_map_cont(segbuf, prev); + segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq; + nextnum = prev->sb_nextnum; + + if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) { + nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs); + segbuf->sb_sum.seg_seq++; + alloc++; + } } - sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum); - if (unlikely(err)) - return err; + if (err) + goto failed; - if (nilfs->ns_segnum == nilfs->ns_nextnum) { - /* Start from the head of a new full segment */ + if (alloc) { err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum); - if (unlikely(err)) - return err; - } else - nextnum = nilfs->ns_nextnum; - - segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq; + if (err) + goto failed; + } nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs); - /* truncating segment buffers */ - nilfs_truncate_logs(&sci->sc_segbufs, segbuf); + BUG_ON(!list_empty(&sci->sc_segbufs)); + list_add_tail(&segbuf->sb_list, &sci->sc_segbufs); + sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks; return 0; + + failed: + nilfs_segbuf_free(segbuf); + return err; } static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, @@ -1373,15 +1394,16 @@ static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci, return err; } -static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs) +static void nilfs_free_incomplete_logs(struct list_head *logs, + struct the_nilfs *nilfs) { - struct nilfs_segment_buffer *segbuf; + struct nilfs_segment_buffer *segbuf, *prev; + struct inode *sufile = nilfs->ns_sufile; int ret; - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + segbuf = NILFS_FIRST_SEGBUF(logs); if (nilfs->ns_nextnum != segbuf->sb_nextnum) { - ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); WARN_ON(ret); /* never fails */ } if (atomic_read(&segbuf->sb_err)) { @@ -1395,34 +1417,18 @@ static void nilfs_segctor_free_incomplete_segments(struct nilfs_sc_info *sci, set_nilfs_discontinued(nilfs); } - list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { - ret = nilfs_sufile_free(nilfs->ns_sufile, segbuf->sb_nextnum); - WARN_ON(ret); /* never fails */ + prev = segbuf; + list_for_each_entry_continue(segbuf, logs, sb_list) { + if (prev->sb_nextnum != segbuf->sb_nextnum) { + ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum); + WARN_ON(ret); /* never fails */ + } if (atomic_read(&segbuf->sb_err) && segbuf->sb_segnum != nilfs->ns_nextnum) /* Case 2: extended segment (!= next) failed */ - nilfs_sufile_set_error(nilfs->ns_sufile, - segbuf->sb_segnum); - } -} - -static void nilfs_segctor_end_construction(struct nilfs_sc_info *sci, - struct the_nilfs *nilfs, int err) -{ - if (unlikely(err)) { - nilfs_segctor_free_incomplete_segments(sci, nilfs); - if (sci->sc_stage.flags & NILFS_CF_SUFREED) { - int ret; - - ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, - sci->sc_freesegs, - sci->sc_nfreesegs, - NULL); - WARN_ON(ret); /* do not happen */ - } + nilfs_sufile_set_error(sufile, segbuf->sb_segnum); + prev = segbuf; } - nilfs_clear_logs(&sci->sc_segbufs); - sci->sc_super_root = NULL; } static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, @@ -1442,19 +1448,18 @@ static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci, } } -static void nilfs_segctor_cancel_segusage(struct nilfs_sc_info *sci, - struct inode *sufile) +static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile) { struct nilfs_segment_buffer *segbuf; int ret; - segbuf = NILFS_FIRST_SEGBUF(&sci->sc_segbufs); + segbuf = NILFS_FIRST_SEGBUF(logs); ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, segbuf->sb_pseg_start - segbuf->sb_fseg_start, 0); WARN_ON(ret); /* always succeed because the segusage is dirty */ - list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) { + list_for_each_entry_continue(segbuf, logs, sb_list) { ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum, 0, 0); WARN_ON(ret); /* always succeed */ @@ -1760,17 +1765,15 @@ static int nilfs_segctor_write(struct nilfs_sc_info *sci, struct the_nilfs *nilfs) { struct nilfs_segment_buffer *segbuf; - int err, res; + int ret = 0; list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - err = nilfs_segbuf_write(segbuf, nilfs); - - res = nilfs_segbuf_wait(segbuf); - err = err ? : res; - if (err) - return err; + ret = nilfs_segbuf_write(segbuf, nilfs); + if (ret) + break; } - return 0; + list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs); + return ret; } static void __nilfs_end_page_io(struct page *page, int err) @@ -1848,15 +1851,17 @@ static void nilfs_clear_copied_buffers(struct list_head *list, int err) } } -static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, - struct page *failed_page, int err) +static void nilfs_abort_logs(struct list_head *logs, struct page *failed_page, + struct buffer_head *bh_sr, int err) { struct nilfs_segment_buffer *segbuf; struct page *bd_page = NULL, *fs_page = NULL; + struct buffer_head *bh; - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { - struct buffer_head *bh; + if (list_empty(logs)) + return; + list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { if (bh->b_page != bd_page) { @@ -1868,7 +1873,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { - if (bh == sci->sc_super_root) { + if (bh == bh_sr) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); bd_page = bh->b_page; @@ -1878,7 +1883,7 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, if (bh->b_page != fs_page) { nilfs_end_page_io(fs_page, err); if (fs_page && fs_page == failed_page) - goto done; + return; fs_page = bh->b_page; } } @@ -1887,8 +1892,34 @@ static void nilfs_segctor_abort_write(struct nilfs_sc_info *sci, end_page_writeback(bd_page); nilfs_end_page_io(fs_page, err); - done: +} + +static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci, + struct the_nilfs *nilfs, int err) +{ + LIST_HEAD(logs); + int ret; + + list_splice_tail_init(&sci->sc_write_logs, &logs); + ret = nilfs_wait_on_logs(&logs); + if (ret) + nilfs_abort_logs(&logs, NULL, sci->sc_super_root, ret); + + list_splice_tail_init(&sci->sc_segbufs, &logs); + nilfs_cancel_segusage(&logs, nilfs->ns_sufile); + nilfs_free_incomplete_logs(&logs, nilfs); nilfs_clear_copied_buffers(&sci->sc_copied_buffers, err); + + if (sci->sc_stage.flags & NILFS_CF_SUFREED) { + ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile, + sci->sc_freesegs, + sci->sc_nfreesegs, + NULL); + WARN_ON(ret); /* do not happen */ + } + + nilfs_destroy_logs(&logs); + sci->sc_super_root = NULL; } static void nilfs_set_next_segment(struct the_nilfs *nilfs, @@ -1910,7 +1941,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) struct the_nilfs *nilfs = sbi->s_nilfs; int update_sr = (sci->sc_super_root != NULL); - list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) { + list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) { struct buffer_head *bh; list_for_each_entry(bh, &segbuf->sb_segsum_buffers, @@ -1983,7 +2014,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) sci->sc_nblk_inc += sci->sc_nblk_this_inc; - segbuf = NILFS_LAST_SEGBUF(&sci->sc_segbufs); + segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs); nilfs_set_next_segment(nilfs, segbuf); if (update_sr) { @@ -1994,10 +2025,23 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); + nilfs_segctor_clear_metadata_dirty(sci); } else clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags); } +static int nilfs_segctor_wait(struct nilfs_sc_info *sci) +{ + int ret; + + ret = nilfs_wait_on_logs(&sci->sc_write_logs); + if (!ret) { + nilfs_segctor_complete_write(sci); + nilfs_destroy_logs(&sci->sc_write_logs); + } + return ret; +} + static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, struct nilfs_sb_info *sbi) { @@ -2110,7 +2154,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) /* Avoid empty segment */ if (sci->sc_stage.scnt == NILFS_ST_DONE && NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { - nilfs_segctor_end_construction(sci, nilfs, 1); + nilfs_segctor_abort_construction(sci, nilfs, 1); goto out; } @@ -2124,7 +2168,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) if (has_sr) { err = nilfs_segctor_fill_in_checkpoint(sci); if (unlikely(err)) - goto failed_to_make_up; + goto failed_to_write; nilfs_segctor_fill_in_super_root(sci, nilfs); } @@ -2132,42 +2176,46 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) /* Write partial segments */ err = nilfs_segctor_prepare_write(sci, &failed_page); - if (unlikely(err)) + if (err) { + nilfs_abort_logs(&sci->sc_segbufs, failed_page, + sci->sc_super_root, err); goto failed_to_write; - + } nilfs_segctor_fill_in_checksums(sci, nilfs->ns_crc_seed); err = nilfs_segctor_write(sci, nilfs); if (unlikely(err)) goto failed_to_write; - nilfs_segctor_complete_write(sci); - - /* Commit segments */ - if (has_sr) - nilfs_segctor_clear_metadata_dirty(sci); - - nilfs_segctor_end_construction(sci, nilfs, 0); - + if (sci->sc_stage.scnt == NILFS_ST_DONE || + nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) { + /* + * At this point, we avoid double buffering + * for blocksize < pagesize because page dirty + * flag is turned off during write and dirty + * buffers are not properly collected for + * pages crossing over segments. + */ + err = nilfs_segctor_wait(sci); + if (err) + goto failed_to_write; + } } while (sci->sc_stage.scnt != NILFS_ST_DONE); + sci->sc_super_root = NULL; + out: - nilfs_destroy_logs(&sci->sc_segbufs); nilfs_segctor_check_out_files(sci, sbi); return err; failed_to_write: - nilfs_segctor_abort_write(sci, failed_page, err); - nilfs_segctor_cancel_segusage(sci, nilfs->ns_sufile); - - failed_to_make_up: if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) nilfs_redirty_inodes(&sci->sc_dirty_files); failed: if (nilfs_doing_gc()) nilfs_redirty_inodes(&sci->sc_gc_inodes); - nilfs_segctor_end_construction(sci, nilfs, err); + nilfs_segctor_abort_construction(sci, nilfs, err); goto out; } @@ -2725,6 +2773,7 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) spin_lock_init(&sci->sc_state_lock); INIT_LIST_HEAD(&sci->sc_dirty_files); INIT_LIST_HEAD(&sci->sc_segbufs); + INIT_LIST_HEAD(&sci->sc_write_logs); INIT_LIST_HEAD(&sci->sc_gc_inodes); INIT_LIST_HEAD(&sci->sc_copied_buffers); @@ -2792,6 +2841,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) } WARN_ON(!list_empty(&sci->sc_segbufs)); + WARN_ON(!list_empty(&sci->sc_write_logs)); down_write(&sbi->s_nilfs->ns_segctor_sem); diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 0d2a475..3d3ab2f 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -97,6 +97,7 @@ struct nilfs_segsum_pointer { * @sc_dsync_start: start byte offset of data pages * @sc_dsync_end: end byte offset of data pages (inclusive) * @sc_segbufs: List of segment buffers + * @sc_write_logs: List of segment buffers to hold logs under writing * @sc_segbuf_nblocks: Number of available blocks in segment buffers. * @sc_curseg: Current segment buffer * @sc_super_root: Pointer to the super root buffer @@ -143,6 +144,7 @@ struct nilfs_sc_info { /* Segment buffers */ struct list_head sc_segbufs; + struct list_head sc_write_logs; unsigned long sc_segbuf_nblocks; struct nilfs_segment_buffer *sc_curseg; struct buffer_head *sc_super_root; -- cgit v0.10.2