diff options
author | Michal Marek <mmarek@suse.cz> | 2010-12-14 21:01:55 (GMT) |
---|---|---|
committer | Michal Marek <mmarek@suse.cz> | 2010-12-14 21:01:55 (GMT) |
commit | 8990c1bc4be46473ad19bf2fa612ca57286f3df4 (patch) | |
tree | 3cea60576903a1d26c67e6ec62891b524d390e95 /fs/nilfs2 | |
parent | 2979076fbf17a0947d6eba367b0cac19c907c160 (diff) | |
parent | c8ddb2713c624f432fa5fe3c7ecffcdda46ea0d4 (diff) | |
download | linux-fsl-qoriq-8990c1bc4be46473ad19bf2fa612ca57286f3df4.tar.xz |
Merge commit 'v2.6.37-rc1' into kbuild/kbuild
Diffstat (limited to 'fs/nilfs2')
39 files changed, 2607 insertions, 2195 deletions
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile index df3e62c..85c9873 100644 --- a/fs/nilfs2/Makefile +++ b/fs/nilfs2/Makefile @@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ btnode.o bmap.o btree.o direct.o dat.o recovery.o \ the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ - ifile.o alloc.o gcinode.o ioctl.o gcdat.o + ifile.o alloc.o gcinode.o ioctl.o diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index effdbdb..8b782b0 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c @@ -26,6 +26,8 @@ #include "nilfs.h" #include "bmap.h" #include "sb.h" +#include "btree.h" +#include "direct.h" #include "btnode.h" #include "mdt.h" #include "dat.h" @@ -531,18 +533,20 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) nilfs_btree_init_gc(bmap); } -void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) +void nilfs_bmap_save(const struct nilfs_bmap *bmap, + struct nilfs_bmap_store *store) { - memcpy(gcbmap, bmap, sizeof(union nilfs_bmap_union)); - init_rwsem(&gcbmap->b_sem); - lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); - gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; + memcpy(store->data, bmap->b_u.u_data, sizeof(store->data)); + store->last_allocated_key = bmap->b_last_allocated_key; + store->last_allocated_ptr = bmap->b_last_allocated_ptr; + store->state = bmap->b_state; } -void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) +void nilfs_bmap_restore(struct nilfs_bmap *bmap, + const struct nilfs_bmap_store *store) { - memcpy(bmap, gcbmap, sizeof(union nilfs_bmap_union)); - init_rwsem(&bmap->b_sem); - lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); - bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; + memcpy(bmap->b_u.u_data, store->data, sizeof(store->data)); + bmap->b_last_allocated_key = store->last_allocated_key; + bmap->b_last_allocated_ptr = store->last_allocated_ptr; + bmap->b_state = store->state; } diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index 9980d7d..bde1c0a 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h @@ -32,11 +32,6 @@ #define NILFS_BMAP_INVALID_PTR 0 -#define nilfs_bmap_dkey_to_key(dkey) le64_to_cpu(dkey) -#define nilfs_bmap_key_to_dkey(key) cpu_to_le64(key) -#define nilfs_bmap_dptr_to_ptr(dptr) le64_to_cpu(dptr) -#define nilfs_bmap_ptr_to_dptr(ptr) cpu_to_le64(ptr) - #define nilfs_bmap_keydiff_abs(diff) ((diff) < 0 ? -(diff) : (diff)) @@ -71,7 +66,7 @@ struct nilfs_bmap_operations { int (*bop_delete)(struct nilfs_bmap *, __u64); void (*bop_clear)(struct nilfs_bmap *); - int (*bop_propagate)(const struct nilfs_bmap *, struct buffer_head *); + int (*bop_propagate)(struct nilfs_bmap *, struct buffer_head *); void (*bop_lookup_dirty_buffers)(struct nilfs_bmap *, struct list_head *); @@ -110,6 +105,7 @@ static inline int nilfs_bmap_is_new_ptr(unsigned long ptr) * @b_last_allocated_ptr: last allocated ptr for data block * @b_ptr_type: pointer type * @b_state: state + * @b_nchildren_per_block: maximum number of child nodes for non-root nodes */ struct nilfs_bmap { union { @@ -123,6 +119,7 @@ struct nilfs_bmap { __u64 b_last_allocated_ptr; int b_ptr_type; int b_state; + __u16 b_nchildren_per_block; }; /* pointer type */ @@ -138,6 +135,12 @@ struct nilfs_bmap { /* state */ #define NILFS_BMAP_DIRTY 0x00000001 +struct nilfs_bmap_store { + __le64 data[NILFS_BMAP_SIZE / sizeof(__le64)]; + __u64 last_allocated_key; + __u64 last_allocated_ptr; + int state; +}; int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); @@ -156,9 +159,9 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *, __u64, int, __u64 *); int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int); void nilfs_bmap_init_gc(struct nilfs_bmap *); -void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); -void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); +void nilfs_bmap_save(const struct nilfs_bmap *, struct nilfs_bmap_store *); +void nilfs_bmap_restore(struct nilfs_bmap *, const struct nilfs_bmap_store *); static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key, __u64 *ptr) @@ -224,6 +227,13 @@ static inline void nilfs_bmap_abort_end_ptr(struct nilfs_bmap *bmap, nilfs_dat_abort_end(dat, &req->bpr_req); } +static inline void nilfs_bmap_set_target_v(struct nilfs_bmap *bmap, __u64 key, + __u64 ptr) +{ + bmap->b_last_allocated_key = key; + bmap->b_last_allocated_ptr = ptr; +} + __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *, const struct buffer_head *); diff --git a/fs/nilfs2/bmap_union.h b/fs/nilfs2/bmap_union.h deleted file mode 100644 index d41509b..0000000 --- a/fs/nilfs2/bmap_union.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * bmap_union.h - NILFS block mapping. - * - * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Koji Sato <koji@osrg.net>. - */ - -#ifndef _NILFS_BMAP_UNION_H -#define _NILFS_BMAP_UNION_H - -#include "bmap.h" -#include "direct.h" -#include "btree.h" - -/** - * nilfs_bmap_union - - * @bi_bmap: bmap structure - * @bi_btree: direct map structure - * @bi_direct: B-tree structure - */ -union nilfs_bmap_union { - struct nilfs_bmap bi_bmap; - struct nilfs_direct bi_direct; - struct nilfs_btree bi_btree; -}; - -#endif /* _NILFS_BMAP_UNION_H */ diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index 447ce47..5115814 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -37,15 +37,7 @@ void nilfs_btnode_cache_init_once(struct address_space *btnc) { - memset(btnc, 0, sizeof(*btnc)); - INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC); - spin_lock_init(&btnc->tree_lock); - INIT_LIST_HEAD(&btnc->private_list); - spin_lock_init(&btnc->private_lock); - - spin_lock_init(&btnc->i_mmap_lock); - INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap); - INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); + nilfs_mapping_init_once(btnc); } static const struct address_space_operations def_btnode_aops = { @@ -55,12 +47,7 @@ static const struct address_space_operations def_btnode_aops = { void nilfs_btnode_cache_init(struct address_space *btnc, struct backing_dev_info *bdi) { - btnc->host = NULL; /* can safely set to host inode ? */ - btnc->flags = 0; - mapping_set_gfp_mask(btnc, GFP_NOFS); - btnc->assoc_mapping = NULL; - btnc->backing_dev_info = bdi; - btnc->a_ops = &def_btnode_aops; + nilfs_mapping_init(btnc, bdi, &def_btnode_aops); } void nilfs_btnode_cache_clear(struct address_space *btnc) @@ -96,10 +83,12 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr) } int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, - sector_t pblocknr, struct buffer_head **pbh) + sector_t pblocknr, int mode, + struct buffer_head **pbh, sector_t *submit_ptr) { struct buffer_head *bh; struct inode *inode = NILFS_BTNC_I(btnc); + struct page *page; int err; bh = nilfs_grab_buffer(inode, btnc, blocknr, 1 << BH_NILFS_Node); @@ -107,6 +96,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, return -ENOMEM; err = -EEXIST; /* internal code */ + page = bh->b_page; if (buffer_uptodate(bh) || buffer_dirty(bh)) goto found; @@ -125,7 +115,16 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, } } } - lock_buffer(bh); + + if (mode == READA) { + if (pblocknr != *submit_ptr + 1 || !trylock_buffer(bh)) { + err = -EBUSY; /* internal code */ + brelse(bh); + goto out_locked; + } + } else { /* mode == READ */ + lock_buffer(bh); + } if (buffer_uptodate(bh)) { unlock_buffer(bh); err = -EEXIST; /* internal code */ @@ -136,15 +135,16 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(READ, bh); + submit_bh(mode, bh); bh->b_blocknr = blocknr; /* set back to the given block address */ + *submit_ptr = pblocknr; err = 0; found: *pbh = bh; out_locked: - unlock_page(bh->b_page); - page_cache_release(bh->b_page); + unlock_page(page); + page_cache_release(page); return err; } diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h index 07da83f..7903749 100644 --- a/fs/nilfs2/btnode.h +++ b/fs/nilfs2/btnode.h @@ -42,8 +42,8 @@ void nilfs_btnode_cache_init(struct address_space *, struct backing_dev_info *); void nilfs_btnode_cache_clear(struct address_space *); struct buffer_head *nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr); -int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, - struct buffer_head **); +int nilfs_btnode_submit_block(struct address_space *, __u64, sector_t, int, + struct buffer_head **, sector_t *); void nilfs_btnode_delete(struct buffer_head *); int nilfs_btnode_prepare_change_key(struct address_space *, struct nilfs_btnode_chkey_ctxt *); diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index b27a342..300c2bc 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -66,30 +66,10 @@ static void nilfs_btree_free_path(struct nilfs_btree_path *path) /* * B-tree node operations */ -static int nilfs_btree_get_block(const struct nilfs_btree *btree, __u64 ptr, - struct buffer_head **bhp) -{ - struct address_space *btnc = - &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; - int err; - - err = nilfs_btnode_submit_block(btnc, ptr, 0, bhp); - if (err) - return err == -EEXIST ? 0 : err; - - wait_on_buffer(*bhp); - if (!buffer_uptodate(*bhp)) { - brelse(*bhp); - return -EIO; - } - return 0; -} - -static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, +static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { - struct address_space *btnc = - &NILFS_BMAP_I((struct nilfs_bmap *)btree)->i_btnode_cache; + struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); @@ -101,71 +81,55 @@ static int nilfs_btree_get_new_block(const struct nilfs_btree *btree, return 0; } -static inline int -nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) +static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) { return node->bn_flags; } -static inline void +static void nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) { node->bn_flags = flags; } -static inline int nilfs_btree_node_root(const struct nilfs_btree_node *node) +static int nilfs_btree_node_root(const struct nilfs_btree_node *node) { return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; } -static inline int -nilfs_btree_node_get_level(const struct nilfs_btree_node *node) +static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) { return node->bn_level; } -static inline void +static void nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) { node->bn_level = level; } -static inline int -nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) +static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) { return le16_to_cpu(node->bn_nchildren); } -static inline void +static void nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) { node->bn_nchildren = cpu_to_le16(nchildren); } -static inline int nilfs_btree_node_size(const struct nilfs_btree *btree) +static int nilfs_btree_node_size(const struct nilfs_bmap *btree) { - return 1 << btree->bt_bmap.b_inode->i_blkbits; + return 1 << btree->b_inode->i_blkbits; } -static inline int -nilfs_btree_node_nchildren_min(const struct nilfs_btree_node *node, - const struct nilfs_btree *btree) +static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) { - return nilfs_btree_node_root(node) ? - NILFS_BTREE_ROOT_NCHILDREN_MIN : - NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); + return btree->b_nchildren_per_block; } -static inline int -nilfs_btree_node_nchildren_max(const struct nilfs_btree_node *node, - const struct nilfs_btree *btree) -{ - return nilfs_btree_node_root(node) ? - NILFS_BTREE_ROOT_NCHILDREN_MAX : - NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree)); -} - -static inline __le64 * +static __le64 * nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) { return (__le64 *)((char *)(node + 1) + @@ -173,45 +137,40 @@ nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); } -static inline __le64 * -nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, - const struct nilfs_btree *btree) +static __le64 * +nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) { - return (__le64 *)(nilfs_btree_node_dkeys(node) + - nilfs_btree_node_nchildren_max(node, btree)); + return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); } -static inline __u64 +static __u64 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) { - return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(node) + index)); + return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); } -static inline void +static void nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) { - *(nilfs_btree_node_dkeys(node) + index) = nilfs_bmap_key_to_dkey(key); + *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); } -static inline __u64 -nilfs_btree_node_get_ptr(const struct nilfs_btree *btree, - const struct nilfs_btree_node *node, int index) +static __u64 +nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, + int ncmax) { - return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(node, btree) + - index)); + return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); } -static inline void -nilfs_btree_node_set_ptr(struct nilfs_btree *btree, - struct nilfs_btree_node *node, int index, __u64 ptr) +static void +nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, + int ncmax) { - *(nilfs_btree_node_dptrs(node, btree) + index) = - nilfs_bmap_ptr_to_dptr(ptr); + *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); } -static void nilfs_btree_node_init(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - int flags, int level, int nchildren, +static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, + int level, int nchildren, int ncmax, const __u64 *keys, const __u64 *ptrs) { __le64 *dkeys; @@ -223,29 +182,28 @@ static void nilfs_btree_node_init(struct nilfs_btree *btree, nilfs_btree_node_set_nchildren(node, nchildren); dkeys = nilfs_btree_node_dkeys(node); - dptrs = nilfs_btree_node_dptrs(node, btree); + dptrs = nilfs_btree_node_dptrs(node, ncmax); for (i = 0; i < nchildren; i++) { - dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]); - dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]); + dkeys[i] = cpu_to_le64(keys[i]); + dptrs[i] = cpu_to_le64(ptrs[i]); } } /* Assume the buffer heads corresponding to left and right are locked. */ -static void nilfs_btree_node_move_left(struct nilfs_btree *btree, - struct nilfs_btree_node *left, +static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, struct nilfs_btree_node *right, - int n) + int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; ldkeys = nilfs_btree_node_dkeys(left); - ldptrs = nilfs_btree_node_dptrs(left, btree); + ldptrs = nilfs_btree_node_dptrs(left, lncmax); lnchildren = nilfs_btree_node_get_nchildren(left); rdkeys = nilfs_btree_node_dkeys(right); - rdptrs = nilfs_btree_node_dptrs(right, btree); + rdptrs = nilfs_btree_node_dptrs(right, rncmax); rnchildren = nilfs_btree_node_get_nchildren(right); memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); @@ -260,21 +218,20 @@ static void nilfs_btree_node_move_left(struct nilfs_btree *btree, } /* Assume that the buffer heads corresponding to left and right are locked. */ -static void nilfs_btree_node_move_right(struct nilfs_btree *btree, - struct nilfs_btree_node *left, +static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, struct nilfs_btree_node *right, - int n) + int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; ldkeys = nilfs_btree_node_dkeys(left); - ldptrs = nilfs_btree_node_dptrs(left, btree); + ldptrs = nilfs_btree_node_dptrs(left, lncmax); lnchildren = nilfs_btree_node_get_nchildren(left); rdkeys = nilfs_btree_node_dkeys(right); - rdptrs = nilfs_btree_node_dptrs(right, btree); + rdptrs = nilfs_btree_node_dptrs(right, rncmax); rnchildren = nilfs_btree_node_get_nchildren(right); memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); @@ -289,16 +246,15 @@ static void nilfs_btree_node_move_right(struct nilfs_btree *btree, } /* Assume that the buffer head corresponding to node is locked. */ -static void nilfs_btree_node_insert(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - __u64 key, __u64 ptr, int index) +static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, + __u64 key, __u64 ptr, int ncmax) { __le64 *dkeys; __le64 *dptrs; int nchildren; dkeys = nilfs_btree_node_dkeys(node); - dptrs = nilfs_btree_node_dptrs(node, btree); + dptrs = nilfs_btree_node_dptrs(node, ncmax); nchildren = nilfs_btree_node_get_nchildren(node); if (index < nchildren) { memmove(dkeys + index + 1, dkeys + index, @@ -306,16 +262,15 @@ static void nilfs_btree_node_insert(struct nilfs_btree *btree, memmove(dptrs + index + 1, dptrs + index, (nchildren - index) * sizeof(*dptrs)); } - dkeys[index] = nilfs_bmap_key_to_dkey(key); - dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr); + dkeys[index] = cpu_to_le64(key); + dptrs[index] = cpu_to_le64(ptr); nchildren++; nilfs_btree_node_set_nchildren(node, nchildren); } /* Assume that the buffer head corresponding to node is locked. */ -static void nilfs_btree_node_delete(struct nilfs_btree *btree, - struct nilfs_btree_node *node, - __u64 *keyp, __u64 *ptrp, int index) +static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, + __u64 *keyp, __u64 *ptrp, int ncmax) { __u64 key; __u64 ptr; @@ -324,9 +279,9 @@ static void nilfs_btree_node_delete(struct nilfs_btree *btree, int nchildren; dkeys = nilfs_btree_node_dkeys(node); - dptrs = nilfs_btree_node_dptrs(node, btree); - key = nilfs_bmap_dkey_to_key(dkeys[index]); - ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]); + dptrs = nilfs_btree_node_dptrs(node, ncmax); + key = le64_to_cpu(dkeys[index]); + ptr = le64_to_cpu(dptrs[index]); nchildren = nilfs_btree_node_get_nchildren(node); if (keyp != NULL) *keyp = key; @@ -382,40 +337,92 @@ static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, return s == 0; } -static inline struct nilfs_btree_node * -nilfs_btree_get_root(const struct nilfs_btree *btree) +/** + * nilfs_btree_node_broken - verify consistency of btree node + * @node: btree node block to be examined + * @size: node size (in bytes) + * @blocknr: block number + * + * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. + */ +static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, + size_t size, sector_t blocknr) { - return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data; + int level, flags, nchildren; + int ret = 0; + + level = nilfs_btree_node_get_level(node); + flags = nilfs_btree_node_get_flags(node); + nchildren = nilfs_btree_node_get_nchildren(node); + + if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || + level >= NILFS_BTREE_LEVEL_MAX || + (flags & NILFS_BTREE_NODE_ROOT) || + nchildren < 0 || + nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { + printk(KERN_CRIT "NILFS: bad btree node (blocknr=%llu): " + "level = %d, flags = 0x%x, nchildren = %d\n", + (unsigned long long)blocknr, level, flags, nchildren); + ret = 1; + } + return ret; } -static inline struct nilfs_btree_node * +int nilfs_btree_broken_node_block(struct buffer_head *bh) +{ + int ret; + + if (buffer_nilfs_checked(bh)) + return 0; + + ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, + bh->b_size, bh->b_blocknr); + if (likely(!ret)) + set_buffer_nilfs_checked(bh); + return ret; +} + +static struct nilfs_btree_node * +nilfs_btree_get_root(const struct nilfs_bmap *btree) +{ + return (struct nilfs_btree_node *)btree->b_u.u_data; +} + +static struct nilfs_btree_node * nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_bh->b_data; } -static inline struct nilfs_btree_node * +static struct nilfs_btree_node * nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; } -static inline int nilfs_btree_height(const struct nilfs_btree *btree) +static int nilfs_btree_height(const struct nilfs_bmap *btree) { return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; } -static inline struct nilfs_btree_node * -nilfs_btree_get_node(const struct nilfs_btree *btree, +static struct nilfs_btree_node * +nilfs_btree_get_node(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, - int level) + int level, int *ncmaxp) { - return (level == nilfs_btree_height(btree) - 1) ? - nilfs_btree_get_root(btree) : - nilfs_btree_get_nonroot_node(path, level); + struct nilfs_btree_node *node; + + if (level == nilfs_btree_height(btree) - 1) { + node = nilfs_btree_get_root(btree); + *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; + } else { + node = nilfs_btree_get_nonroot_node(path, level); + *ncmaxp = nilfs_btree_nchildren_per_block(btree); + } + return node; } -static inline int +static int nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) { if (unlikely(nilfs_btree_node_get_level(node) != level)) { @@ -427,13 +434,83 @@ nilfs_btree_bad_node(struct nilfs_btree_node *node, int level) return 0; } -static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, +struct nilfs_btree_readahead_info { + struct nilfs_btree_node *node; /* parent node */ + int max_ra_blocks; /* max nof blocks to read ahead */ + int index; /* current index on the parent node */ + int ncmax; /* nof children in the parent node */ +}; + +static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, + struct buffer_head **bhp, + const struct nilfs_btree_readahead_info *ra) +{ + struct address_space *btnc = &NILFS_BMAP_I(btree)->i_btnode_cache; + struct buffer_head *bh, *ra_bh; + sector_t submit_ptr = 0; + int ret; + + ret = nilfs_btnode_submit_block(btnc, ptr, 0, READ, &bh, &submit_ptr); + if (ret) { + if (ret != -EEXIST) + return ret; + goto out_check; + } + + if (ra) { + int i, n; + __u64 ptr2; + + /* read ahead sibling nodes */ + for (n = ra->max_ra_blocks, i = ra->index + 1; + n > 0 && i < ra->ncmax; n--, i++) { + ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); + + ret = nilfs_btnode_submit_block(btnc, ptr2, 0, READA, + &ra_bh, &submit_ptr); + if (likely(!ret || ret == -EEXIST)) + brelse(ra_bh); + else if (ret != -EBUSY) + break; + if (!buffer_locked(bh)) + goto out_no_wait; + } + } + + wait_on_buffer(bh); + + out_no_wait: + if (!buffer_uptodate(bh)) { + brelse(bh); + return -EIO; + } + + out_check: + if (nilfs_btree_broken_node_block(bh)) { + clear_buffer_uptodate(bh); + brelse(bh); + return -EINVAL; + } + + *bhp = bh; + return 0; +} + +static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, + struct buffer_head **bhp) +{ + return __nilfs_btree_get_block(btree, ptr, bhp, NULL); +} + +static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, - __u64 key, __u64 *ptrp, int minlevel) + __u64 key, __u64 *ptrp, int minlevel, + int readahead) { struct nilfs_btree_node *node; + struct nilfs_btree_readahead_info p, *ra; __u64 ptr; - int level, index, found, ret; + int level, index, found, ncmax, ret; node = nilfs_btree_get_root(btree); level = nilfs_btree_node_get_level(node); @@ -441,14 +518,27 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, return -ENOENT; found = nilfs_btree_node_lookup(node, key, &index); - ptr = nilfs_btree_node_get_ptr(btree, node, index); + ptr = nilfs_btree_node_get_ptr(node, index, + NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; path[level].bp_index = index; - for (level--; level >= minlevel; level--) { - ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); + ncmax = nilfs_btree_nchildren_per_block(btree); + + while (--level >= minlevel) { + ra = NULL; + if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { + p.node = nilfs_btree_get_node(btree, path, level + 1, + &p.ncmax); + p.index = index; + p.max_ra_blocks = 7; + ra = &p; + } + ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, + ra); if (ret < 0) return ret; + node = nilfs_btree_get_nonroot_node(path, level); if (nilfs_btree_bad_node(node, level)) return -EINVAL; @@ -456,9 +546,9 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, found = nilfs_btree_node_lookup(node, key, &index); else index = 0; - if (index < nilfs_btree_node_nchildren_max(node, btree)) - ptr = nilfs_btree_node_get_ptr(btree, node, index); - else { + if (index < ncmax) { + ptr = nilfs_btree_node_get_ptr(node, index, ncmax); + } else { WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); /* insert */ ptr = NILFS_BMAP_INVALID_PTR; @@ -474,22 +564,24 @@ static int nilfs_btree_do_lookup(const struct nilfs_btree *btree, return 0; } -static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, +static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; __u64 ptr; - int index, level, ret; + int index, level, ncmax, ret; node = nilfs_btree_get_root(btree); index = nilfs_btree_node_get_nchildren(node) - 1; if (index < 0) return -ENOENT; level = nilfs_btree_node_get_level(node); - ptr = nilfs_btree_node_get_ptr(btree, node, index); + ptr = nilfs_btree_node_get_ptr(node, index, + NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; path[level].bp_index = index; + ncmax = nilfs_btree_nchildren_per_block(btree); for (level--; level > 0; level--) { ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); @@ -499,7 +591,7 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, if (nilfs_btree_bad_node(node, level)) return -EINVAL; index = nilfs_btree_node_get_nchildren(node) - 1; - ptr = nilfs_btree_node_get_ptr(btree, node, index); + ptr = nilfs_btree_node_get_ptr(node, index, ncmax); path[level].bp_index = index; } @@ -511,51 +603,45 @@ static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree, return 0; } -static int nilfs_btree_lookup(const struct nilfs_bmap *bmap, +static int nilfs_btree_lookup(const struct nilfs_bmap *btree, __u64 key, int level, __u64 *ptrp) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; - __u64 ptr; int ret; - btree = (struct nilfs_btree *)bmap; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); - - if (ptrp != NULL) - *ptrp = ptr; + ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); nilfs_btree_free_path(path); return ret; } -static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, +static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, __u64 key, __u64 *ptrp, unsigned maxblocks) { - struct nilfs_btree *btree = (struct nilfs_btree *)bmap; struct nilfs_btree_path *path; struct nilfs_btree_node *node; struct inode *dat = NULL; __u64 ptr, ptr2; sector_t blocknr; int level = NILFS_BTREE_LEVEL_NODE_MIN; - int ret, cnt, index, maxlevel; + int ret, cnt, index, maxlevel, ncmax; + struct nilfs_btree_readahead_info p; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level); + ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); if (ret < 0) goto out; - if (NILFS_BMAP_USE_VBN(bmap)) { - dat = nilfs_bmap_get_dat(bmap); + if (NILFS_BMAP_USE_VBN(btree)) { + dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) goto out; @@ -566,14 +652,14 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, goto end; maxlevel = nilfs_btree_height(btree) - 1; - node = nilfs_btree_get_node(btree, path, level); + node = nilfs_btree_get_node(btree, path, level, &ncmax); index = path[level].bp_index + 1; for (;;) { while (index < nilfs_btree_node_get_nchildren(node)) { if (nilfs_btree_node_get_key(node, index) != key + cnt) goto end; - ptr2 = nilfs_btree_node_get_ptr(btree, node, index); + ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) @@ -589,20 +675,24 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, break; /* look-up right sibling node */ - node = nilfs_btree_get_node(btree, path, level + 1); - index = path[level + 1].bp_index + 1; - if (index >= nilfs_btree_node_get_nchildren(node) || - nilfs_btree_node_get_key(node, index) != key + cnt) + p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); + p.index = path[level + 1].bp_index + 1; + p.max_ra_blocks = 7; + if (p.index >= nilfs_btree_node_get_nchildren(p.node) || + nilfs_btree_node_get_key(p.node, p.index) != key + cnt) break; - ptr2 = nilfs_btree_node_get_ptr(btree, node, index); - path[level + 1].bp_index = index; + ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); + path[level + 1].bp_index = p.index; brelse(path[level].bp_bh); path[level].bp_bh = NULL; - ret = nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh); + + ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, + &p); if (ret < 0) goto out; node = nilfs_btree_get_nonroot_node(path, level); + ncmax = nilfs_btree_nchildren_per_block(btree); index = 0; path[level].bp_index = index; } @@ -614,7 +704,7 @@ static int nilfs_btree_lookup_contig(const struct nilfs_bmap *bmap, return ret; } -static void nilfs_btree_promote_key(struct nilfs_btree *btree, +static void nilfs_btree_promote_key(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 key) { @@ -636,16 +726,18 @@ static void nilfs_btree_promote_key(struct nilfs_btree *btree, } } -static void nilfs_btree_do_insert(struct nilfs_btree *btree, +static void nilfs_btree_do_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; + int ncblk; if (level < nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_nonroot_node(path, level); - nilfs_btree_node_insert(btree, node, *keyp, *ptrp, - path[level].bp_index); + ncblk = nilfs_btree_nchildren_per_block(btree); + nilfs_btree_node_insert(node, path[level].bp_index, + *keyp, *ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); @@ -655,22 +747,24 @@ static void nilfs_btree_do_insert(struct nilfs_btree *btree, 0)); } else { node = nilfs_btree_get_root(btree); - nilfs_btree_node_insert(btree, node, *keyp, *ptrp, - path[level].bp_index); + nilfs_btree_node_insert(node, path[level].bp_index, + *keyp, *ptrp, + NILFS_BTREE_ROOT_NCHILDREN_MAX); } } -static void nilfs_btree_carry_left(struct nilfs_btree *btree, +static void nilfs_btree_carry_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; - int nchildren, lnchildren, n, move; + int nchildren, lnchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); lnchildren = nilfs_btree_node_get_nchildren(left); + ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + lnchildren + 1) / 2 - lnchildren; @@ -680,7 +774,7 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, move = 1; } - nilfs_btree_node_move_left(btree, left, node, n); + nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); @@ -705,17 +799,18 @@ static void nilfs_btree_carry_left(struct nilfs_btree *btree, nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } -static void nilfs_btree_carry_right(struct nilfs_btree *btree, +static void nilfs_btree_carry_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; - int nchildren, rnchildren, n, move; + int nchildren, rnchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); rnchildren = nilfs_btree_node_get_nchildren(right); + ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + rnchildren + 1) / 2 - rnchildren; @@ -725,7 +820,7 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, move = 1; } - nilfs_btree_node_move_right(btree, node, right, n); + nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); @@ -751,18 +846,19 @@ static void nilfs_btree_carry_right(struct nilfs_btree *btree, nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } -static void nilfs_btree_split(struct nilfs_btree *btree, +static void nilfs_btree_split(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; __u64 newkey; __u64 newptr; - int nchildren, n, move; + int nchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); + ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + 1) / 2; @@ -771,7 +867,7 @@ static void nilfs_btree_split(struct nilfs_btree *btree, move = 1; } - nilfs_btree_node_move_right(btree, node, right, n); + nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); @@ -783,8 +879,8 @@ static void nilfs_btree_split(struct nilfs_btree *btree, if (move) { path[level].bp_index -= nilfs_btree_node_get_nchildren(node); - nilfs_btree_node_insert(btree, right, *keyp, *ptrp, - path[level].bp_index); + nilfs_btree_node_insert(right, path[level].bp_index, + *keyp, *ptrp, ncblk); *keyp = nilfs_btree_node_get_key(right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; @@ -805,19 +901,21 @@ static void nilfs_btree_split(struct nilfs_btree *btree, path[level + 1].bp_index++; } -static void nilfs_btree_grow(struct nilfs_btree *btree, +static void nilfs_btree_grow(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *root, *child; - int n; + int n, ncblk; root = nilfs_btree_get_root(btree); child = nilfs_btree_get_sib_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(root); - nilfs_btree_node_move_right(btree, root, child, n); + nilfs_btree_node_move_right(root, child, n, + NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); nilfs_btree_node_set_level(root, level + 1); if (!buffer_dirty(path[level].bp_sib_bh)) @@ -832,11 +930,11 @@ static void nilfs_btree_grow(struct nilfs_btree *btree, *ptrp = path[level].bp_newreq.bpr_ptr; } -static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree, +static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path) { struct nilfs_btree_node *node; - int level; + int level, ncmax; if (path == NULL) return NILFS_BMAP_INVALID_PTR; @@ -844,29 +942,30 @@ static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree, /* left sibling */ level = NILFS_BTREE_LEVEL_NODE_MIN; if (path[level].bp_index > 0) { - node = nilfs_btree_get_node(btree, path, level); - return nilfs_btree_node_get_ptr(btree, node, - path[level].bp_index - 1); + node = nilfs_btree_get_node(btree, path, level, &ncmax); + return nilfs_btree_node_get_ptr(node, + path[level].bp_index - 1, + ncmax); } /* parent */ level = NILFS_BTREE_LEVEL_NODE_MIN + 1; if (level <= nilfs_btree_height(btree) - 1) { - node = nilfs_btree_get_node(btree, path, level); - return nilfs_btree_node_get_ptr(btree, node, - path[level].bp_index); + node = nilfs_btree_get_node(btree, path, level, &ncmax); + return nilfs_btree_node_get_ptr(node, path[level].bp_index, + ncmax); } return NILFS_BMAP_INVALID_PTR; } -static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, +static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, __u64 key) { __u64 ptr; - ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key); + ptr = nilfs_bmap_find_target_seq(btree, key); if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; @@ -877,17 +976,10 @@ static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree, return ptr; } /* block group */ - return nilfs_bmap_find_target_in_group(&btree->bt_bmap); -} - -static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key, - __u64 ptr) -{ - btree->bt_bmap.b_last_allocated_key = key; - btree->bt_bmap.b_last_allocated_ptr = ptr; + return nilfs_bmap_find_target_in_group(btree); } -static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, +static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int *levelp, __u64 key, __u64 ptr, struct nilfs_bmap_stats *stats) @@ -895,79 +987,78 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, struct buffer_head *bh; struct nilfs_btree_node *node, *parent, *sib; __u64 sibptr; - int pindex, level, ret; + int pindex, level, ncmax, ncblk, ret; struct inode *dat = NULL; stats->bs_nblocks = 0; level = NILFS_BTREE_LEVEL_DATA; /* allocate a new ptr for data block */ - if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) { + if (NILFS_BMAP_USE_VBN(btree)) { path[level].bp_newreq.bpr_ptr = nilfs_btree_find_target_v(btree, path, key); - dat = nilfs_bmap_get_dat(&btree->bt_bmap); + dat = nilfs_bmap_get_dat(btree); } - ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, - &path[level].bp_newreq, dat); + ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_data; + ncblk = nilfs_btree_nchildren_per_block(btree); + for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < nilfs_btree_height(btree) - 1; level++) { node = nilfs_btree_get_nonroot_node(path, level); - if (nilfs_btree_node_get_nchildren(node) < - nilfs_btree_node_nchildren_max(node, btree)) { + if (nilfs_btree_node_get_nchildren(node) < ncblk) { path[level].bp_op = nilfs_btree_do_insert; stats->bs_nblocks++; goto out; } - parent = nilfs_btree_get_node(btree, path, level + 1); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); pindex = path[level + 1].bp_index; /* left sibling */ if (pindex > 0) { - sibptr = nilfs_btree_node_get_ptr(btree, parent, - pindex - 1); + sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, + ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; - if (nilfs_btree_node_get_nchildren(sib) < - nilfs_btree_node_nchildren_max(sib, btree)) { + if (nilfs_btree_node_get_nchildren(sib) < ncblk) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_carry_left; stats->bs_nblocks++; goto out; - } else + } else { brelse(bh); + } } /* right sibling */ - if (pindex < - nilfs_btree_node_get_nchildren(parent) - 1) { - sibptr = nilfs_btree_node_get_ptr(btree, parent, - pindex + 1); + if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { + sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, + ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; - if (nilfs_btree_node_get_nchildren(sib) < - nilfs_btree_node_nchildren_max(sib, btree)) { + if (nilfs_btree_node_get_nchildren(sib) < ncblk) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_carry_right; stats->bs_nblocks++; goto out; - } else + } else { brelse(bh); + } } /* split */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, + ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_child_node; @@ -979,9 +1070,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, stats->bs_nblocks++; - nilfs_btree_node_init(btree, - (struct nilfs_btree_node *)bh->b_data, - 0, level, 0, NULL, NULL); + sib = (struct nilfs_btree_node *)bh->b_data; + nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_split; } @@ -989,7 +1079,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* root */ node = nilfs_btree_get_root(btree); if (nilfs_btree_node_get_nchildren(node) < - nilfs_btree_node_nchildren_max(node, btree)) { + NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_do_insert; stats->bs_nblocks++; goto out; @@ -997,8 +1087,7 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* grow */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; - ret = nilfs_bmap_prepare_alloc_ptr(&btree->bt_bmap, - &path[level].bp_newreq, dat); + ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_child_node; ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, @@ -1006,8 +1095,8 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, if (ret < 0) goto err_out_curr_node; - nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data, - 0, level, 0, NULL, NULL); + nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, + 0, level, 0, ncblk, NULL, NULL); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_grow; @@ -1024,25 +1113,22 @@ static int nilfs_btree_prepare_insert(struct nilfs_btree *btree, /* error */ err_out_curr_node: - nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq, - dat); + nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); err_out_child_node: for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { nilfs_btnode_delete(path[level].bp_sib_bh); - nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, - &path[level].bp_newreq, dat); + nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); } - nilfs_bmap_abort_alloc_ptr(&btree->bt_bmap, &path[level].bp_newreq, - dat); + nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); err_out_data: *levelp = level; stats->bs_nblocks = 0; return ret; } -static void nilfs_btree_commit_insert(struct nilfs_btree *btree, +static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int maxlevel, __u64 key, __u64 ptr) { @@ -1051,35 +1137,33 @@ static void nilfs_btree_commit_insert(struct nilfs_btree *btree, set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; - if (NILFS_BMAP_USE_VBN(&btree->bt_bmap)) { - nilfs_btree_set_target_v(btree, key, ptr); - dat = nilfs_bmap_get_dat(&btree->bt_bmap); + if (NILFS_BMAP_USE_VBN(btree)) { + nilfs_bmap_set_target_v(btree, key, ptr); + dat = nilfs_bmap_get_dat(btree); } for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { - nilfs_bmap_commit_alloc_ptr(&btree->bt_bmap, + nilfs_bmap_commit_alloc_ptr(btree, &path[level - 1].bp_newreq, dat); path[level].bp_op(btree, path, level, &key, &ptr); } - if (!nilfs_bmap_dirty(&btree->bt_bmap)) - nilfs_bmap_set_dirty(&btree->bt_bmap); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); } -static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) +static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; struct nilfs_bmap_stats stats; int level, ret; - btree = (struct nilfs_btree *)bmap; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, NULL, - NILFS_BTREE_LEVEL_NODE_MIN); + NILFS_BTREE_LEVEL_NODE_MIN, 0); if (ret != -ENOENT) { if (ret == 0) ret = -EEXIST; @@ -1090,23 +1174,25 @@ static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) if (ret < 0) goto out; nilfs_btree_commit_insert(btree, path, level, key, ptr); - nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); + nilfs_bmap_add_blocks(btree, stats.bs_nblocks); out: nilfs_btree_free_path(path); return ret; } -static void nilfs_btree_do_delete(struct nilfs_btree *btree, +static void nilfs_btree_do_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; + int ncblk; if (level < nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_nonroot_node(path, level); - nilfs_btree_node_delete(btree, node, keyp, ptrp, - path[level].bp_index); + ncblk = nilfs_btree_nchildren_per_block(btree); + nilfs_btree_node_delete(node, path[level].bp_index, + keyp, ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); if (path[level].bp_index == 0) @@ -1114,17 +1200,18 @@ static void nilfs_btree_do_delete(struct nilfs_btree *btree, nilfs_btree_node_get_key(node, 0)); } else { node = nilfs_btree_get_root(btree); - nilfs_btree_node_delete(btree, node, keyp, ptrp, - path[level].bp_index); + nilfs_btree_node_delete(node, path[level].bp_index, + keyp, ptrp, + NILFS_BTREE_ROOT_NCHILDREN_MAX); } } -static void nilfs_btree_borrow_left(struct nilfs_btree *btree, +static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; - int nchildren, lnchildren, n; + int nchildren, lnchildren, n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); @@ -1132,10 +1219,11 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); lnchildren = nilfs_btree_node_get_nchildren(left); + ncblk = nilfs_btree_nchildren_per_block(btree); n = (nchildren + lnchildren) / 2 - nchildren; - nilfs_btree_node_move_right(btree, left, node, n); + nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); @@ -1150,12 +1238,12 @@ static void nilfs_btree_borrow_left(struct nilfs_btree *btree, path[level].bp_index += n; } -static void nilfs_btree_borrow_right(struct nilfs_btree *btree, +static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; - int nchildren, rnchildren, n; + int nchildren, rnchildren, n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); @@ -1163,10 +1251,11 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); rnchildren = nilfs_btree_node_get_nchildren(right); + ncblk = nilfs_btree_nchildren_per_block(btree); n = (nchildren + rnchildren) / 2 - nchildren; - nilfs_btree_node_move_left(btree, node, right, n); + nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); @@ -1182,21 +1271,22 @@ static void nilfs_btree_borrow_right(struct nilfs_btree *btree, path[level].bp_sib_bh = NULL; } -static void nilfs_btree_concat_left(struct nilfs_btree *btree, +static void nilfs_btree_concat_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; - int n; + int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(node); - nilfs_btree_node_move_left(btree, left, node, n); + nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_sib_bh)) nilfs_btnode_mark_dirty(path[level].bp_sib_bh); @@ -1207,21 +1297,22 @@ static void nilfs_btree_concat_left(struct nilfs_btree *btree, path[level].bp_index += nilfs_btree_node_get_nchildren(left); } -static void nilfs_btree_concat_right(struct nilfs_btree *btree, +static void nilfs_btree_concat_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; - int n; + int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(right); - nilfs_btree_node_move_left(btree, node, right, n); + nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) nilfs_btnode_mark_dirty(path[level].bp_bh); @@ -1231,29 +1322,32 @@ static void nilfs_btree_concat_right(struct nilfs_btree *btree, path[level + 1].bp_index++; } -static void nilfs_btree_shrink(struct nilfs_btree *btree, +static void nilfs_btree_shrink(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *root, *child; - int n; + int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); root = nilfs_btree_get_root(btree); child = nilfs_btree_get_nonroot_node(path, level); + ncblk = nilfs_btree_nchildren_per_block(btree); - nilfs_btree_node_delete(btree, root, NULL, NULL, 0); + nilfs_btree_node_delete(root, 0, NULL, NULL, + NILFS_BTREE_ROOT_NCHILDREN_MAX); nilfs_btree_node_set_level(root, level); n = nilfs_btree_node_get_nchildren(child); - nilfs_btree_node_move_left(btree, root, child, n); + nilfs_btree_node_move_left(root, child, n, + NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = NULL; } -static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, +static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int *levelp, struct nilfs_bmap_stats *stats, @@ -1262,42 +1356,43 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, struct buffer_head *bh; struct nilfs_btree_node *node, *parent, *sib; __u64 sibptr; - int pindex, level, ret; + int pindex, level, ncmin, ncmax, ncblk, ret; ret = 0; stats->bs_nblocks = 0; + ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); + ncblk = nilfs_btree_nchildren_per_block(btree); + for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < nilfs_btree_height(btree) - 1; level++) { node = nilfs_btree_get_nonroot_node(path, level); path[level].bp_oldreq.bpr_ptr = - nilfs_btree_node_get_ptr(btree, node, - path[level].bp_index); - ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, + nilfs_btree_node_get_ptr(node, path[level].bp_index, + ncblk); + ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); if (ret < 0) goto err_out_child_node; - if (nilfs_btree_node_get_nchildren(node) > - nilfs_btree_node_nchildren_min(node, btree)) { + if (nilfs_btree_node_get_nchildren(node) > ncmin) { path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; goto out; } - parent = nilfs_btree_get_node(btree, path, level + 1); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); pindex = path[level + 1].bp_index; if (pindex > 0) { /* left sibling */ - sibptr = nilfs_btree_node_get_ptr(btree, parent, - pindex - 1); + sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, + ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; - if (nilfs_btree_node_get_nchildren(sib) > - nilfs_btree_node_nchildren_min(sib, btree)) { + if (nilfs_btree_node_get_nchildren(sib) > ncmin) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_borrow_left; stats->bs_nblocks++; @@ -1311,14 +1406,13 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, } else if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { /* right sibling */ - sibptr = nilfs_btree_node_get_ptr(btree, parent, - pindex + 1); + sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, + ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; - if (nilfs_btree_node_get_nchildren(sib) > - nilfs_btree_node_nchildren_min(sib, btree)) { + if (nilfs_btree_node_get_nchildren(sib) > ncmin) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_borrow_right; stats->bs_nblocks++; @@ -1349,10 +1443,10 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, node = nilfs_btree_get_root(btree); path[level].bp_oldreq.bpr_ptr = - nilfs_btree_node_get_ptr(btree, node, path[level].bp_index); + nilfs_btree_node_get_ptr(node, path[level].bp_index, + NILFS_BTREE_ROOT_NCHILDREN_MAX); - ret = nilfs_bmap_prepare_end_ptr(&btree->bt_bmap, - &path[level].bp_oldreq, dat); + ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); if (ret < 0) goto err_out_child_node; @@ -1367,75 +1461,68 @@ static int nilfs_btree_prepare_delete(struct nilfs_btree *btree, /* error */ err_out_curr_node: - nilfs_bmap_abort_end_ptr(&btree->bt_bmap, &path[level].bp_oldreq, dat); + nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); err_out_child_node: for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { brelse(path[level].bp_sib_bh); - nilfs_bmap_abort_end_ptr(&btree->bt_bmap, - &path[level].bp_oldreq, dat); + nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); } *levelp = level; stats->bs_nblocks = 0; return ret; } -static void nilfs_btree_commit_delete(struct nilfs_btree *btree, +static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int maxlevel, struct inode *dat) { int level; for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { - nilfs_bmap_commit_end_ptr(&btree->bt_bmap, - &path[level].bp_oldreq, dat); + nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); path[level].bp_op(btree, path, level, NULL, NULL); } - if (!nilfs_bmap_dirty(&btree->bt_bmap)) - nilfs_bmap_set_dirty(&btree->bt_bmap); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); } -static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key) +static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; struct nilfs_bmap_stats stats; struct inode *dat; int level, ret; - btree = (struct nilfs_btree *)bmap; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, NULL, - NILFS_BTREE_LEVEL_NODE_MIN); + NILFS_BTREE_LEVEL_NODE_MIN, 0); if (ret < 0) goto out; - dat = NILFS_BMAP_USE_VBN(&btree->bt_bmap) ? - nilfs_bmap_get_dat(&btree->bt_bmap) : NULL; + dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); if (ret < 0) goto out; nilfs_btree_commit_delete(btree, path, level, dat); - nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks); + nilfs_bmap_sub_blocks(btree, stats.bs_nblocks); out: nilfs_btree_free_path(path); return ret; } -static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) +static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; int ret; - btree = (struct nilfs_btree *)bmap; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; @@ -1447,16 +1534,14 @@ static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) return ret; } -static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) +static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) { struct buffer_head *bh; - struct nilfs_btree *btree; struct nilfs_btree_node *root, *node; __u64 maxkey, nextmaxkey; __u64 ptr; int nchildren, ret; - btree = (struct nilfs_btree *)bmap; root = nilfs_btree_get_root(btree); switch (nilfs_btree_height(btree)) { case 2: @@ -1467,7 +1552,8 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; - ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); + ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, + NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; @@ -1487,32 +1573,33 @@ static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key) return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); } -static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, +static int nilfs_btree_gather_data(struct nilfs_bmap *btree, __u64 *keys, __u64 *ptrs, int nitems) { struct buffer_head *bh; - struct nilfs_btree *btree; struct nilfs_btree_node *node, *root; __le64 *dkeys; __le64 *dptrs; __u64 ptr; - int nchildren, i, ret; + int nchildren, ncmax, i, ret; - btree = (struct nilfs_btree *)bmap; root = nilfs_btree_get_root(btree); switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; + ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; break; case 3: nchildren = nilfs_btree_node_get_nchildren(root); WARN_ON(nchildren > 1); - ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1); + ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, + NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; + ncmax = nilfs_btree_nchildren_per_block(btree); break; default: node = NULL; @@ -1523,10 +1610,10 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, if (nchildren < nitems) nitems = nchildren; dkeys = nilfs_btree_node_dkeys(node); - dptrs = nilfs_btree_node_dptrs(node, btree); + dptrs = nilfs_btree_node_dptrs(node, ncmax); for (i = 0; i < nitems; i++) { - keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]); - ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]); + keys[i] = le64_to_cpu(dkeys[i]); + ptrs[i] = le64_to_cpu(dptrs[i]); } if (bh != NULL) @@ -1536,14 +1623,13 @@ static int nilfs_btree_gather_data(struct nilfs_bmap *bmap, } static int -nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, +nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, union nilfs_bmap_ptr_req *dreq, union nilfs_bmap_ptr_req *nreq, struct buffer_head **bhp, struct nilfs_bmap_stats *stats) { struct buffer_head *bh; - struct nilfs_btree *btree = (struct nilfs_btree *)bmap; struct inode *dat = NULL; int ret; @@ -1551,12 +1637,12 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, /* for data */ /* cannot find near ptr */ - if (NILFS_BMAP_USE_VBN(bmap)) { + if (NILFS_BMAP_USE_VBN(btree)) { dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); - dat = nilfs_bmap_get_dat(bmap); + dat = nilfs_bmap_get_dat(btree); } - ret = nilfs_bmap_prepare_alloc_ptr(bmap, dreq, dat); + ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); if (ret < 0) return ret; @@ -1564,7 +1650,7 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, stats->bs_nblocks++; if (nreq != NULL) { nreq->bpr_ptr = dreq->bpr_ptr + 1; - ret = nilfs_bmap_prepare_alloc_ptr(bmap, nreq, dat); + ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); if (ret < 0) goto err_out_dreq; @@ -1581,16 +1667,16 @@ nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key, /* error */ err_out_nreq: - nilfs_bmap_abort_alloc_ptr(bmap, nreq, dat); + nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); err_out_dreq: - nilfs_bmap_abort_alloc_ptr(bmap, dreq, dat); + nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); stats->bs_nblocks = 0; return ret; } static void -nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, +nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, int n, @@ -1598,57 +1684,59 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, union nilfs_bmap_ptr_req *nreq, struct buffer_head *bh) { - struct nilfs_btree *btree = (struct nilfs_btree *)bmap; struct nilfs_btree_node *node; struct inode *dat; __u64 tmpptr; + int ncblk; /* free resources */ - if (bmap->b_ops->bop_clear != NULL) - bmap->b_ops->bop_clear(bmap); + if (btree->b_ops->bop_clear != NULL) + btree->b_ops->bop_clear(btree); /* ptr must be a pointer to a buffer head. */ set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); /* convert and insert */ - dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; - nilfs_btree_init(bmap); + dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; + nilfs_btree_init(btree); if (nreq != NULL) { - nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat); - nilfs_bmap_commit_alloc_ptr(bmap, nreq, dat); + nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); + nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); /* create child node at level 1 */ node = (struct nilfs_btree_node *)bh->b_data; - nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs); - nilfs_btree_node_insert(btree, node, - key, dreq->bpr_ptr, n); + ncblk = nilfs_btree_nchildren_per_block(btree); + nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); + nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); if (!buffer_dirty(bh)) nilfs_btnode_mark_dirty(bh); - if (!nilfs_bmap_dirty(bmap)) - nilfs_bmap_set_dirty(bmap); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); brelse(bh); /* create root node at level 2 */ node = nilfs_btree_get_root(btree); tmpptr = nreq->bpr_ptr; - nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, - 2, 1, &keys[0], &tmpptr); + nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, + NILFS_BTREE_ROOT_NCHILDREN_MAX, + &keys[0], &tmpptr); } else { - nilfs_bmap_commit_alloc_ptr(bmap, dreq, dat); + nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); /* create root node at level 1 */ node = nilfs_btree_get_root(btree); - nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT, - 1, n, keys, ptrs); - nilfs_btree_node_insert(btree, node, - key, dreq->bpr_ptr, n); - if (!nilfs_bmap_dirty(bmap)) - nilfs_bmap_set_dirty(bmap); + nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, + NILFS_BTREE_ROOT_NCHILDREN_MAX, + keys, ptrs); + nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, + NILFS_BTREE_ROOT_NCHILDREN_MAX); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); } - if (NILFS_BMAP_USE_VBN(bmap)) - nilfs_btree_set_target_v(btree, key, dreq->bpr_ptr); + if (NILFS_BMAP_USE_VBN(btree)) + nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); } /** @@ -1660,7 +1748,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap, * @ptrs: * @n: */ -int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, +int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, int n) { @@ -1673,7 +1761,7 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, di = &dreq; ni = NULL; } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( - 1 << bmap->b_inode->i_blkbits)) { + 1 << btree->b_inode->i_blkbits)) { di = &dreq; ni = &nreq; } else { @@ -1682,17 +1770,17 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap, BUG(); } - ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh, + ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, &stats); if (ret < 0) return ret; - nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n, + nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, di, ni, bh); - nilfs_bmap_add_blocks(bmap, stats.bs_nblocks); + nilfs_bmap_add_blocks(btree, stats.bs_nblocks); return 0; } -static int nilfs_btree_propagate_p(struct nilfs_btree *btree, +static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head *bh) @@ -1704,17 +1792,17 @@ static int nilfs_btree_propagate_p(struct nilfs_btree *btree, return 0; } -static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, +static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct inode *dat) { struct nilfs_btree_node *parent; - int ret; + int ncmax, ret; - parent = nilfs_btree_get_node(btree, path, level + 1); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); path[level].bp_oldreq.bpr_ptr = - nilfs_btree_node_get_ptr(btree, parent, - path[level + 1].bp_index); + nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, + ncmax); path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req); @@ -1726,7 +1814,7 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; path[level].bp_ctxt.bh = path[level].bp_bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); if (ret < 0) { nilfs_dat_abort_update(dat, @@ -1739,30 +1827,31 @@ static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree, return 0; } -static void nilfs_btree_commit_update_v(struct nilfs_btree *btree, +static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct inode *dat) { struct nilfs_btree_node *parent; + int ncmax; nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req, - btree->bt_bmap.b_ptr_type == NILFS_BMAP_PTR_VS); + btree->b_ptr_type == NILFS_BMAP_PTR_VS); if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); path[level].bp_bh = path[level].bp_ctxt.bh; } set_buffer_nilfs_volatile(path[level].bp_bh); - parent = nilfs_btree_get_node(btree, path, level + 1); - nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index, - path[level].bp_newreq.bpr_ptr); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); + nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, + path[level].bp_newreq.bpr_ptr, ncmax); } -static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, +static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct inode *dat) { @@ -1770,11 +1859,11 @@ static void nilfs_btree_abort_update_v(struct nilfs_btree *btree, &path[level].bp_newreq.bpr_req); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); } -static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, +static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int minlevel, int *maxlevelp, struct inode *dat) @@ -1809,7 +1898,7 @@ static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree, return ret; } -static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, +static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int minlevel, int maxlevel, struct buffer_head *bh, @@ -1824,14 +1913,15 @@ static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree, nilfs_btree_commit_update_v(btree, path, level, dat); } -static int nilfs_btree_propagate_v(struct nilfs_btree *btree, +static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head *bh) { int maxlevel = 0, ret; struct nilfs_btree_node *parent; - struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap); + struct inode *dat = nilfs_bmap_get_dat(btree); __u64 ptr; + int ncmax; get_bh(bh); path[level].bp_bh = bh; @@ -1841,9 +1931,10 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree, goto out; if (buffer_nilfs_volatile(path[level].bp_bh)) { - parent = nilfs_btree_get_node(btree, path, level + 1); - ptr = nilfs_btree_node_get_ptr(btree, parent, - path[level + 1].bp_index); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); + ptr = nilfs_btree_node_get_ptr(parent, + path[level + 1].bp_index, + ncmax); ret = nilfs_dat_mark_dirty(dat, ptr); if (ret < 0) goto out; @@ -1857,10 +1948,9 @@ static int nilfs_btree_propagate_v(struct nilfs_btree *btree, return ret; } -static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, +static int nilfs_btree_propagate(struct nilfs_bmap *btree, struct buffer_head *bh) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; struct nilfs_btree_node *node; __u64 key; @@ -1868,7 +1958,6 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, WARN_ON(!buffer_dirty(bh)); - btree = (struct nilfs_btree *)bmap; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; @@ -1878,11 +1967,11 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); } else { - key = nilfs_bmap_data_get_key(bmap, bh); + key = nilfs_bmap_data_get_key(btree, bh); level = NILFS_BTREE_LEVEL_DATA; } - ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); + ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { if (unlikely(ret == -ENOENT)) printk(KERN_CRIT "%s: key = %llu, level == %d\n", @@ -1890,7 +1979,7 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, goto out; } - ret = NILFS_BMAP_USE_VBN(bmap) ? + ret = NILFS_BMAP_USE_VBN(btree) ? nilfs_btree_propagate_v(btree, path, level, bh) : nilfs_btree_propagate_p(btree, path, level, bh); @@ -1900,13 +1989,13 @@ static int nilfs_btree_propagate(const struct nilfs_bmap *bmap, return ret; } -static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap, +static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, struct buffer_head *bh) { - return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(bmap), bh->b_blocknr); + return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); } -static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, +static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, struct list_head *lists, struct buffer_head *bh) { @@ -1920,6 +2009,18 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, node = (struct nilfs_btree_node *)bh->b_data; key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); + if (level < NILFS_BTREE_LEVEL_NODE_MIN || + level >= NILFS_BTREE_LEVEL_MAX) { + dump_stack(); + printk(KERN_WARNING + "%s: invalid btree level: %d (key=%llu, ino=%lu, " + "blocknr=%llu)\n", + __func__, level, (unsigned long long)key, + NILFS_BMAP_I(btree)->vfs_inode.i_ino, + (unsigned long long)bh->b_blocknr); + return; + } + list_for_each(head, &lists[level]) { cbh = list_entry(head, struct buffer_head, b_assoc_buffers); cnode = (struct nilfs_btree_node *)cbh->b_data; @@ -1930,11 +2031,10 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree, list_add_tail(&bh->b_assoc_buffers, head); } -static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, +static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { - struct nilfs_btree *btree = (struct nilfs_btree *)bmap; - struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache; + struct address_space *btcache = &NILFS_BMAP_I(btree)->i_btnode_cache; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; struct pagevec pvec; struct buffer_head *bh, *head; @@ -1968,7 +2068,7 @@ static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap, list_splice_tail(&lists[level], listp); } -static int nilfs_btree_assign_p(struct nilfs_btree *btree, +static int nilfs_btree_assign_p(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head **bh, @@ -1978,38 +2078,38 @@ static int nilfs_btree_assign_p(struct nilfs_btree *btree, struct nilfs_btree_node *parent; __u64 key; __u64 ptr; - int ret; + int ncmax, ret; - parent = nilfs_btree_get_node(btree, path, level + 1); - ptr = nilfs_btree_node_get_ptr(btree, parent, - path[level + 1].bp_index); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); + ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, + ncmax); if (buffer_nilfs_node(*bh)) { path[level].bp_ctxt.oldkey = ptr; path[level].bp_ctxt.newkey = blocknr; path[level].bp_ctxt.bh = *bh; ret = nilfs_btnode_prepare_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); if (ret < 0) return ret; nilfs_btnode_commit_change_key( - &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache, + &NILFS_BMAP_I(btree)->i_btnode_cache, &path[level].bp_ctxt); *bh = path[level].bp_ctxt.bh; } - nilfs_btree_node_set_ptr(btree, parent, - path[level + 1].bp_index, blocknr); + nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, + ncmax); key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); /* on-disk format */ - binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = level; return 0; } -static int nilfs_btree_assign_v(struct nilfs_btree *btree, +static int nilfs_btree_assign_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head **bh, @@ -2017,15 +2117,15 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, union nilfs_binfo *binfo) { struct nilfs_btree_node *parent; - struct inode *dat = nilfs_bmap_get_dat(&btree->bt_bmap); + struct inode *dat = nilfs_bmap_get_dat(btree); __u64 key; __u64 ptr; union nilfs_bmap_ptr_req req; - int ret; + int ncmax, ret; - parent = nilfs_btree_get_node(btree, path, level + 1); - ptr = nilfs_btree_node_get_ptr(btree, parent, - path[level + 1].bp_index); + parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); + ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, + ncmax); req.bpr_ptr = ptr; ret = nilfs_dat_prepare_start(dat, &req.bpr_req); if (ret < 0) @@ -2034,24 +2134,22 @@ static int nilfs_btree_assign_v(struct nilfs_btree *btree, key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); /* on-disk format */ - binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); - binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); + binfo->bi_v.bi_blkoff = cpu_to_le64(key); return 0; } -static int nilfs_btree_assign(struct nilfs_bmap *bmap, +static int nilfs_btree_assign(struct nilfs_bmap *btree, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { - struct nilfs_btree *btree; struct nilfs_btree_path *path; struct nilfs_btree_node *node; __u64 key; int level, ret; - btree = (struct nilfs_btree *)bmap; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; @@ -2061,17 +2159,17 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); } else { - key = nilfs_bmap_data_get_key(bmap, *bh); + key = nilfs_bmap_data_get_key(btree, *bh); level = NILFS_BTREE_LEVEL_DATA; } - ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1); + ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } - ret = NILFS_BMAP_USE_VBN(bmap) ? + ret = NILFS_BMAP_USE_VBN(btree) ? nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); @@ -2081,7 +2179,7 @@ static int nilfs_btree_assign(struct nilfs_bmap *bmap, return ret; } -static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, +static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) @@ -2090,7 +2188,7 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, __u64 key; int ret; - ret = nilfs_dat_move(nilfs_bmap_get_dat(bmap), (*bh)->b_blocknr, + ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, blocknr); if (ret < 0) return ret; @@ -2099,29 +2197,27 @@ static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap, node = (struct nilfs_btree_node *)(*bh)->b_data; key = nilfs_btree_node_get_key(node, 0); } else - key = nilfs_bmap_data_get_key(bmap, *bh); + key = nilfs_bmap_data_get_key(btree, *bh); /* on-disk format */ binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); - binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_v.bi_blkoff = cpu_to_le64(key); return 0; } -static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) +static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) { struct buffer_head *bh; - struct nilfs_btree *btree; struct nilfs_btree_path *path; __u64 ptr; int ret; - btree = (struct nilfs_btree *)bmap; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; - ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1); + ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; @@ -2135,8 +2231,8 @@ static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level) if (!buffer_dirty(bh)) nilfs_btnode_mark_dirty(bh); brelse(bh); - if (!nilfs_bmap_dirty(&btree->bt_bmap)) - nilfs_bmap_set_dirty(&btree->bt_bmap); + if (!nilfs_bmap_dirty(btree)) + nilfs_bmap_set_dirty(btree); out: nilfs_btree_free_path(path); @@ -2186,10 +2282,14 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { int nilfs_btree_init(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops; + bmap->b_nchildren_per_block = + NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); return 0; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops_gc; + bmap->b_nchildren_per_block = + NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); } diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h index 43c8c5b..22c02e3 100644 --- a/fs/nilfs2/btree.h +++ b/fs/nilfs2/btree.h @@ -31,14 +31,6 @@ #include "bmap.h" /** - * struct nilfs_btree - B-tree structure - * @bt_bmap: bmap base structure - */ -struct nilfs_btree { - struct nilfs_bmap bt_bmap; -}; - -/** * struct nilfs_btree_path - A path on which B-tree operations are executed * @bp_bh: buffer head of node block * @bp_sib_bh: buffer head of sibling node block @@ -54,7 +46,7 @@ struct nilfs_btree_path { union nilfs_bmap_ptr_req bp_oldreq; union nilfs_bmap_ptr_req bp_newreq; struct nilfs_btnode_chkey_ctxt bp_ctxt; - void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *, + void (*bp_op)(struct nilfs_bmap *, struct nilfs_btree_path *, int, __u64 *, __u64 *); }; @@ -80,4 +72,6 @@ int nilfs_btree_convert_and_insert(struct nilfs_bmap *, __u64, __u64, const __u64 *, const __u64 *, int); void nilfs_btree_init_gc(struct nilfs_bmap *); +int nilfs_btree_broken_node_block(struct buffer_head *bh); + #endif /* _NILFS_BTREE_H */ diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 1873781..5ff15a8 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c @@ -863,26 +863,19 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) */ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) { - struct the_nilfs *nilfs; int ret; - nilfs = NILFS_MDT(cpfile)->mi_nilfs; - switch (mode) { case NILFS_CHECKPOINT: - /* - * Check for protecting existing snapshot mounts: - * ns_mount_mutex is used to make this operation atomic and - * exclusive with a new mount job. Though it doesn't cover - * umount, it's enough for the purpose. - */ - if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) { - /* Current implementation does not have to protect - plain read-only mounts since they are exclusive - with a read/write mount and are protected from the - cleaner. */ + if (nilfs_checkpoint_is_mounted(cpfile->i_sb, cno)) + /* + * Current implementation does not have to protect + * plain read-only mounts since they are exclusive + * with a read/write mount and are protected from the + * cleaner. + */ ret = -EBUSY; - } else + else ret = nilfs_cpfile_clear_snapshot(cpfile, cno); return ret; case NILFS_SNAPSHOT: @@ -933,27 +926,40 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) } /** - * nilfs_cpfile_read - read cpfile inode - * @cpfile: cpfile inode - * @raw_inode: on-disk cpfile inode - */ -int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode) -{ - return nilfs_read_inode_common(cpfile, raw_inode); -} - -/** - * nilfs_cpfile_new - create cpfile - * @nilfs: nilfs object + * nilfs_cpfile_read - read or get cpfile inode + * @sb: super block instance * @cpsize: size of a checkpoint entry + * @raw_inode: on-disk cpfile inode + * @inodep: buffer to store the inode */ -struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize) +int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, + struct nilfs_inode *raw_inode, struct inode **inodep) { struct inode *cpfile; + int err; + + cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); + if (unlikely(!cpfile)) + return -ENOMEM; + if (!(cpfile->i_state & I_NEW)) + goto out; + + err = nilfs_mdt_init(cpfile, NILFS_MDT_GFP, 0); + if (err) + goto failed; - cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0); - if (cpfile) - nilfs_mdt_set_entry_size(cpfile, cpsize, - sizeof(struct nilfs_cpfile_header)); - return cpfile; + nilfs_mdt_set_entry_size(cpfile, cpsize, + sizeof(struct nilfs_cpfile_header)); + + err = nilfs_read_inode_common(cpfile, raw_inode); + if (err) + goto failed; + + unlock_new_inode(cpfile); + out: + *inodep = cpfile; + return 0; + failed: + iget_failed(cpfile); + return err; } diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index bc0809e..a242b9a 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h @@ -40,7 +40,7 @@ int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, size_t); -int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode); -struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize); +int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, + struct nilfs_inode *raw_inode, struct inode **inodep); #endif /* _NILFS_CPFILE_H */ diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 0131467..49c844d 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c @@ -36,6 +36,7 @@ struct nilfs_dat_info { struct nilfs_mdt_info mi; struct nilfs_palloc_cache palloc_cache; + struct nilfs_shadow_map shadow; }; static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) @@ -102,7 +103,8 @@ void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) nilfs_palloc_abort_alloc_entry(dat, req); } -void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) +static void nilfs_dat_commit_free(struct inode *dat, + struct nilfs_palloc_req *req) { struct nilfs_dat_entry *entry; void *kaddr; @@ -327,6 +329,23 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); if (ret < 0) return ret; + + /* + * The given disk block number (blocknr) is not yet written to + * the device at this point. + * + * To prevent nilfs_dat_translate() from returning the + * uncommited block number, this makes a copy of the entry + * buffer and redirects nilfs_dat_translate() to the copy. + */ + if (!buffer_nilfs_redirected(entry_bh)) { + ret = nilfs_mdt_freeze_buffer(dat, entry_bh); + if (ret) { + brelse(entry_bh); + return ret; + } + } + kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { @@ -371,7 +390,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) */ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) { - struct buffer_head *entry_bh; + struct buffer_head *entry_bh, *bh; struct nilfs_dat_entry *entry; sector_t blocknr; void *kaddr; @@ -381,6 +400,15 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) if (ret < 0) return ret; + if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) { + bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh); + if (bh) { + WARN_ON(!buffer_uptodate(bh)); + brelse(entry_bh); + entry_bh = bh; + } + } + kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); blocknr = le64_to_cpu(entry->de_blocknr); @@ -436,38 +464,48 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, } /** - * nilfs_dat_read - read dat inode - * @dat: dat inode - * @raw_inode: on-disk dat inode - */ -int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode) -{ - return nilfs_read_inode_common(dat, raw_inode); -} - -/** - * nilfs_dat_new - create dat file - * @nilfs: nilfs object + * nilfs_dat_read - read or get dat inode + * @sb: super block instance * @entry_size: size of a dat entry + * @raw_inode: on-disk dat inode + * @inodep: buffer to store the inode */ -struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size) +int nilfs_dat_read(struct super_block *sb, size_t entry_size, + struct nilfs_inode *raw_inode, struct inode **inodep) { static struct lock_class_key dat_lock_key; struct inode *dat; struct nilfs_dat_info *di; int err; - dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, sizeof(*di)); - if (dat) { - err = nilfs_palloc_init_blockgroup(dat, entry_size); - if (unlikely(err)) { - nilfs_mdt_destroy(dat); - return NULL; - } + dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); + if (unlikely(!dat)) + return -ENOMEM; + if (!(dat->i_state & I_NEW)) + goto out; - di = NILFS_DAT_I(dat); - lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); - nilfs_palloc_setup_cache(dat, &di->palloc_cache); - } - return dat; + err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di)); + if (err) + goto failed; + + err = nilfs_palloc_init_blockgroup(dat, entry_size); + if (err) + goto failed; + + di = NILFS_DAT_I(dat); + lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); + nilfs_palloc_setup_cache(dat, &di->palloc_cache); + nilfs_mdt_setup_shadow_map(dat, &di->shadow); + + err = nilfs_read_inode_common(dat, raw_inode); + if (err) + goto failed; + + unlock_new_inode(dat); + out: + *inodep = dat; + return 0; + failed: + iget_failed(dat); + return err; } diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index d31c3aa..cbd8e97 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h @@ -53,7 +53,7 @@ int nilfs_dat_freev(struct inode *, __u64 *, size_t); int nilfs_dat_move(struct inode *, __u64, sector_t); ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); -int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode); -struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size); +int nilfs_dat_read(struct super_block *sb, size_t entry_size, + struct nilfs_inode *raw_inode, struct inode **inodep); #endif /* _NILFS_DAT_H */ diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index 85c89df..cb003c8 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -80,23 +80,10 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) return last_byte; } -static int nilfs_prepare_chunk_uninterruptible(struct page *page, - struct address_space *mapping, - unsigned from, unsigned to) +static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to) { loff_t pos = page_offset(page) + from; - return block_write_begin(NULL, mapping, pos, to - from, - AOP_FLAG_UNINTERRUPTIBLE, &page, - NULL, nilfs_get_block); -} - -static int nilfs_prepare_chunk(struct page *page, - struct address_space *mapping, - unsigned from, unsigned to) -{ - loff_t pos = page_offset(page) + from; - return block_write_begin(NULL, mapping, pos, to - from, 0, &page, - NULL, nilfs_get_block); + return __block_write_begin(page, pos, to - from, nilfs_get_block); } static void nilfs_commit_chunk(struct page *page, @@ -141,7 +128,7 @@ static void nilfs_check_page(struct page *page) } for (offs = 0; offs <= limit - NILFS_DIR_REC_LEN(1); offs += rec_len) { p = (struct nilfs_dir_entry *)(kaddr + offs); - rec_len = le16_to_cpu(p->rec_len); + rec_len = nilfs_rec_len_from_disk(p->rec_len); if (rec_len < NILFS_DIR_REC_LEN(1)) goto Eshort; @@ -199,13 +186,10 @@ fail: static struct page *nilfs_get_page(struct inode *dir, unsigned long n) { struct address_space *mapping = dir->i_mapping; - struct page *page = read_cache_page(mapping, n, - (filler_t *)mapping->a_ops->readpage, NULL); + struct page *page = read_mapping_page(mapping, n, NULL); + if (!IS_ERR(page)) { - wait_on_page_locked(page); kmap(page); - if (!PageUptodate(page)) - goto fail; if (!PageChecked(page)) nilfs_check_page(page); if (PageError(page)) @@ -238,7 +222,8 @@ nilfs_match(int len, const unsigned char *name, struct nilfs_dir_entry *de) */ static struct nilfs_dir_entry *nilfs_next_entry(struct nilfs_dir_entry *p) { - return (struct nilfs_dir_entry *)((char *)p + le16_to_cpu(p->rec_len)); + return (struct nilfs_dir_entry *)((char *)p + + nilfs_rec_len_from_disk(p->rec_len)); } static unsigned char @@ -329,7 +314,7 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) goto success; } } - filp->f_pos += le16_to_cpu(de->rec_len); + filp->f_pos += nilfs_rec_len_from_disk(de->rec_len); } nilfs_put_page(page); } @@ -444,12 +429,12 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de, struct page *page, struct inode *inode) { unsigned from = (char *) de - (char *) page_address(page); - unsigned to = from + le16_to_cpu(de->rec_len); + unsigned to = from + nilfs_rec_len_from_disk(de->rec_len); struct address_space *mapping = page->mapping; int err; lock_page(page); - err = nilfs_prepare_chunk_uninterruptible(page, mapping, from, to); + err = nilfs_prepare_chunk(page, from, to); BUG_ON(err); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); @@ -500,7 +485,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) /* We hit i_size */ name_len = 0; rec_len = chunk_size; - de->rec_len = cpu_to_le16(chunk_size); + de->rec_len = nilfs_rec_len_to_disk(chunk_size); de->inode = 0; goto got_it; } @@ -514,7 +499,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) if (nilfs_match(namelen, name, de)) goto out_unlock; name_len = NILFS_DIR_REC_LEN(de->name_len); - rec_len = le16_to_cpu(de->rec_len); + rec_len = nilfs_rec_len_from_disk(de->rec_len); if (!de->inode && rec_len >= reclen) goto got_it; if (rec_len >= name_len + reclen) @@ -530,15 +515,15 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode) got_it: from = (char *)de - (char *)page_address(page); to = from + rec_len; - err = nilfs_prepare_chunk(page, page->mapping, from, to); + err = nilfs_prepare_chunk(page, from, to); if (err) goto out_unlock; if (de->inode) { struct nilfs_dir_entry *de1; de1 = (struct nilfs_dir_entry *)((char *)de + name_len); - de1->rec_len = cpu_to_le16(rec_len - name_len); - de->rec_len = cpu_to_le16(name_len); + de1->rec_len = nilfs_rec_len_to_disk(rec_len - name_len); + de->rec_len = nilfs_rec_len_to_disk(name_len); de = de1; } de->name_len = namelen; @@ -569,7 +554,8 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) struct inode *inode = mapping->host; char *kaddr = page_address(page); unsigned from = ((char *)dir - kaddr) & ~(nilfs_chunk_size(inode) - 1); - unsigned to = ((char *)dir - kaddr) + le16_to_cpu(dir->rec_len); + unsigned to = ((char *)dir - kaddr) + + nilfs_rec_len_from_disk(dir->rec_len); struct nilfs_dir_entry *pde = NULL; struct nilfs_dir_entry *de = (struct nilfs_dir_entry *)(kaddr + from); int err; @@ -587,10 +573,10 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page) if (pde) from = (char *)pde - (char *)page_address(page); lock_page(page); - err = nilfs_prepare_chunk(page, mapping, from, to); + err = nilfs_prepare_chunk(page, from, to); BUG_ON(err); if (pde) - pde->rec_len = cpu_to_le16(to - from); + pde->rec_len = nilfs_rec_len_to_disk(to - from); dir->inode = 0; nilfs_commit_chunk(page, mapping, from, to); inode->i_ctime = inode->i_mtime = CURRENT_TIME; @@ -615,7 +601,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) if (!page) return -ENOMEM; - err = nilfs_prepare_chunk(page, mapping, 0, chunk_size); + err = nilfs_prepare_chunk(page, 0, chunk_size); if (unlikely(err)) { unlock_page(page); goto fail; @@ -624,14 +610,14 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent) memset(kaddr, 0, chunk_size); de = (struct nilfs_dir_entry *)kaddr; de->name_len = 1; - de->rec_len = cpu_to_le16(NILFS_DIR_REC_LEN(1)); + de->rec_len = nilfs_rec_len_to_disk(NILFS_DIR_REC_LEN(1)); memcpy(de->name, ".\0\0", 4); de->inode = cpu_to_le64(inode->i_ino); nilfs_set_de_type(de, inode); de = (struct nilfs_dir_entry *)(kaddr + NILFS_DIR_REC_LEN(1)); de->name_len = 2; - de->rec_len = cpu_to_le16(chunk_size - NILFS_DIR_REC_LEN(1)); + de->rec_len = nilfs_rec_len_to_disk(chunk_size - NILFS_DIR_REC_LEN(1)); de->inode = cpu_to_le64(parent->i_ino); memcpy(de->name, "..\0", 4); nilfs_set_de_type(de, inode); diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c index 236753d..324d80c 100644 --- a/fs/nilfs2/direct.c +++ b/fs/nilfs2/direct.c @@ -27,47 +27,43 @@ #include "alloc.h" #include "dat.h" -static inline __le64 *nilfs_direct_dptrs(const struct nilfs_direct *direct) +static inline __le64 *nilfs_direct_dptrs(const struct nilfs_bmap *direct) { return (__le64 *) - ((struct nilfs_direct_node *)direct->d_bmap.b_u.u_data + 1); + ((struct nilfs_direct_node *)direct->b_u.u_data + 1); } static inline __u64 -nilfs_direct_get_ptr(const struct nilfs_direct *direct, __u64 key) +nilfs_direct_get_ptr(const struct nilfs_bmap *direct, __u64 key) { - return nilfs_bmap_dptr_to_ptr(*(nilfs_direct_dptrs(direct) + key)); + return le64_to_cpu(*(nilfs_direct_dptrs(direct) + key)); } -static inline void nilfs_direct_set_ptr(struct nilfs_direct *direct, +static inline void nilfs_direct_set_ptr(struct nilfs_bmap *direct, __u64 key, __u64 ptr) { - *(nilfs_direct_dptrs(direct) + key) = nilfs_bmap_ptr_to_dptr(ptr); + *(nilfs_direct_dptrs(direct) + key) = cpu_to_le64(ptr); } -static int nilfs_direct_lookup(const struct nilfs_bmap *bmap, +static int nilfs_direct_lookup(const struct nilfs_bmap *direct, __u64 key, int level, __u64 *ptrp) { - struct nilfs_direct *direct; __u64 ptr; - direct = (struct nilfs_direct *)bmap; /* XXX: use macro for level 1 */ if (key > NILFS_DIRECT_KEY_MAX || level != 1) return -ENOENT; ptr = nilfs_direct_get_ptr(direct, key); if (ptr == NILFS_BMAP_INVALID_PTR) return -ENOENT; - if (ptrp != NULL) - *ptrp = ptr; + *ptrp = ptr; return 0; } -static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap, +static int nilfs_direct_lookup_contig(const struct nilfs_bmap *direct, __u64 key, __u64 *ptrp, unsigned maxblocks) { - struct nilfs_direct *direct = (struct nilfs_direct *)bmap; struct inode *dat = NULL; __u64 ptr, ptr2; sector_t blocknr; @@ -79,8 +75,8 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap, if (ptr == NILFS_BMAP_INVALID_PTR) return -ENOENT; - if (NILFS_BMAP_USE_VBN(bmap)) { - dat = nilfs_bmap_get_dat(bmap); + if (NILFS_BMAP_USE_VBN(direct)) { + dat = nilfs_bmap_get_dat(direct); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) return ret; @@ -106,29 +102,21 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap, } static __u64 -nilfs_direct_find_target_v(const struct nilfs_direct *direct, __u64 key) +nilfs_direct_find_target_v(const struct nilfs_bmap *direct, __u64 key) { __u64 ptr; - ptr = nilfs_bmap_find_target_seq(&direct->d_bmap, key); + ptr = nilfs_bmap_find_target_seq(direct, key); if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; else /* block group */ - return nilfs_bmap_find_target_in_group(&direct->d_bmap); -} - -static void nilfs_direct_set_target_v(struct nilfs_direct *direct, - __u64 key, __u64 ptr) -{ - direct->d_bmap.b_last_allocated_key = key; - direct->d_bmap.b_last_allocated_ptr = ptr; + return nilfs_bmap_find_target_in_group(direct); } static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) { - struct nilfs_direct *direct = (struct nilfs_direct *)bmap; union nilfs_bmap_ptr_req req; struct inode *dat = NULL; struct buffer_head *bh; @@ -136,11 +124,11 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) if (key > NILFS_DIRECT_KEY_MAX) return -ENOENT; - if (nilfs_direct_get_ptr(direct, key) != NILFS_BMAP_INVALID_PTR) + if (nilfs_direct_get_ptr(bmap, key) != NILFS_BMAP_INVALID_PTR) return -EEXIST; if (NILFS_BMAP_USE_VBN(bmap)) { - req.bpr_ptr = nilfs_direct_find_target_v(direct, key); + req.bpr_ptr = nilfs_direct_find_target_v(bmap, key); dat = nilfs_bmap_get_dat(bmap); } ret = nilfs_bmap_prepare_alloc_ptr(bmap, &req, dat); @@ -150,13 +138,13 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) set_buffer_nilfs_volatile(bh); nilfs_bmap_commit_alloc_ptr(bmap, &req, dat); - nilfs_direct_set_ptr(direct, key, req.bpr_ptr); + nilfs_direct_set_ptr(bmap, key, req.bpr_ptr); if (!nilfs_bmap_dirty(bmap)) nilfs_bmap_set_dirty(bmap); if (NILFS_BMAP_USE_VBN(bmap)) - nilfs_direct_set_target_v(direct, key, req.bpr_ptr); + nilfs_bmap_set_target_v(bmap, key, req.bpr_ptr); nilfs_bmap_add_blocks(bmap, 1); } @@ -165,33 +153,30 @@ static int nilfs_direct_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr) static int nilfs_direct_delete(struct nilfs_bmap *bmap, __u64 key) { - struct nilfs_direct *direct = (struct nilfs_direct *)bmap; union nilfs_bmap_ptr_req req; struct inode *dat; int ret; if (key > NILFS_DIRECT_KEY_MAX || - nilfs_direct_get_ptr(direct, key) == NILFS_BMAP_INVALID_PTR) + nilfs_direct_get_ptr(bmap, key) == NILFS_BMAP_INVALID_PTR) return -ENOENT; dat = NILFS_BMAP_USE_VBN(bmap) ? nilfs_bmap_get_dat(bmap) : NULL; - req.bpr_ptr = nilfs_direct_get_ptr(direct, key); + req.bpr_ptr = nilfs_direct_get_ptr(bmap, key); ret = nilfs_bmap_prepare_end_ptr(bmap, &req, dat); if (!ret) { nilfs_bmap_commit_end_ptr(bmap, &req, dat); - nilfs_direct_set_ptr(direct, key, NILFS_BMAP_INVALID_PTR); + nilfs_direct_set_ptr(bmap, key, NILFS_BMAP_INVALID_PTR); nilfs_bmap_sub_blocks(bmap, 1); } return ret; } -static int nilfs_direct_last_key(const struct nilfs_bmap *bmap, __u64 *keyp) +static int nilfs_direct_last_key(const struct nilfs_bmap *direct, __u64 *keyp) { - struct nilfs_direct *direct; __u64 key, lastkey; - direct = (struct nilfs_direct *)bmap; lastkey = NILFS_DIRECT_KEY_MAX + 1; for (key = NILFS_DIRECT_KEY_MIN; key <= NILFS_DIRECT_KEY_MAX; key++) if (nilfs_direct_get_ptr(direct, key) != @@ -211,15 +196,13 @@ static int nilfs_direct_check_insert(const struct nilfs_bmap *bmap, __u64 key) return key > NILFS_DIRECT_KEY_MAX; } -static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, +static int nilfs_direct_gather_data(struct nilfs_bmap *direct, __u64 *keys, __u64 *ptrs, int nitems) { - struct nilfs_direct *direct; __u64 key; __u64 ptr; int n; - direct = (struct nilfs_direct *)bmap; if (nitems > NILFS_DIRECT_NBLOCKS) nitems = NILFS_DIRECT_NBLOCKS; n = 0; @@ -237,7 +220,6 @@ static int nilfs_direct_gather_data(struct nilfs_bmap *bmap, int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, __u64 key, __u64 *keys, __u64 *ptrs, int n) { - struct nilfs_direct *direct; __le64 *dptrs; int ret, i, j; @@ -253,12 +235,11 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, bmap->b_ops->bop_clear(bmap); /* convert */ - direct = (struct nilfs_direct *)bmap; - dptrs = nilfs_direct_dptrs(direct); + dptrs = nilfs_direct_dptrs(bmap); for (i = 0, j = 0; i < NILFS_DIRECT_NBLOCKS; i++) { if ((j < n) && (i == keys[j])) { dptrs[i] = (i != key) ? - nilfs_bmap_ptr_to_dptr(ptrs[j]) : + cpu_to_le64(ptrs[j]) : NILFS_BMAP_INVALID_PTR; j++; } else @@ -269,10 +250,9 @@ int nilfs_direct_delete_and_convert(struct nilfs_bmap *bmap, return 0; } -static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, +static int nilfs_direct_propagate(struct nilfs_bmap *bmap, struct buffer_head *bh) { - struct nilfs_direct *direct = (struct nilfs_direct *)bmap; struct nilfs_palloc_req oldreq, newreq; struct inode *dat; __u64 key; @@ -284,7 +264,7 @@ static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, dat = nilfs_bmap_get_dat(bmap); key = nilfs_bmap_data_get_key(bmap, bh); - ptr = nilfs_direct_get_ptr(direct, key); + ptr = nilfs_direct_get_ptr(bmap, key); if (!buffer_nilfs_volatile(bh)) { oldreq.pr_entry_nr = ptr; newreq.pr_entry_nr = ptr; @@ -294,20 +274,20 @@ static int nilfs_direct_propagate(const struct nilfs_bmap *bmap, nilfs_dat_commit_update(dat, &oldreq, &newreq, bmap->b_ptr_type == NILFS_BMAP_PTR_VS); set_buffer_nilfs_volatile(bh); - nilfs_direct_set_ptr(direct, key, newreq.pr_entry_nr); + nilfs_direct_set_ptr(bmap, key, newreq.pr_entry_nr); } else ret = nilfs_dat_mark_dirty(dat, ptr); return ret; } -static int nilfs_direct_assign_v(struct nilfs_direct *direct, +static int nilfs_direct_assign_v(struct nilfs_bmap *direct, __u64 key, __u64 ptr, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { - struct inode *dat = nilfs_bmap_get_dat(&direct->d_bmap); + struct inode *dat = nilfs_bmap_get_dat(direct); union nilfs_bmap_ptr_req req; int ret; @@ -315,13 +295,13 @@ static int nilfs_direct_assign_v(struct nilfs_direct *direct, ret = nilfs_dat_prepare_start(dat, &req.bpr_req); if (!ret) { nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); - binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr); - binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); + binfo->bi_v.bi_blkoff = cpu_to_le64(key); } return ret; } -static int nilfs_direct_assign_p(struct nilfs_direct *direct, +static int nilfs_direct_assign_p(struct nilfs_bmap *direct, __u64 key, __u64 ptr, struct buffer_head **bh, sector_t blocknr, @@ -329,7 +309,7 @@ static int nilfs_direct_assign_p(struct nilfs_direct *direct, { nilfs_direct_set_ptr(direct, key, blocknr); - binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key); + binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = 0; return 0; @@ -340,18 +320,16 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, sector_t blocknr, union nilfs_binfo *binfo) { - struct nilfs_direct *direct; __u64 key; __u64 ptr; - direct = (struct nilfs_direct *)bmap; key = nilfs_bmap_data_get_key(bmap, *bh); if (unlikely(key > NILFS_DIRECT_KEY_MAX)) { printk(KERN_CRIT "%s: invalid key: %llu\n", __func__, (unsigned long long)key); return -EINVAL; } - ptr = nilfs_direct_get_ptr(direct, key); + ptr = nilfs_direct_get_ptr(bmap, key); if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) { printk(KERN_CRIT "%s: invalid pointer: %llu\n", __func__, (unsigned long long)ptr); @@ -359,8 +337,8 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap, } return NILFS_BMAP_USE_VBN(bmap) ? - nilfs_direct_assign_v(direct, key, ptr, bh, blocknr, binfo) : - nilfs_direct_assign_p(direct, key, ptr, bh, blocknr, binfo); + nilfs_direct_assign_v(bmap, key, ptr, bh, blocknr, binfo) : + nilfs_direct_assign_p(bmap, key, ptr, bh, blocknr, binfo); } static const struct nilfs_bmap_operations nilfs_direct_ops = { diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h index a5ffd66..dc643de 100644 --- a/fs/nilfs2/direct.h +++ b/fs/nilfs2/direct.h @@ -28,8 +28,6 @@ #include "bmap.h" -struct nilfs_direct; - /** * struct nilfs_direct_node - direct node * @dn_flags: flags @@ -40,15 +38,6 @@ struct nilfs_direct_node { __u8 pad[7]; }; -/** - * struct nilfs_direct - direct mapping - * @d_bmap: bmap structure - */ -struct nilfs_direct { - struct nilfs_bmap d_bmap; -}; - - #define NILFS_DIRECT_NBLOCKS (NILFS_BMAP_SIZE / sizeof(__le64) - 1) #define NILFS_DIRECT_KEY_MIN 0 #define NILFS_DIRECT_KEY_MAX (NILFS_DIRECT_NBLOCKS - 1) diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h new file mode 100644 index 0000000..a71cc41 --- /dev/null +++ b/fs/nilfs2/export.h @@ -0,0 +1,17 @@ +#ifndef NILFS_EXPORT_H +#define NILFS_EXPORT_H + +#include <linux/exportfs.h> + +extern const struct export_operations nilfs_export_ops; + +struct nilfs_fid { + u64 cno; + u64 ino; + u32 gen; + + u32 parent_gen; + u64 parent_ino; +} __attribute__ ((packed)); + +#endif diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c deleted file mode 100644 index dd5f7e0..0000000 --- a/fs/nilfs2/gcdat.c +++ /dev/null @@ -1,87 +0,0 @@ -/* - * gcdat.c - NILFS shadow DAT inode for GC - * - * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>, - * and Ryusuke Konishi <ryusuke@osrg.net>. - * - */ - -#include <linux/buffer_head.h> -#include "nilfs.h" -#include "page.h" -#include "mdt.h" - -int nilfs_init_gcdat_inode(struct the_nilfs *nilfs) -{ - struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; - struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); - int err; - - gcdat->i_state = 0; - gcdat->i_blocks = dat->i_blocks; - gii->i_flags = dii->i_flags; - gii->i_state = dii->i_state | (1 << NILFS_I_GCDAT); - gii->i_cno = 0; - nilfs_bmap_init_gcdat(gii->i_bmap, dii->i_bmap); - err = nilfs_copy_dirty_pages(gcdat->i_mapping, dat->i_mapping); - if (unlikely(err)) - return err; - - return nilfs_copy_dirty_pages(&gii->i_btnode_cache, - &dii->i_btnode_cache); -} - -void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs) -{ - struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; - struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); - struct address_space *mapping = dat->i_mapping; - struct address_space *gmapping = gcdat->i_mapping; - - down_write(&NILFS_MDT(dat)->mi_sem); - dat->i_blocks = gcdat->i_blocks; - dii->i_flags = gii->i_flags; - dii->i_state = gii->i_state & ~(1 << NILFS_I_GCDAT); - - nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap); - - nilfs_palloc_clear_cache(dat); - nilfs_palloc_clear_cache(gcdat); - nilfs_clear_dirty_pages(mapping); - nilfs_copy_back_pages(mapping, gmapping); - /* note: mdt dirty flags should be cleared by segctor. */ - - nilfs_clear_dirty_pages(&dii->i_btnode_cache); - nilfs_copy_back_pages(&dii->i_btnode_cache, &gii->i_btnode_cache); - - up_write(&NILFS_MDT(dat)->mi_sem); -} - -void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs) -{ - struct inode *gcdat = nilfs->ns_gc_dat; - struct nilfs_inode_info *gii = NILFS_I(gcdat); - - gcdat->i_state = I_CLEAR; - gii->i_flags = 0; - - nilfs_palloc_clear_cache(gcdat); - truncate_inode_pages(gcdat->i_mapping, 0); - truncate_inode_pages(&gii->i_btnode_cache, 0); -} diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 145f03c..33ad25d 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -28,13 +28,6 @@ * gcinodes), and this file provides lookup function of the dummy * inodes and their buffer read function. * - * Since NILFS2 keeps up multiple checkpoints/snapshots across GC, it - * has to treat blocks that belong to a same file but have different - * checkpoint numbers. To avoid interference among generations, dummy - * inodes are managed separately from actual inodes, and their lookup - * function (nilfs_gc_iget) is designed to be specified with a - * checkpoint number argument as well as an inode number. - * * Buffers and pages held by the dummy inodes will be released each * time after they are copied to a new log. Dirty blocks made on the * current generation and the blocks to be moved by GC never overlap @@ -48,6 +41,8 @@ #include <linux/slab.h> #include <linux/swap.h> #include "nilfs.h" +#include "btree.h" +#include "btnode.h" #include "page.h" #include "mdt.h" #include "dat.h" @@ -149,8 +144,10 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, int nilfs_gccache_submit_read_node(struct inode *inode, sector_t pbn, __u64 vbn, struct buffer_head **out_bh) { - int ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, - vbn ? : pbn, pbn, out_bh); + int ret; + + ret = nilfs_btnode_submit_block(&NILFS_I(inode)->i_btnode_cache, + vbn ? : pbn, pbn, READ, out_bh, &pbn); if (ret == -EEXIST) /* internal code (cache hit) */ ret = 0; return ret; @@ -164,127 +161,53 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) if (buffer_dirty(bh)) return -EEXIST; - if (buffer_nilfs_node(bh)) + if (buffer_nilfs_node(bh)) { + if (nilfs_btree_broken_node_block(bh)) { + clear_buffer_uptodate(bh); + return -EIO; + } nilfs_btnode_mark_dirty(bh); - else - nilfs_mdt_mark_buffer_dirty(bh); - return 0; -} - -/* - * nilfs_init_gccache() - allocate and initialize gc_inode hash table - * @nilfs - the_nilfs - * - * Return Value: On success, 0. - * On error, a negative error code is returned. - */ -int nilfs_init_gccache(struct the_nilfs *nilfs) -{ - int loop; - - BUG_ON(nilfs->ns_gc_inodes_h); - - INIT_LIST_HEAD(&nilfs->ns_gc_inodes); - - nilfs->ns_gc_inodes_h = - kmalloc(sizeof(struct hlist_head) * NILFS_GCINODE_HASH_SIZE, - GFP_NOFS); - if (nilfs->ns_gc_inodes_h == NULL) - return -ENOMEM; - - for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++) - INIT_HLIST_HEAD(&nilfs->ns_gc_inodes_h[loop]); - return 0; -} - -/* - * nilfs_destroy_gccache() - free gc_inode hash table - * @nilfs - the nilfs - */ -void nilfs_destroy_gccache(struct the_nilfs *nilfs) -{ - if (nilfs->ns_gc_inodes_h) { - nilfs_remove_all_gcinode(nilfs); - kfree(nilfs->ns_gc_inodes_h); - nilfs->ns_gc_inodes_h = NULL; + } else { + nilfs_mark_buffer_dirty(bh); } + return 0; } -static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, - __u64 cno) +int nilfs_init_gcinode(struct inode *inode) { - struct inode *inode; - struct nilfs_inode_info *ii; - - inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS, 0); - if (!inode) - return NULL; + struct nilfs_inode_info *ii = NILFS_I(inode); + struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; - inode->i_op = NULL; - inode->i_fop = NULL; + inode->i_mode = S_IFREG; + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); inode->i_mapping->a_ops = &def_gcinode_aops; + inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; - ii = NILFS_I(inode); - ii->i_cno = cno; ii->i_flags = 0; - ii->i_state = 1 << NILFS_I_GCINODE; - ii->i_bh = NULL; nilfs_bmap_init_gc(ii->i_bmap); - return inode; -} - -static unsigned long ihash(ino_t ino, __u64 cno) -{ - return hash_long((unsigned long)((ino << 2) + cno), - NILFS_GCINODE_HASH_BITS); -} - -/* - * nilfs_gc_iget() - find or create gc inode with specified (ino,cno) - */ -struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno) -{ - struct hlist_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno); - struct hlist_node *node; - struct inode *inode; - - hlist_for_each_entry(inode, node, head, i_hash) { - if (inode->i_ino == ino && NILFS_I(inode)->i_cno == cno) - return inode; - } - - inode = alloc_gcinode(nilfs, ino, cno); - if (likely(inode)) { - hlist_add_head(&inode->i_hash, head); - list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); - } - return inode; -} + /* + * Add the inode to GC inode list. Garbage Collection + * is serialized and no two processes manipulate the + * list simultaneously. + */ + igrab(inode); + list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); -/* - * nilfs_clear_gcinode() - clear and free a gc inode - */ -void nilfs_clear_gcinode(struct inode *inode) -{ - nilfs_mdt_destroy(inode); + return 0; } -/* - * nilfs_remove_all_gcinode() - remove all inodes from the_nilfs +/** + * nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes */ -void nilfs_remove_all_gcinode(struct the_nilfs *nilfs) +void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) { - struct hlist_head *head = nilfs->ns_gc_inodes_h; - struct hlist_node *node, *n; - struct inode *inode; - int loop; + struct list_head *head = &nilfs->ns_gc_inodes; + struct nilfs_inode_info *ii; - for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++, head++) { - hlist_for_each_entry_safe(inode, node, n, head, i_hash) { - hlist_del_init(&inode->i_hash); - list_del_init(&NILFS_I(inode)->i_dirty); - nilfs_clear_gcinode(inode); /* might sleep */ - } + while (!list_empty(head)) { + ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); + list_del_init(&ii->i_dirty); + iput(&ii->vfs_inode); } } diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 922d9dd..9f8a2da 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c @@ -161,25 +161,46 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, } /** - * nilfs_ifile_new - create inode file - * @sbi: nilfs_sb_info struct + * nilfs_ifile_read - read or get ifile inode + * @sb: super block instance + * @root: root object * @inode_size: size of an inode + * @raw_inode: on-disk ifile inode + * @inodep: buffer to store the inode */ -struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size) +int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, + size_t inode_size, struct nilfs_inode *raw_inode, + struct inode **inodep) { struct inode *ifile; int err; - ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO, - sizeof(struct nilfs_ifile_info)); - if (ifile) { - err = nilfs_palloc_init_blockgroup(ifile, inode_size); - if (unlikely(err)) { - nilfs_mdt_destroy(ifile); - return NULL; - } - nilfs_palloc_setup_cache(ifile, - &NILFS_IFILE_I(ifile)->palloc_cache); - } - return ifile; + ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO); + if (unlikely(!ifile)) + return -ENOMEM; + if (!(ifile->i_state & I_NEW)) + goto out; + + err = nilfs_mdt_init(ifile, NILFS_MDT_GFP, + sizeof(struct nilfs_ifile_info)); + if (err) + goto failed; + + err = nilfs_palloc_init_blockgroup(ifile, inode_size); + if (err) + goto failed; + + nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache); + + err = nilfs_read_inode_common(ifile, raw_inode); + if (err) + goto failed; + + unlock_new_inode(ifile); + out: + *inodep = ifile; + return 0; + failed: + iget_failed(ifile); + return err; } diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index cbca32e..59b6f2b 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h @@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); int nilfs_ifile_delete_inode(struct inode *, ino_t); int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); -struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size); +int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, + size_t inode_size, struct nilfs_inode *raw_inode, + struct inode **inodep); #endif /* _NILFS_IFILE_H */ diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 39e038a..71d4bc8 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -27,12 +27,19 @@ #include <linux/writeback.h> #include <linux/uio.h> #include "nilfs.h" +#include "btnode.h" #include "segment.h" #include "page.h" #include "mdt.h" #include "cpfile.h" #include "ifile.h" +struct nilfs_iget_args { + u64 ino; + __u64 cno; + struct nilfs_root *root; + int for_gc; +}; /** * nilfs_get_block() - get a file block on the filesystem (callback function) @@ -197,11 +204,15 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) return err; - *pagep = NULL; - err = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, nilfs_get_block); - if (unlikely(err)) + err = block_write_begin(mapping, pos, len, flags, pagep, + nilfs_get_block); + if (unlikely(err)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + nilfs_transaction_abort(inode->i_sb); + } return err; } @@ -237,6 +248,19 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, /* Needs synchronization with the cleaner */ size = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, nilfs_get_block, NULL); + + /* + * In case of error extending write may have instantiated a few + * blocks outside i_size. Trim these off again. + */ + if (unlikely((rw & WRITE) && size < 0)) { + loff_t isize = i_size_read(inode); + loff_t end = offset + iov_length(iov, nr_segs); + + if (end > isize) + vmtruncate(inode, isize); + } + return size; } @@ -261,6 +285,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) struct nilfs_sb_info *sbi = NILFS_SB(sb); struct inode *inode; struct nilfs_inode_info *ii; + struct nilfs_root *root; int err = -ENOMEM; ino_t ino; @@ -271,15 +296,17 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + root = NILFS_I(dir)->i_root; ii = NILFS_I(inode); ii->i_state = 1 << NILFS_I_NEW; + ii->i_root = root; - err = nilfs_ifile_create_inode(sbi->s_ifile, &ino, &ii->i_bh); + err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); if (unlikely(err)) goto failed_ifile_create_inode; /* reference count of i_bh inherits from nilfs_mdt_read_block() */ - atomic_inc(&sbi->s_inodes_count); + atomic_inc(&root->inodes_count); inode_init_owner(inode, dir, mode); inode->i_ino = ino; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -302,7 +329,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) /* ii->i_file_acl = 0; */ /* ii->i_dir_acl = 0; */ ii->i_dir_start_lookup = 0; - ii->i_cno = 0; nilfs_set_inode_flags(inode); spin_lock(&sbi->s_next_gen_lock); inode->i_generation = sbi->s_next_generation++; @@ -332,17 +358,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) return ERR_PTR(err); } -void nilfs_free_inode(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); - - clear_inode(inode); - /* XXX: check error code? Is there any thing I can do? */ - (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); - atomic_dec(&sbi->s_inodes_count); -} - void nilfs_set_inode_flags(struct inode *inode) { unsigned int flags = NILFS_I(inode)->i_flags; @@ -393,7 +408,6 @@ int nilfs_read_inode_common(struct inode *inode, 0 : le32_to_cpu(raw_inode->i_dir_acl); #endif ii->i_dir_start_lookup = 0; - ii->i_cno = 0; inode->i_generation = le32_to_cpu(raw_inode->i_generation); if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || @@ -407,7 +421,8 @@ int nilfs_read_inode_common(struct inode *inode, return 0; } -static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, +static int __nilfs_read_inode(struct super_block *sb, + struct nilfs_root *root, unsigned long ino, struct inode *inode) { struct nilfs_sb_info *sbi = NILFS_SB(sb); @@ -417,11 +432,11 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, int err; down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ - err = nilfs_ifile_get_inode_block(sbi->s_ifile, ino, &bh); + err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); if (unlikely(err)) goto bad_inode; - raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); + raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh); err = nilfs_read_inode_common(inode, raw_inode); if (err) @@ -444,14 +459,14 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, inode, inode->i_mode, huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); } - nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); + nilfs_ifile_unmap_inode(root->ifile, ino, bh); brelse(bh); up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ nilfs_set_inode_flags(inode); return 0; failed_unmap: - nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); + nilfs_ifile_unmap_inode(root->ifile, ino, bh); brelse(bh); bad_inode: @@ -459,18 +474,95 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, return err; } -struct inode *nilfs_iget(struct super_block *sb, unsigned long ino) +static int nilfs_iget_test(struct inode *inode, void *opaque) +{ + struct nilfs_iget_args *args = opaque; + struct nilfs_inode_info *ii; + + if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) + return 0; + + ii = NILFS_I(inode); + if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) + return !args->for_gc; + + return args->for_gc && args->cno == ii->i_cno; +} + +static int nilfs_iget_set(struct inode *inode, void *opaque) +{ + struct nilfs_iget_args *args = opaque; + + inode->i_ino = args->ino; + if (args->for_gc) { + NILFS_I(inode)->i_state = 1 << NILFS_I_GCINODE; + NILFS_I(inode)->i_cno = args->cno; + NILFS_I(inode)->i_root = NULL; + } else { + if (args->root && args->ino == NILFS_ROOT_INO) + nilfs_get_root(args->root); + NILFS_I(inode)->i_root = args->root; + } + return 0; +} + +struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = 0 + }; + + return ilookup5(sb, ino, nilfs_iget_test, &args); +} + +struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) +{ + struct nilfs_iget_args args = { + .ino = ino, .root = root, .cno = 0, .for_gc = 0 + }; + + return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); +} + +struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, + unsigned long ino) +{ + struct inode *inode; + int err; + + inode = nilfs_iget_locked(sb, root, ino); + if (unlikely(!inode)) + return ERR_PTR(-ENOMEM); + if (!(inode->i_state & I_NEW)) + return inode; + + err = __nilfs_read_inode(sb, root, ino, inode); + if (unlikely(err)) { + iget_failed(inode); + return ERR_PTR(err); + } + unlock_new_inode(inode); + return inode; +} + +struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, + __u64 cno) { + struct nilfs_iget_args args = { + .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 + }; struct inode *inode; int err; - inode = iget_locked(sb, ino); + inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); if (unlikely(!inode)) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; - err = __nilfs_read_inode(sb, ino, inode); + err = nilfs_init_gcinode(inode); if (unlikely(err)) { iget_failed(inode); return ERR_PTR(err); @@ -511,21 +603,20 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) { ino_t ino = inode->i_ino; struct nilfs_inode_info *ii = NILFS_I(inode); - struct super_block *sb = inode->i_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct inode *ifile = ii->i_root->ifile; struct nilfs_inode *raw_inode; - raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); + raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) - memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); + memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); nilfs_write_inode_common(inode, raw_inode, 0); /* XXX: call with has_bmap = 0 is a workaround to avoid deadlock of bmap. This delays update of i_bmap to just before writing */ - nilfs_ifile_unmap_inode(sbi->s_ifile, ino, ibh); + nilfs_ifile_unmap_inode(ifile, ino, ibh); } #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ @@ -597,16 +688,41 @@ void nilfs_truncate(struct inode *inode) But truncate has no return value. */ } -void nilfs_delete_inode(struct inode *inode) +static void nilfs_clear_inode(struct inode *inode) +{ + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + + /* + * Free resources allocated in nilfs_read_inode(), here. + */ + BUG_ON(!list_empty(&ii->i_dirty)); + brelse(ii->i_bh); + ii->i_bh = NULL; + + if (mdi && mdi->mi_palloc_cache) + nilfs_palloc_destroy_cache(inode); + + if (test_bit(NILFS_I_BMAP, &ii->i_state)) + nilfs_bmap_clear(ii->i_bmap); + + nilfs_btnode_cache_clear(&ii->i_btnode_cache); + + if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) + nilfs_put_root(ii->i_root); +} + +void nilfs_evict_inode(struct inode *inode) { struct nilfs_transaction_info ti; struct super_block *sb = inode->i_sb; struct nilfs_inode_info *ii = NILFS_I(inode); - if (unlikely(is_bad_inode(inode))) { + if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); - clear_inode(inode); + end_writeback(inode); + nilfs_clear_inode(inode); return; } nilfs_transaction_begin(sb, &ti, 0); /* never fails */ @@ -614,10 +730,16 @@ void nilfs_delete_inode(struct inode *inode) if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); + /* TODO: some of the following operations may fail. */ nilfs_truncate_bmap(ii, 0); nilfs_mark_inode_dirty(inode); - nilfs_free_inode(inode); - /* nilfs_free_inode() marks inode buffer dirty */ + end_writeback(inode); + + nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); + atomic_dec(&ii->i_root->inodes_count); + + nilfs_clear_inode(inode); + if (IS_SYNC(inode)) nilfs_set_transaction_flag(NILFS_TI_SYNC); nilfs_transaction_commit(sb); @@ -639,17 +761,41 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr) err = nilfs_transaction_begin(sb, &ti, 0); if (unlikely(err)) return err; - err = inode_setattr(inode, iattr); - if (!err && (iattr->ia_valid & ATTR_MODE)) + + if ((iattr->ia_valid & ATTR_SIZE) && + iattr->ia_size != i_size_read(inode)) { + err = vmtruncate(inode, iattr->ia_size); + if (unlikely(err)) + goto out_err; + } + + setattr_copy(inode, iattr); + mark_inode_dirty(inode); + + if (iattr->ia_valid & ATTR_MODE) { err = nilfs_acl_chmod(inode); - if (likely(!err)) - err = nilfs_transaction_commit(sb); - else - nilfs_transaction_abort(sb); + if (unlikely(err)) + goto out_err; + } + + return nilfs_transaction_commit(sb); +out_err: + nilfs_transaction_abort(sb); return err; } +int nilfs_permission(struct inode *inode, int mask) +{ + struct nilfs_root *root = NILFS_I(inode)->i_root; + + if ((mask & MAY_WRITE) && root && + root->cno != NILFS_CPTREE_CURRENT_CNO) + return -EROFS; /* snapshot is not writable */ + + return generic_permission(inode, mask, NULL); +} + int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, struct buffer_head **pbh) { @@ -659,8 +805,8 @@ int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, spin_lock(&sbi->s_inode_lock); if (ii->i_bh == NULL) { spin_unlock(&sbi->s_inode_lock); - err = nilfs_ifile_get_inode_block(sbi->s_ifile, inode->i_ino, - pbh); + err = nilfs_ifile_get_inode_block(ii->i_root->ifile, + inode->i_ino, pbh); if (unlikely(err)) return err; spin_lock(&sbi->s_inode_lock); @@ -740,7 +886,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) } nilfs_update_inode(inode, ibh); nilfs_mdt_mark_buffer_dirty(ibh); - nilfs_mdt_mark_dirty(sbi->s_ifile); + nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); brelse(ibh); return 0; } @@ -758,6 +904,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) void nilfs_dirty_inode(struct inode *inode) { struct nilfs_transaction_info ti; + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); if (is_bad_inode(inode)) { nilfs_warning(inode->i_sb, __func__, @@ -765,6 +912,10 @@ void nilfs_dirty_inode(struct inode *inode) dump_stack(); return; } + if (mdi) { + nilfs_mdt_mark_dirty(inode); + return; + } nilfs_transaction_begin(inode->i_sb, &ti, 0); nilfs_mark_inode_dirty(inode); nilfs_transaction_commit(inode->i_sb); /* never fails */ diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index f90a33d..3e90f86 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -22,7 +22,6 @@ #include <linux/fs.h> #include <linux/wait.h> -#include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */ #include <linux/slab.h> #include <linux/capability.h> /* capable() */ #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ @@ -118,7 +117,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, if (copy_from_user(&cpmode, argp, sizeof(cpmode))) goto out; - mutex_lock(&nilfs->ns_mount_mutex); + down_read(&inode->i_sb->s_umount); nilfs_transaction_begin(inode->i_sb, &ti, 0); ret = nilfs_cpfile_change_cpmode( @@ -128,7 +127,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, else nilfs_transaction_commit(inode->i_sb); /* never fails */ - mutex_unlock(&nilfs->ns_mount_mutex); + up_read(&inode->i_sb->s_umount); out: mnt_drop_write(filp->f_path.mnt); return ret; @@ -334,7 +333,7 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, return 0; } -static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, +static int nilfs_ioctl_move_blocks(struct super_block *sb, struct nilfs_argv *argv, void *buf) { size_t nmembs = argv->v_nmembs; @@ -349,7 +348,7 @@ static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, for (i = 0, vdesc = buf; i < nmembs; ) { ino = vdesc->vd_ino; cno = vdesc->vd_cno; - inode = nilfs_gc_iget(nilfs, ino, cno); + inode = nilfs_iget_for_gc(sb, ino, cno); if (unlikely(inode == NULL)) { ret = -ENOMEM; goto failed; @@ -357,11 +356,15 @@ static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, do { ret = nilfs_ioctl_move_inode_block(inode, vdesc, &buffers); - if (unlikely(ret < 0)) + if (unlikely(ret < 0)) { + iput(inode); goto failed; + } vdesc++; } while (++i < nmembs && vdesc->vd_ino == ino && vdesc->vd_cno == cno); + + iput(inode); /* The inode still remains in GC inode list */ } list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) { @@ -567,7 +570,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, } /* - * nilfs_ioctl_move_blocks() will call nilfs_gc_iget(), + * nilfs_ioctl_move_blocks() will call nilfs_iget_for_gc(), * which will operates an inode list without blocking. * To protect the list from concurrent operations, * nilfs_ioctl_move_blocks should be atomic operation. @@ -577,15 +580,16 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, goto out_free; } - ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); + vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); + + ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); if (ret < 0) printk(KERN_ERR "NILFS: GC failed during preparation: " "cannot read source blocks: err=%d\n", ret); else ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); - if (ret < 0) - nilfs_remove_all_gcinode(nilfs); + nilfs_remove_all_gcinodes(nilfs); clear_nilfs_gc_running(nilfs); out_free: diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index 024be8c..39a5b84 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -28,6 +28,7 @@ #include <linux/swap.h> #include <linux/slab.h> #include "nilfs.h" +#include "btnode.h" #include "segment.h" #include "page.h" #include "mdt.h" @@ -35,7 +36,6 @@ #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) -#define INIT_UNUSED_INODE_FIELDS static int nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, @@ -77,25 +77,11 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, struct buffer_head *, void *)) { - struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; struct super_block *sb = inode->i_sb; struct nilfs_transaction_info ti; struct buffer_head *bh; int err; - if (!sb) { - /* - * Make sure this function is not called from any - * read-only context. - */ - if (!nilfs->ns_writer) { - WARN_ON(1); - err = -EROFS; - goto out; - } - sb = nilfs->ns_writer->s_super; - } - nilfs_transaction_begin(sb, &ti, 0); err = -ENOMEM; @@ -111,7 +97,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, if (buffer_uptodate(bh)) goto failed_bh; - bh->b_bdev = nilfs->ns_bdev; + bh->b_bdev = sb->s_bdev; err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); if (likely(!err)) { get_bh(bh); @@ -128,7 +114,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, err = nilfs_transaction_commit(sb); else nilfs_transaction_abort(sb); - out: + return err; } @@ -166,9 +152,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, unlock_buffer(bh); goto failed_bh; } - bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; - bh->b_blocknr = (sector_t)blknum; - set_buffer_mapped(bh); + map_bh(bh, inode->i_sb, (sector_t)blknum); bh->b_end_io = end_buffer_read_sync; get_bh(bh); @@ -397,35 +381,24 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) static int nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) { - struct inode *inode = container_of(page->mapping, - struct inode, i_data); - struct super_block *sb = inode->i_sb; - struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; - struct nilfs_sb_info *writer = NULL; + struct inode *inode; + struct super_block *sb; int err = 0; redirty_page_for_writepage(wbc, page); unlock_page(page); - if (page->mapping->assoc_mapping) - return 0; /* Do not request flush for shadow page cache */ - if (!sb) { - down_read(&nilfs->ns_writer_sem); - writer = nilfs->ns_writer; - if (!writer) { - up_read(&nilfs->ns_writer_sem); - return -EROFS; - } - sb = writer->s_super; - } + inode = page->mapping->host; + if (!inode) + return 0; + + sb = inode->i_sb; if (wbc->sync_mode == WB_SYNC_ALL) err = nilfs_construct_segment(sb); else if (wbc->for_reclaim) nilfs_flush_segment(sb, inode->i_ino); - if (writer) - up_read(&nilfs->ns_writer_sem); return err; } @@ -438,105 +411,27 @@ static const struct address_space_operations def_mdt_aops = { static const struct inode_operations def_mdt_iops; static const struct file_operations def_mdt_fops; -/* - * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile, - * ifile, or gcinodes. This allows the B-tree code and segment constructor - * to treat them like regular files, and this helps to simplify the - * implementation. - * On the other hand, some of the pseudo inodes have an irregular point: - * They don't have valid inode->i_sb pointer because their lifetimes are - * longer than those of the super block structs; they may continue for - * several consecutive mounts/umounts. This would need discussions. - */ -/** - * nilfs_mdt_new_common - allocate a pseudo inode for metadata file - * @nilfs: nilfs object - * @sb: super block instance the metadata file belongs to - * @ino: inode number - * @gfp_mask: gfp mask for data pages - * @objsz: size of the private object attached to inode->i_private - */ -struct inode * -nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, gfp_t gfp_mask, size_t objsz) + +int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) { - struct inode *inode = nilfs_alloc_inode_common(nilfs); + struct nilfs_mdt_info *mi; - if (!inode) - return NULL; - else { - struct address_space * const mapping = &inode->i_data; - struct nilfs_mdt_info *mi; - - mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); - if (!mi) { - nilfs_destroy_inode(inode); - return NULL; - } - mi->mi_nilfs = nilfs; - init_rwsem(&mi->mi_sem); - - inode->i_sb = sb; /* sb may be NULL for some meta data files */ - inode->i_blkbits = nilfs->ns_blocksize_bits; - inode->i_flags = 0; - atomic_set(&inode->i_count, 1); - inode->i_nlink = 1; - inode->i_ino = ino; - inode->i_mode = S_IFREG; - inode->i_private = mi; - -#ifdef INIT_UNUSED_INODE_FIELDS - atomic_set(&inode->i_writecount, 0); - inode->i_size = 0; - inode->i_blocks = 0; - inode->i_bytes = 0; - inode->i_generation = 0; -#ifdef CONFIG_QUOTA - memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); -#endif - inode->i_pipe = NULL; - inode->i_bdev = NULL; - inode->i_cdev = NULL; - inode->i_rdev = 0; -#ifdef CONFIG_SECURITY - inode->i_security = NULL; -#endif - inode->dirtied_when = 0; - - INIT_LIST_HEAD(&inode->i_list); - INIT_LIST_HEAD(&inode->i_sb_list); - inode->i_state = 0; -#endif - - spin_lock_init(&inode->i_lock); - mutex_init(&inode->i_mutex); - init_rwsem(&inode->i_alloc_sem); - - mapping->host = NULL; /* instead of inode */ - mapping->flags = 0; - mapping_set_gfp_mask(mapping, gfp_mask); - mapping->assoc_mapping = NULL; - mapping->backing_dev_info = nilfs->ns_bdi; - - inode->i_mapping = mapping; - } + mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); + if (!mi) + return -ENOMEM; - return inode; -} + init_rwsem(&mi->mi_sem); + inode->i_private = mi; -struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, - ino_t ino, size_t objsz) -{ - struct inode *inode; - - inode = nilfs_mdt_new_common(nilfs, sb, ino, NILFS_MDT_GFP, objsz); - if (!inode) - return NULL; + inode->i_mode = S_IFREG; + mapping_set_gfp_mask(inode->i_mapping, gfp_mask); + inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; inode->i_op = &def_mdt_iops; inode->i_fop = &def_mdt_fops; inode->i_mapping->a_ops = &def_mdt_aops; - return inode; + + return 0; } void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, @@ -549,34 +444,159 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); } -void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) +static const struct address_space_operations shadow_map_aops = { + .sync_page = block_sync_page, +}; + +/** + * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file + * @inode: inode of the metadata file + * @shadow: shadow mapping + */ +int nilfs_mdt_setup_shadow_map(struct inode *inode, + struct nilfs_shadow_map *shadow) { - shadow->i_mapping->assoc_mapping = orig->i_mapping; - NILFS_I(shadow)->i_btnode_cache.assoc_mapping = - &NILFS_I(orig)->i_btnode_cache; + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct backing_dev_info *bdi = inode->i_sb->s_bdi; + + INIT_LIST_HEAD(&shadow->frozen_buffers); + nilfs_mapping_init_once(&shadow->frozen_data); + nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); + nilfs_mapping_init_once(&shadow->frozen_btnodes); + nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); + mi->mi_shadow = shadow; + return 0; } -static void nilfs_mdt_clear(struct inode *inode) +/** + * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map + * @inode: inode of the metadata file + */ +int nilfs_mdt_save_to_shadow_map(struct inode *inode) { + struct nilfs_mdt_info *mi = NILFS_MDT(inode); struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; + int ret; - invalidate_mapping_pages(inode->i_mapping, 0, -1); - truncate_inode_pages(inode->i_mapping, 0); + ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); + if (ret) + goto out; + + ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, + &ii->i_btnode_cache); + if (ret) + goto out; - if (test_bit(NILFS_I_BMAP, &ii->i_state)) - nilfs_bmap_clear(ii->i_bmap); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store); + out: + return ret; } -void nilfs_mdt_destroy(struct inode *inode) +int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) { - struct nilfs_mdt_info *mdi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; + struct buffer_head *bh_frozen; + struct page *page; + int blkbits = inode->i_blkbits; + int ret = -ENOMEM; + + page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); + if (!page) + return ret; + + if (!page_has_buffers(page)) + create_empty_buffers(page, 1 << blkbits, 0); + + bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); + if (bh_frozen) { + if (!buffer_uptodate(bh_frozen)) + nilfs_copy_buffer(bh_frozen, bh); + if (list_empty(&bh_frozen->b_assoc_buffers)) { + list_add_tail(&bh_frozen->b_assoc_buffers, + &shadow->frozen_buffers); + set_buffer_nilfs_redirected(bh); + } else { + brelse(bh_frozen); /* already frozen */ + } + ret = 0; + } + unlock_page(page); + page_cache_release(page); + return ret; +} + +struct buffer_head * +nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) +{ + struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; + struct buffer_head *bh_frozen = NULL; + struct page *page; + int n; + + page = find_lock_page(&shadow->frozen_data, bh->b_page->index); + if (page) { + if (page_has_buffers(page)) { + n = bh_offset(bh) >> inode->i_blkbits; + bh_frozen = nilfs_page_get_nth_block(page, n); + } + unlock_page(page); + page_cache_release(page); + } + return bh_frozen; +} + +static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow) +{ + struct list_head *head = &shadow->frozen_buffers; + struct buffer_head *bh; + + while (!list_empty(head)) { + bh = list_first_entry(head, struct buffer_head, + b_assoc_buffers); + list_del_init(&bh->b_assoc_buffers); + brelse(bh); /* drop ref-count to make it releasable */ + } +} + +/** + * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state + * @inode: inode of the metadata file + */ +void nilfs_mdt_restore_from_shadow_map(struct inode *inode) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_inode_info *ii = NILFS_I(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; + + down_write(&mi->mi_sem); - if (mdi->mi_palloc_cache) - nilfs_palloc_destroy_cache(inode); - nilfs_mdt_clear(inode); + if (mi->mi_palloc_cache) + nilfs_palloc_clear_cache(inode); + + nilfs_clear_dirty_pages(inode->i_mapping); + nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); + + nilfs_clear_dirty_pages(&ii->i_btnode_cache); + nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); + + nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); + + up_write(&mi->mi_sem); +} + +/** + * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches + * @inode: inode of the metadata file + */ +void nilfs_mdt_clear_shadow_map(struct inode *inode) +{ + struct nilfs_mdt_info *mi = NILFS_MDT(inode); + struct nilfs_shadow_map *shadow = mi->mi_shadow; - kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ - kfree(mdi); - nilfs_destroy_inode(inode); + down_write(&mi->mi_sem); + nilfs_release_frozen_buffers(shadow); + truncate_inode_pages(&shadow->frozen_data, 0); + truncate_inode_pages(&shadow->frozen_btnodes, 0); + up_write(&mi->mi_sem); } diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index 6c4bbb0..b13734b 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h @@ -28,26 +28,33 @@ #include "nilfs.h" #include "page.h" +struct nilfs_shadow_map { + struct nilfs_bmap_store bmap_store; + struct address_space frozen_data; + struct address_space frozen_btnodes; + struct list_head frozen_buffers; +}; + /** * struct nilfs_mdt_info - on-memory private data of meta data files - * @mi_nilfs: back pointer to the_nilfs struct * @mi_sem: reader/writer semaphore for meta data operations * @mi_bgl: per-blockgroup locking * @mi_entry_size: size of an entry * @mi_first_entry_offset: offset to the first entry * @mi_entries_per_block: number of entries in a block * @mi_palloc_cache: persistent object allocator cache + * @mi_shadow: shadow of bmap and page caches * @mi_blocks_per_group: number of blocks in a group * @mi_blocks_per_desc_block: number of blocks per descriptor block */ struct nilfs_mdt_info { - struct the_nilfs *mi_nilfs; struct rw_semaphore mi_sem; struct blockgroup_lock *mi_bgl; unsigned mi_entry_size; unsigned mi_first_entry_offset; unsigned long mi_entries_per_block; struct nilfs_palloc_cache *mi_palloc_cache; + struct nilfs_shadow_map *mi_shadow; unsigned long mi_blocks_per_group; unsigned long mi_blocks_per_desc_block; }; @@ -59,9 +66,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) { - struct super_block *sb = inode->i_sb; - - return sb ? NILFS_SB(sb)->s_nilfs : NILFS_MDT(inode)->mi_nilfs; + return NILFS_SB(inode->i_sb)->s_nilfs; } /* Default GFP flags using highmem */ @@ -76,14 +81,17 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long); int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); int nilfs_mdt_fetch_dirty(struct inode *); -struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, - size_t); -struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, - ino_t, gfp_t, size_t); -void nilfs_mdt_destroy(struct inode *); +int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz); void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); -void nilfs_mdt_set_shadow(struct inode *, struct inode *); +int nilfs_mdt_setup_shadow_map(struct inode *inode, + struct nilfs_shadow_map *shadow); +int nilfs_mdt_save_to_shadow_map(struct inode *inode); +void nilfs_mdt_restore_from_shadow_map(struct inode *inode); +void nilfs_mdt_clear_shadow_map(struct inode *inode); +int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh); +struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, + struct buffer_head *bh); #define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh) @@ -100,7 +108,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode) static inline __u64 nilfs_mdt_cno(struct inode *inode) { - return NILFS_MDT(inode)->mi_nilfs->ns_cno; + return NILFS_I_NILFS(inode)->ns_cno; } #define nilfs_mdt_bgl_lock(inode, bg) \ diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index ad6ed2c..6e9557e 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -40,7 +40,11 @@ #include <linux/pagemap.h> #include "nilfs.h" +#include "export.h" +#define NILFS_FID_SIZE_NON_CONNECTABLE \ + (offsetof(struct nilfs_fid, parent_gen) / 4) +#define NILFS_FID_SIZE_CONNECTABLE (sizeof(struct nilfs_fid) / 4) static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) { @@ -70,29 +74,13 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) ino = nilfs_inode_by_name(dir, &dentry->d_name); inode = NULL; if (ino) { - inode = nilfs_iget(dir->i_sb, ino); + inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); if (IS_ERR(inode)) return ERR_CAST(inode); } return d_splice_alias(inode, dentry); } -struct dentry *nilfs_get_parent(struct dentry *child) -{ - unsigned long ino; - struct inode *inode; - struct qstr dotdot = {.name = "..", .len = 2}; - - ino = nilfs_inode_by_name(child->d_inode, &dotdot); - if (!ino) - return ERR_PTR(-ENOENT); - - inode = nilfs_iget(child->d_inode->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - return d_obtain_alias(inode); -} - /* * By the time this is called, we already have created * the directory cache entry for the new file, but it @@ -219,7 +207,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, inode->i_ctime = CURRENT_TIME; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + ihold(inode); err = nilfs_add_nondir(dentry, inode); if (!err) @@ -468,6 +456,115 @@ out: return err; } +/* + * Export operations + */ +static struct dentry *nilfs_get_parent(struct dentry *child) +{ + unsigned long ino; + struct inode *inode; + struct qstr dotdot = {.name = "..", .len = 2}; + struct nilfs_root *root; + + ino = nilfs_inode_by_name(child->d_inode, &dotdot); + if (!ino) + return ERR_PTR(-ENOENT); + + root = NILFS_I(child->d_inode)->i_root; + + inode = nilfs_iget(child->d_inode->i_sb, root, ino); + if (IS_ERR(inode)) + return ERR_CAST(inode); + + return d_obtain_alias(inode); +} + +static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno, + u64 ino, u32 gen) +{ + struct nilfs_root *root; + struct inode *inode; + + if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO) + return ERR_PTR(-ESTALE); + + root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); + if (!root) + return ERR_PTR(-ESTALE); + + inode = nilfs_iget(sb, root, ino); + nilfs_put_root(root); + + if (IS_ERR(inode)) + return ERR_CAST(inode); + if (gen && inode->i_generation != gen) { + iput(inode); + return ERR_PTR(-ESTALE); + } + return d_obtain_alias(inode); +} + +static struct dentry *nilfs_fh_to_dentry(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct nilfs_fid *fid = (struct nilfs_fid *)fh; + + if ((fh_len != NILFS_FID_SIZE_NON_CONNECTABLE && + fh_len != NILFS_FID_SIZE_CONNECTABLE) || + (fh_type != FILEID_NILFS_WITH_PARENT && + fh_type != FILEID_NILFS_WITHOUT_PARENT)) + return NULL; + + return nilfs_get_dentry(sb, fid->cno, fid->ino, fid->gen); +} + +static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, + int fh_len, int fh_type) +{ + struct nilfs_fid *fid = (struct nilfs_fid *)fh; + + if (fh_len != NILFS_FID_SIZE_CONNECTABLE || + fh_type != FILEID_NILFS_WITH_PARENT) + return NULL; + + return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); +} + +static int nilfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp, + int connectable) +{ + struct nilfs_fid *fid = (struct nilfs_fid *)fh; + struct inode *inode = dentry->d_inode; + struct nilfs_root *root = NILFS_I(inode)->i_root; + int type; + + if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE || + (connectable && *lenp < NILFS_FID_SIZE_CONNECTABLE)) + return 255; + + fid->cno = root->cno; + fid->ino = inode->i_ino; + fid->gen = inode->i_generation; + + if (connectable && !S_ISDIR(inode->i_mode)) { + struct inode *parent; + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent->d_inode; + fid->parent_ino = parent->i_ino; + fid->parent_gen = parent->i_generation; + spin_unlock(&dentry->d_lock); + + type = FILEID_NILFS_WITH_PARENT; + *lenp = NILFS_FID_SIZE_CONNECTABLE; + } else { + type = FILEID_NILFS_WITHOUT_PARENT; + *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; + } + + return type; +} + const struct inode_operations nilfs_dir_inode_operations = { .create = nilfs_create, .lookup = nilfs_lookup, @@ -491,4 +588,12 @@ const struct inode_operations nilfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .permission = nilfs_permission, +}; + +const struct export_operations nilfs_export_ops = { + .encode_fh = nilfs_encode_fh, + .fh_to_dentry = nilfs_fh_to_dentry, + .fh_to_parent = nilfs_fh_to_parent, + .get_parent = nilfs_get_parent, }; diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index 47d6d79..f7560da 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h @@ -32,7 +32,6 @@ #include "the_nilfs.h" #include "sb.h" #include "bmap.h" -#include "bmap_union.h" /* * nilfs inode data in memory @@ -41,7 +40,7 @@ struct nilfs_inode_info { __u32 i_flags; unsigned long i_state; /* Dynamic state flags */ struct nilfs_bmap *i_bmap; - union nilfs_bmap_union i_bmap_union; + struct nilfs_bmap i_bmap_data; __u64 i_xattr; /* sector_t ??? */ __u32 i_dir_start_lookup; __u64 i_cno; /* check point number for GC inode */ @@ -60,6 +59,7 @@ struct nilfs_inode_info { #endif struct buffer_head *i_bh; /* i_bh contains a new or dirty disk inode */ + struct nilfs_root *i_root; struct inode vfs_inode; }; @@ -71,9 +71,7 @@ static inline struct nilfs_inode_info *NILFS_I(const struct inode *inode) static inline struct nilfs_inode_info * NILFS_BMAP_I(const struct nilfs_bmap *bmap) { - return container_of((union nilfs_bmap_union *)bmap, - struct nilfs_inode_info, - i_bmap_union); + return container_of(bmap, struct nilfs_inode_info, i_bmap_data); } static inline struct inode *NILFS_BTNC_I(struct address_space *btnc) @@ -103,7 +101,14 @@ enum { NILFS_I_INODE_DIRTY, /* write_inode is requested */ NILFS_I_BMAP, /* has bmap and btnode_cache */ NILFS_I_GCINODE, /* inode for GC, on memory only */ - NILFS_I_GCDAT, /* shadow DAT, on memory only */ +}; + +/* + * commit flags for nilfs_commit_super and nilfs_sync_super + */ +enum { + NILFS_SB_COMMIT = 0, /* Commit a super block alternately */ + NILFS_SB_COMMIT_ALL /* Commit both super blocks */ }; /* @@ -187,7 +192,7 @@ static inline int nilfs_doing_construction(void) static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) { - return nilfs_doing_gc() ? nilfs->ns_gc_dat : nilfs->ns_dat; + return nilfs->ns_dat; } /* @@ -195,12 +200,9 @@ static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) */ #ifdef CONFIG_NILFS_POSIX_ACL #error "NILFS: not yet supported POSIX ACL" -extern int nilfs_permission(struct inode *, int, struct nameidata *); extern int nilfs_acl_chmod(struct inode *); extern int nilfs_init_acl(struct inode *, struct inode *); #else -#define nilfs_permission NULL - static inline int nilfs_acl_chmod(struct inode *inode) { return 0; @@ -242,11 +244,19 @@ extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); extern void nilfs_set_inode_flags(struct inode *); extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); -extern struct inode *nilfs_iget(struct super_block *, unsigned long); +struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); +struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); +struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, + unsigned long ino); +extern struct inode *nilfs_iget_for_gc(struct super_block *sb, + unsigned long ino, __u64 cno); extern void nilfs_update_inode(struct inode *, struct buffer_head *); extern void nilfs_truncate(struct inode *); -extern void nilfs_delete_inode(struct inode *); +extern void nilfs_evict_inode(struct inode *); extern int nilfs_setattr(struct dentry *, struct iattr *); +int nilfs_permission(struct inode *inode, int mask); extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, struct buffer_head **); extern int nilfs_inode_dirty(struct inode *); @@ -255,11 +265,7 @@ extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, extern int nilfs_mark_inode_dirty(struct inode *); extern void nilfs_dirty_inode(struct inode *); -/* namei.c */ -extern struct dentry *nilfs_get_parent(struct dentry *); - /* super.c */ -extern struct inode *nilfs_alloc_inode_common(struct the_nilfs *); extern struct inode *nilfs_alloc_inode(struct super_block *); extern void nilfs_destroy_inode(struct inode *); extern void nilfs_error(struct super_block *, const char *, const char *, ...) @@ -270,9 +276,17 @@ extern struct nilfs_super_block * nilfs_read_super_block(struct super_block *, u64, int, struct buffer_head **); extern int nilfs_store_magic_and_option(struct super_block *, struct nilfs_super_block *, char *); +extern int nilfs_check_feature_compatibility(struct super_block *, + struct nilfs_super_block *); +extern void nilfs_set_log_cursor(struct nilfs_super_block *, + struct the_nilfs *); +extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *, + int flip); extern int nilfs_commit_super(struct nilfs_sb_info *, int); -extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); -extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); +extern int nilfs_cleanup_super(struct nilfs_sb_info *); +int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, + struct nilfs_root **root); +int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); /* gcinode.c */ int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, @@ -280,16 +294,8 @@ int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64, struct buffer_head **); int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *); -int nilfs_init_gccache(struct the_nilfs *); -void nilfs_destroy_gccache(struct the_nilfs *); -void nilfs_clear_gcinode(struct inode *); -struct inode *nilfs_gc_iget(struct the_nilfs *, ino_t, __u64); -void nilfs_remove_all_gcinode(struct the_nilfs *); - -/* gcdat.c */ -int nilfs_init_gcdat_inode(struct the_nilfs *); -void nilfs_commit_gcdat_inode(struct the_nilfs *); -void nilfs_clear_gcdat_inode(struct the_nilfs *); +int nilfs_init_gcinode(struct inode *inode); +void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs); /* * Inodes and files operations diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 8de3e1e..a6c3c2e8 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -37,7 +37,8 @@ #define NILFS_BUFFER_INHERENT_BITS \ ((1UL << BH_Uptodate) | (1UL << BH_Mapped) | (1UL << BH_NILFS_Node) | \ - (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated)) + (1UL << BH_NILFS_Volatile) | (1UL << BH_NILFS_Allocated) | \ + (1UL << BH_NILFS_Checked)) static struct buffer_head * __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, @@ -78,8 +79,8 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, { int blkbits = inode->i_blkbits; pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); - struct page *page, *opage; - struct buffer_head *bh, *obh; + struct page *page; + struct buffer_head *bh; page = grab_cache_page(mapping, index); if (unlikely(!page)) @@ -91,30 +92,6 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, page_cache_release(page); return NULL; } - if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { - /* - * Shadow page cache uses assoc_mapping to point its original - * page cache. The following code tries the original cache - * if the given cache is a shadow and it didn't hit. - */ - opage = find_lock_page(mapping->assoc_mapping, index); - if (!opage) - return bh; - - obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, - b_state); - if (buffer_uptodate(obh)) { - nilfs_copy_buffer(bh, obh); - if (buffer_dirty(obh)) { - nilfs_mark_buffer_dirty(bh); - if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) - nilfs_mdt_mark_dirty(inode); - } - } - brelse(obh); - unlock_page(opage); - page_cache_release(opage); - } return bh; } @@ -129,6 +106,8 @@ void nilfs_forget_buffer(struct buffer_head *bh) lock_buffer(bh); clear_buffer_nilfs_volatile(bh); + clear_buffer_nilfs_checked(bh); + clear_buffer_nilfs_redirected(bh); clear_buffer_dirty(bh); if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); @@ -480,6 +459,8 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) lock_buffer(bh); clear_buffer_dirty(bh); clear_buffer_nilfs_volatile(bh); + clear_buffer_nilfs_checked(bh); + clear_buffer_nilfs_redirected(bh); clear_buffer_uptodate(bh); clear_buffer_mapped(bh); unlock_buffer(bh); @@ -510,6 +491,31 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, } return nc; } + +void nilfs_mapping_init_once(struct address_space *mapping) +{ + memset(mapping, 0, sizeof(*mapping)); + INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); + spin_lock_init(&mapping->tree_lock); + INIT_LIST_HEAD(&mapping->private_list); + spin_lock_init(&mapping->private_lock); + + spin_lock_init(&mapping->i_mmap_lock); + INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); + INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); +} + +void nilfs_mapping_init(struct address_space *mapping, + struct backing_dev_info *bdi, + const struct address_space_operations *aops) +{ + mapping->host = NULL; + mapping->flags = 0; + mapping_set_gfp_mask(mapping, GFP_NOFS); + mapping->assoc_mapping = NULL; + mapping->backing_dev_info = bdi; + mapping->a_ops = aops; +} /* * NILFS2 needs clear_page_dirty() in the following two cases: diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index 8abca4d..fb9e8a8 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h @@ -34,11 +34,15 @@ enum { BH_NILFS_Allocated = BH_PrivateStart, BH_NILFS_Node, BH_NILFS_Volatile, + BH_NILFS_Checked, + BH_NILFS_Redirected, }; BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ BUFFER_FNS(NILFS_Volatile, nilfs_volatile) +BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ +BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ void nilfs_mark_buffer_dirty(struct buffer_head *bh); @@ -57,6 +61,10 @@ void nilfs_free_private_page(struct page *); int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); void nilfs_copy_back_pages(struct address_space *, struct address_space *); void nilfs_clear_dirty_pages(struct address_space *); +void nilfs_mapping_init_once(struct address_space *mapping); +void nilfs_mapping_init(struct address_space *mapping, + struct backing_dev_info *bdi, + const struct address_space_operations *aops); unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); #define NILFS_PAGE_BUG(page, m, a...) \ diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index bae2a51..5d2711c2 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -91,27 +91,9 @@ static int nilfs_warn_segment_error(int err) return -EINVAL; } -static void store_segsum_info(struct nilfs_segsum_info *ssi, - struct nilfs_segment_summary *sum, - unsigned int blocksize) -{ - ssi->flags = le16_to_cpu(sum->ss_flags); - ssi->seg_seq = le64_to_cpu(sum->ss_seq); - ssi->ctime = le64_to_cpu(sum->ss_create); - ssi->next = le64_to_cpu(sum->ss_next); - ssi->nblocks = le32_to_cpu(sum->ss_nblocks); - ssi->nfinfo = le32_to_cpu(sum->ss_nfinfo); - ssi->sumbytes = le32_to_cpu(sum->ss_sumbytes); - - ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize); - ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi); - - /* need to verify ->ss_bytes field if read ->ss_cno */ -} - /** - * calc_crc_cont - check CRC of blocks continuously - * @sbi: nilfs_sb_info + * nilfs_compute_checksum - compute checksum of blocks continuously + * @nilfs: nilfs object * @bhs: buffer head of start block * @sum: place to store result * @offset: offset bytes in the first block @@ -119,23 +101,25 @@ static void store_segsum_info(struct nilfs_segsum_info *ssi, * @start: DBN of start block * @nblock: number of blocks to be checked */ -static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, - u32 *sum, unsigned long offset, u64 check_bytes, - sector_t start, unsigned long nblock) +static int nilfs_compute_checksum(struct the_nilfs *nilfs, + struct buffer_head *bhs, u32 *sum, + unsigned long offset, u64 check_bytes, + sector_t start, unsigned long nblock) { - unsigned long blocksize = sbi->s_super->s_blocksize; + unsigned int blocksize = nilfs->ns_blocksize; unsigned long size; u32 crc; BUG_ON(offset >= blocksize); check_bytes -= offset; size = min_t(u64, check_bytes, blocksize - offset); - crc = crc32_le(sbi->s_nilfs->ns_crc_seed, + crc = crc32_le(nilfs->ns_crc_seed, (unsigned char *)bhs->b_data + offset, size); if (--nblock > 0) { do { - struct buffer_head *bh - = sb_bread(sbi->s_super, ++start); + struct buffer_head *bh; + + bh = __bread(nilfs->ns_bdev, ++start, blocksize); if (!bh) return -EIO; check_bytes -= size; @@ -150,12 +134,12 @@ static int calc_crc_cont(struct nilfs_sb_info *sbi, struct buffer_head *bhs, /** * nilfs_read_super_root_block - read super root block - * @sb: super_block + * @nilfs: nilfs object * @sr_block: disk block number of the super root block * @pbh: address of a buffer_head pointer to return super root buffer * @check: CRC check flag */ -int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, +int nilfs_read_super_root_block(struct the_nilfs *nilfs, sector_t sr_block, struct buffer_head **pbh, int check) { struct buffer_head *bh_sr; @@ -164,7 +148,7 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, int ret; *pbh = NULL; - bh_sr = sb_bread(sb, sr_block); + bh_sr = __bread(nilfs->ns_bdev, sr_block, nilfs->ns_blocksize); if (unlikely(!bh_sr)) { ret = NILFS_SEG_FAIL_IO; goto failed; @@ -174,12 +158,13 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, if (check) { unsigned bytes = le16_to_cpu(sr->sr_bytes); - if (bytes == 0 || bytes > sb->s_blocksize) { + if (bytes == 0 || bytes > nilfs->ns_blocksize) { ret = NILFS_SEG_FAIL_CHECKSUM_SUPER_ROOT; goto failed_bh; } - if (calc_crc_cont(NILFS_SB(sb), bh_sr, &crc, - sizeof(sr->sr_sum), bytes, sr_block, 1)) { + if (nilfs_compute_checksum( + nilfs, bh_sr, &crc, sizeof(sr->sr_sum), bytes, + sr_block, 1)) { ret = NILFS_SEG_FAIL_IO; goto failed_bh; } @@ -199,64 +184,76 @@ int nilfs_read_super_root_block(struct super_block *sb, sector_t sr_block, } /** - * load_segment_summary - read segment summary of the specified partial segment - * @sbi: nilfs_sb_info - * @pseg_start: start disk block number of partial segment - * @seg_seq: sequence number requested - * @ssi: pointer to nilfs_segsum_info struct to store information + * nilfs_read_log_header - read summary header of the specified log + * @nilfs: nilfs object + * @start_blocknr: start block number of the log + * @sum: pointer to return segment summary structure */ -static int -load_segment_summary(struct nilfs_sb_info *sbi, sector_t pseg_start, - u64 seg_seq, struct nilfs_segsum_info *ssi) +static struct buffer_head * +nilfs_read_log_header(struct the_nilfs *nilfs, sector_t start_blocknr, + struct nilfs_segment_summary **sum) { struct buffer_head *bh_sum; - struct nilfs_segment_summary *sum; + + bh_sum = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize); + if (bh_sum) + *sum = (struct nilfs_segment_summary *)bh_sum->b_data; + return bh_sum; +} + +/** + * nilfs_validate_log - verify consistency of log + * @nilfs: nilfs object + * @seg_seq: sequence number of segment + * @bh_sum: buffer head of summary block + * @sum: segment summary struct + */ +static int nilfs_validate_log(struct the_nilfs *nilfs, u64 seg_seq, + struct buffer_head *bh_sum, + struct nilfs_segment_summary *sum) +{ unsigned long nblock; u32 crc; - int ret = NILFS_SEG_FAIL_IO; + int ret; - bh_sum = sb_bread(sbi->s_super, pseg_start); - if (!bh_sum) + ret = NILFS_SEG_FAIL_MAGIC; + if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) goto out; - sum = (struct nilfs_segment_summary *)bh_sum->b_data; - - /* Check consistency of segment summary */ - if (le32_to_cpu(sum->ss_magic) != NILFS_SEGSUM_MAGIC) { - ret = NILFS_SEG_FAIL_MAGIC; - goto failed; - } - store_segsum_info(ssi, sum, sbi->s_super->s_blocksize); - if (seg_seq != ssi->seg_seq) { - ret = NILFS_SEG_FAIL_SEQ; - goto failed; - } + ret = NILFS_SEG_FAIL_SEQ; + if (le64_to_cpu(sum->ss_seq) != seg_seq) + goto out; - nblock = ssi->nblocks; - if (unlikely(nblock == 0 || - nblock > sbi->s_nilfs->ns_blocks_per_segment)) { + nblock = le32_to_cpu(sum->ss_nblocks); + ret = NILFS_SEG_FAIL_CONSISTENCY; + if (unlikely(nblock == 0 || nblock > nilfs->ns_blocks_per_segment)) /* This limits the number of blocks read in the CRC check */ - ret = NILFS_SEG_FAIL_CONSISTENCY; - goto failed; - } - if (calc_crc_cont(sbi, bh_sum, &crc, sizeof(sum->ss_datasum), - ((u64)nblock << sbi->s_super->s_blocksize_bits), - pseg_start, nblock)) { - ret = NILFS_SEG_FAIL_IO; - goto failed; - } - if (crc == le32_to_cpu(sum->ss_datasum)) - ret = 0; - else - ret = NILFS_SEG_FAIL_CHECKSUM_FULL; - failed: - brelse(bh_sum); - out: + goto out; + + ret = NILFS_SEG_FAIL_IO; + if (nilfs_compute_checksum(nilfs, bh_sum, &crc, sizeof(sum->ss_datasum), + ((u64)nblock << nilfs->ns_blocksize_bits), + bh_sum->b_blocknr, nblock)) + goto out; + + ret = NILFS_SEG_FAIL_CHECKSUM_FULL; + if (crc != le32_to_cpu(sum->ss_datasum)) + goto out; + ret = 0; +out: return ret; } -static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, - unsigned int *offset, unsigned int bytes) +/** + * nilfs_read_summary_info - read an item on summary blocks of a log + * @nilfs: nilfs object + * @pbh: the current buffer head on summary blocks [in, out] + * @offset: the current byte offset on summary blocks [in, out] + * @bytes: byte size of the item to be read + */ +static void *nilfs_read_summary_info(struct the_nilfs *nilfs, + struct buffer_head **pbh, + unsigned int *offset, unsigned int bytes) { void *ptr; sector_t blocknr; @@ -265,7 +262,8 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, if (bytes > (*pbh)->b_size - *offset) { blocknr = (*pbh)->b_blocknr; brelse(*pbh); - *pbh = sb_bread(sb, blocknr + 1); + *pbh = __bread(nilfs->ns_bdev, blocknr + 1, + nilfs->ns_blocksize); if (unlikely(!*pbh)) return NULL; *offset = 0; @@ -275,9 +273,18 @@ static void *segsum_get(struct super_block *sb, struct buffer_head **pbh, return ptr; } -static void segsum_skip(struct super_block *sb, struct buffer_head **pbh, - unsigned int *offset, unsigned int bytes, - unsigned long count) +/** + * nilfs_skip_summary_info - skip items on summary blocks of a log + * @nilfs: nilfs object + * @pbh: the current buffer head on summary blocks [in, out] + * @offset: the current byte offset on summary blocks [in, out] + * @bytes: byte size of the item to be skipped + * @count: number of items to be skipped + */ +static void nilfs_skip_summary_info(struct the_nilfs *nilfs, + struct buffer_head **pbh, + unsigned int *offset, unsigned int bytes, + unsigned long count) { unsigned int rest_item_in_current_block = ((*pbh)->b_size - *offset) / bytes; @@ -294,36 +301,46 @@ static void segsum_skip(struct super_block *sb, struct buffer_head **pbh, *offset = bytes * (count - (bcnt - 1) * nitem_per_block); brelse(*pbh); - *pbh = sb_bread(sb, blocknr + bcnt); + *pbh = __bread(nilfs->ns_bdev, blocknr + bcnt, + nilfs->ns_blocksize); } } -static int -collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, - struct nilfs_segsum_info *ssi, - struct list_head *head) +/** + * nilfs_scan_dsync_log - get block information of a log written for data sync + * @nilfs: nilfs object + * @start_blocknr: start block number of the log + * @sum: log summary information + * @head: list head to add nilfs_recovery_block struct + */ +static int nilfs_scan_dsync_log(struct the_nilfs *nilfs, sector_t start_blocknr, + struct nilfs_segment_summary *sum, + struct list_head *head) { struct buffer_head *bh; unsigned int offset; - unsigned long nfinfo = ssi->nfinfo; - sector_t blocknr = sum_blocknr + ssi->nsumblk; + u32 nfinfo, sumbytes; + sector_t blocknr; ino_t ino; int err = -EIO; + nfinfo = le32_to_cpu(sum->ss_nfinfo); if (!nfinfo) return 0; - bh = sb_bread(sbi->s_super, sum_blocknr); + sumbytes = le32_to_cpu(sum->ss_sumbytes); + blocknr = start_blocknr + DIV_ROUND_UP(sumbytes, nilfs->ns_blocksize); + bh = __bread(nilfs->ns_bdev, start_blocknr, nilfs->ns_blocksize); if (unlikely(!bh)) goto out; - offset = le16_to_cpu( - ((struct nilfs_segment_summary *)bh->b_data)->ss_bytes); + offset = le16_to_cpu(sum->ss_bytes); for (;;) { unsigned long nblocks, ndatablk, nnodeblk; struct nilfs_finfo *finfo; - finfo = segsum_get(sbi->s_super, &bh, &offset, sizeof(*finfo)); + finfo = nilfs_read_summary_info(nilfs, &bh, &offset, + sizeof(*finfo)); if (unlikely(!finfo)) goto out; @@ -336,8 +353,8 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, struct nilfs_recovery_block *rb; struct nilfs_binfo_v *binfo; - binfo = segsum_get(sbi->s_super, &bh, &offset, - sizeof(*binfo)); + binfo = nilfs_read_summary_info(nilfs, &bh, &offset, + sizeof(*binfo)); if (unlikely(!binfo)) goto out; @@ -355,9 +372,9 @@ collect_blocks_from_segsum(struct nilfs_sb_info *sbi, sector_t sum_blocknr, } if (--nfinfo == 0) break; - blocknr += nnodeblk; /* always 0 for the data sync segments */ - segsum_skip(sbi->s_super, &bh, &offset, sizeof(__le64), - nnodeblk); + blocknr += nnodeblk; /* always 0 for data sync logs */ + nilfs_skip_summary_info(nilfs, &bh, &offset, sizeof(__le64), + nnodeblk); if (unlikely(!bh)) goto out; } @@ -423,7 +440,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, segnum[2] = ri->ri_segnum; segnum[3] = ri->ri_nextnum; - nilfs_attach_writer(nilfs, sbi); /* * Releasing the next segment of the latest super root. * The next segment is invalidated by this recovery. @@ -463,18 +479,17 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, failed: /* No need to recover sufile because it will be destroyed on error */ - nilfs_detach_writer(nilfs, sbi); return err; } -static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi, +static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, struct nilfs_recovery_block *rb, struct page *page) { struct buffer_head *bh_org; void *kaddr; - bh_org = sb_bread(sbi->s_super, rb->blocknr); + bh_org = __bread(nilfs->ns_bdev, rb->blocknr, nilfs->ns_blocksize); if (unlikely(!bh_org)) return -EIO; @@ -485,19 +500,21 @@ static int nilfs_recovery_copy_block(struct nilfs_sb_info *sbi, return 0; } -static int recover_dsync_blocks(struct nilfs_sb_info *sbi, - struct list_head *head, - unsigned long *nr_salvaged_blocks) +static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, + struct nilfs_root *root, + struct list_head *head, + unsigned long *nr_salvaged_blocks) { struct inode *inode; struct nilfs_recovery_block *rb, *n; - unsigned blocksize = sbi->s_super->s_blocksize; + unsigned blocksize = nilfs->ns_blocksize; struct page *page; loff_t pos; int err = 0, err2 = 0; list_for_each_entry_safe(rb, n, head, list) { - inode = nilfs_iget(sbi->s_super, rb->ino); + inode = nilfs_iget(sbi->s_super, root, rb->ino); if (IS_ERR(inode)) { err = PTR_ERR(inode); inode = NULL; @@ -505,13 +522,16 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, } pos = rb->blkoff << inode->i_blkbits; - page = NULL; - err = block_write_begin(NULL, inode->i_mapping, pos, blocksize, - 0, &page, NULL, nilfs_get_block); - if (unlikely(err)) + err = block_write_begin(inode->i_mapping, pos, blocksize, + 0, &page, nilfs_get_block); + if (unlikely(err)) { + loff_t isize = inode->i_size; + if (pos + blocksize > isize) + vmtruncate(inode, isize); goto failed_inode; + } - err = nilfs_recovery_copy_block(sbi, rb, page); + err = nilfs_recovery_copy_block(nilfs, rb, page); if (unlikely(err)) goto failed_page; @@ -551,18 +571,21 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, /** * nilfs_do_roll_forward - salvage logical segments newer than the latest * checkpoint + * @nilfs: nilfs object * @sbi: nilfs_sb_info - * @nilfs: the_nilfs * @ri: pointer to a nilfs_recovery_info */ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, + struct nilfs_root *root, struct nilfs_recovery_info *ri) { - struct nilfs_segsum_info ssi; + struct buffer_head *bh_sum = NULL; + struct nilfs_segment_summary *sum; sector_t pseg_start; sector_t seg_start, seg_end; /* Starting/ending DBN of full segment */ unsigned long nsalvaged_blocks = 0; + unsigned int flags; u64 seg_seq; __u64 segnum, nextnum = 0; int empty_seg = 0; @@ -574,15 +597,20 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, }; int state = RF_INIT_ST; - nilfs_attach_writer(nilfs, sbi); pseg_start = ri->ri_lsegs_start; seg_seq = ri->ri_lsegs_start_seq; segnum = nilfs_get_segnum_of_block(nilfs, pseg_start); nilfs_get_segment_range(nilfs, segnum, &seg_start, &seg_end); while (segnum != ri->ri_segnum || pseg_start <= ri->ri_pseg_start) { + brelse(bh_sum); + bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum); + if (!bh_sum) { + err = -EIO; + goto failed; + } - ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi); + ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum); if (ret) { if (ret == NILFS_SEG_FAIL_IO) { err = -EIO; @@ -590,33 +618,38 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, } goto strayed; } - if (unlikely(NILFS_SEG_HAS_SR(&ssi))) + + flags = le16_to_cpu(sum->ss_flags); + if (flags & NILFS_SS_SR) goto confused; /* Found a valid partial segment; do recovery actions */ - nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); + nextnum = nilfs_get_segnum_of_block(nilfs, + le64_to_cpu(sum->ss_next)); empty_seg = 0; - nilfs->ns_ctime = ssi.ctime; - if (!(ssi.flags & NILFS_SS_GC)) - nilfs->ns_nongc_ctime = ssi.ctime; + nilfs->ns_ctime = le64_to_cpu(sum->ss_create); + if (!(flags & NILFS_SS_GC)) + nilfs->ns_nongc_ctime = nilfs->ns_ctime; switch (state) { case RF_INIT_ST: - if (!NILFS_SEG_LOGBGN(&ssi) || !NILFS_SEG_DSYNC(&ssi)) + if (!(flags & NILFS_SS_LOGBGN) || + !(flags & NILFS_SS_SYNDT)) goto try_next_pseg; state = RF_DSYNC_ST; /* Fall through */ case RF_DSYNC_ST: - if (!NILFS_SEG_DSYNC(&ssi)) + if (!(flags & NILFS_SS_SYNDT)) goto confused; - err = collect_blocks_from_segsum( - sbi, pseg_start, &ssi, &dsync_blocks); + err = nilfs_scan_dsync_log(nilfs, pseg_start, sum, + &dsync_blocks); if (unlikely(err)) goto failed; - if (NILFS_SEG_LOGEND(&ssi)) { - err = recover_dsync_blocks( - sbi, &dsync_blocks, &nsalvaged_blocks); + if (flags & NILFS_SS_LOGEND) { + err = nilfs_recover_dsync_blocks( + nilfs, sbi, root, &dsync_blocks, + &nsalvaged_blocks); if (unlikely(err)) goto failed; state = RF_INIT_ST; @@ -627,7 +660,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, try_next_pseg: if (pseg_start == ri->ri_lsegs_end) break; - pseg_start += ssi.nblocks; + pseg_start += le32_to_cpu(sum->ss_nblocks); if (pseg_start < seg_end) continue; goto feed_segment; @@ -652,8 +685,8 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, ri->ri_need_recovery = NILFS_RECOVERY_ROLLFORWARD_DONE; } out: + brelse(bh_sum); dispose_recovery_list(&dsync_blocks); - nilfs_detach_writer(sbi->s_nilfs, sbi); return err; confused: @@ -667,7 +700,6 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, } static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, struct nilfs_recovery_info *ri) { struct buffer_head *bh; @@ -677,7 +709,7 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, nilfs_get_segnum_of_block(nilfs, ri->ri_super_root)) return; - bh = sb_getblk(sbi->s_super, ri->ri_lsegs_start); + bh = __getblk(nilfs->ns_bdev, ri->ri_lsegs_start, nilfs->ns_blocksize); BUG_ON(!bh); memset(bh->b_data, 0, bh->b_size); set_buffer_dirty(bh); @@ -690,9 +722,8 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, } /** - * nilfs_recover_logical_segments - salvage logical segments written after - * the latest super root - * @nilfs: the_nilfs + * nilfs_salvage_orphan_logs - salvage logs written after the latest checkpoint + * @nilfs: nilfs object * @sbi: nilfs_sb_info * @ri: pointer to a nilfs_recovery_info struct to store search results. * @@ -709,23 +740,24 @@ static void nilfs_finish_roll_forward(struct the_nilfs *nilfs, * * %-ENOMEM - Insufficient memory available. */ -int nilfs_recover_logical_segments(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, - struct nilfs_recovery_info *ri) +int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, + struct nilfs_sb_info *sbi, + struct nilfs_recovery_info *ri) { + struct nilfs_root *root; int err; if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) return 0; - err = nilfs_attach_checkpoint(sbi, ri->ri_cno); + err = nilfs_attach_checkpoint(sbi, ri->ri_cno, true, &root); if (unlikely(err)) { printk(KERN_ERR "NILFS: error loading the latest checkpoint.\n"); return err; } - err = nilfs_do_roll_forward(nilfs, sbi, ri); + err = nilfs_do_roll_forward(nilfs, sbi, root, ri); if (unlikely(err)) goto failed; @@ -737,7 +769,7 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, goto failed; } - err = nilfs_attach_segment_constructor(sbi); + err = nilfs_attach_segment_constructor(sbi, root); if (unlikely(err)) goto failed; @@ -751,18 +783,17 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, goto failed; } - nilfs_finish_roll_forward(nilfs, sbi, ri); + nilfs_finish_roll_forward(nilfs, ri); } failed: - nilfs_detach_checkpoint(sbi); + nilfs_put_root(root); return err; } /** * nilfs_search_super_root - search the latest valid super root * @nilfs: the_nilfs - * @sbi: nilfs_sb_info * @ri: pointer to a nilfs_recovery_info struct to store search results. * * nilfs_search_super_root() looks for the latest super-root from a partial @@ -775,14 +806,19 @@ int nilfs_recover_logical_segments(struct the_nilfs *nilfs, * %-EINVAL - No valid segment found * * %-EIO - I/O error + * + * %-ENOMEM - Insufficient memory available. */ -int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, +int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_recovery_info *ri) { - struct nilfs_segsum_info ssi; + struct buffer_head *bh_sum = NULL; + struct nilfs_segment_summary *sum; sector_t pseg_start, pseg_end, sr_pseg_start = 0; sector_t seg_start, seg_end; /* range of full segment (block number) */ sector_t b, end; + unsigned long nblocks; + unsigned int flags; u64 seg_seq; __u64 segnum, nextnum = 0; __u64 cno; @@ -801,17 +837,24 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, /* Read ahead segment */ b = seg_start; while (b <= seg_end) - sb_breadahead(sbi->s_super, b++); + __breadahead(nilfs->ns_bdev, b++, nilfs->ns_blocksize); for (;;) { - /* Load segment summary */ - ret = load_segment_summary(sbi, pseg_start, seg_seq, &ssi); + brelse(bh_sum); + ret = NILFS_SEG_FAIL_IO; + bh_sum = nilfs_read_log_header(nilfs, pseg_start, &sum); + if (!bh_sum) + goto failed; + + ret = nilfs_validate_log(nilfs, seg_seq, bh_sum, sum); if (ret) { if (ret == NILFS_SEG_FAIL_IO) goto failed; goto strayed; } - pseg_end = pseg_start + ssi.nblocks - 1; + + nblocks = le32_to_cpu(sum->ss_nblocks); + pseg_end = pseg_start + nblocks - 1; if (unlikely(pseg_end > seg_end)) { ret = NILFS_SEG_FAIL_CONSISTENCY; goto strayed; @@ -821,11 +864,13 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, ri->ri_pseg_start = pseg_start; ri->ri_seq = seg_seq; ri->ri_segnum = segnum; - nextnum = nilfs_get_segnum_of_block(nilfs, ssi.next); + nextnum = nilfs_get_segnum_of_block(nilfs, + le64_to_cpu(sum->ss_next)); ri->ri_nextnum = nextnum; empty_seg = 0; - if (!NILFS_SEG_HAS_SR(&ssi) && !scan_newer) { + flags = le16_to_cpu(sum->ss_flags); + if (!(flags & NILFS_SS_SR) && !scan_newer) { /* This will never happen because a superblock (last_segment) always points to a pseg having a super root. */ @@ -836,14 +881,15 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, if (pseg_start == seg_start) { nilfs_get_segment_range(nilfs, nextnum, &b, &end); while (b <= end) - sb_breadahead(sbi->s_super, b++); + __breadahead(nilfs->ns_bdev, b++, + nilfs->ns_blocksize); } - if (!NILFS_SEG_HAS_SR(&ssi)) { - if (!ri->ri_lsegs_start && NILFS_SEG_LOGBGN(&ssi)) { + if (!(flags & NILFS_SS_SR)) { + if (!ri->ri_lsegs_start && (flags & NILFS_SS_LOGBGN)) { ri->ri_lsegs_start = pseg_start; ri->ri_lsegs_start_seq = seg_seq; } - if (NILFS_SEG_LOGEND(&ssi)) + if (flags & NILFS_SS_LOGEND) ri->ri_lsegs_end = pseg_start; goto try_next_pseg; } @@ -854,12 +900,12 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, ri->ri_lsegs_start = ri->ri_lsegs_end = 0; nilfs_dispose_segment_list(&segments); - nilfs->ns_pseg_offset = (sr_pseg_start = pseg_start) - + ssi.nblocks - seg_start; + sr_pseg_start = pseg_start; + nilfs->ns_pseg_offset = pseg_start + nblocks - seg_start; nilfs->ns_seg_seq = seg_seq; nilfs->ns_segnum = segnum; nilfs->ns_cno = cno; /* nilfs->ns_cno = ri->ri_cno + 1 */ - nilfs->ns_ctime = ssi.ctime; + nilfs->ns_ctime = le64_to_cpu(sum->ss_create); nilfs->ns_nextnum = nextnum; if (scan_newer) @@ -870,15 +916,9 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, scan_newer = 1; } - /* reset region for roll-forward */ - pseg_start += ssi.nblocks; - if (pseg_start < seg_end) - continue; - goto feed_segment; - try_next_pseg: /* Standing on a course, or met an inconsistent state */ - pseg_start += ssi.nblocks; + pseg_start += nblocks; if (pseg_start < seg_end) continue; goto feed_segment; @@ -909,6 +949,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, super_root_found: /* Updating pointers relating to the latest checkpoint */ + brelse(bh_sum); list_splice_tail(&segments, &ri->ri_used_segments); nilfs->ns_last_pseg = sr_pseg_start; nilfs->ns_last_seq = nilfs->ns_seg_seq; @@ -916,6 +957,7 @@ int nilfs_search_super_root(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, return 0; failed: + brelse(bh_sum); nilfs_dispose_segment_list(&segments); return (ret < 0) ? ret : nilfs_warn_segment_error(ret); } diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h index 0776ccc..35a0715 100644 --- a/fs/nilfs2/sb.h +++ b/fs/nilfs2/sb.h @@ -42,11 +42,6 @@ struct nilfs_sc_info; * NILFS super-block data in memory */ struct nilfs_sb_info { - /* Snapshot status */ - __u64 s_snapshot_cno; /* Checkpoint number */ - atomic_t s_inodes_count; - atomic_t s_blocks_count; /* Reserved (might be deleted) */ - /* Mount options */ unsigned long s_mount_opt; uid_t s_resuid; @@ -59,8 +54,6 @@ struct nilfs_sb_info { /* Fundamental members */ struct super_block *s_super; /* reverse pointer to super_block */ struct the_nilfs *s_nilfs; - struct list_head s_list; /* list head for nilfs->ns_supers */ - atomic_t s_count; /* reference count */ /* Segment constructor */ struct list_head s_dirty_files; /* dirty files list */ @@ -68,9 +61,6 @@ struct nilfs_sb_info { spinlock_t s_inode_lock; /* Lock for the nilfs inode. It covers s_dirty_files list */ - /* Metadata files */ - struct inode *s_ifile; /* index file inode */ - /* Inode allocator */ spinlock_t s_next_gen_lock; u32 s_next_generation; diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 2e6a272..0f83e93 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -371,7 +371,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, struct bio *bio = wi->bio; int err; - if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->nilfs->ns_bdi)) { + if (segbuf->sb_nbio > 0 && + bdi_write_congested(segbuf->sb_super->s_bdi)) { wait_for_completion(&segbuf->sb_bio_event); segbuf->sb_nbio--; if (unlikely(atomic_read(&segbuf->sb_err))) { @@ -508,7 +509,7 @@ static int nilfs_segbuf_write(struct nilfs_segment_buffer *segbuf, * Last BIO is always sent through the following * submission. */ - rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG); + rw |= REQ_SYNC | REQ_UNPLUG; res = nilfs_segbuf_submit_bio(segbuf, &wi, rw); } diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h index 85fbb66..b04f08c 100644 --- a/fs/nilfs2/segbuf.h +++ b/fs/nilfs2/segbuf.h @@ -54,17 +54,6 @@ struct nilfs_segsum_info { sector_t next; }; -/* macro for the flags */ -#define NILFS_SEG_HAS_SR(sum) ((sum)->flags & NILFS_SS_SR) -#define NILFS_SEG_LOGBGN(sum) ((sum)->flags & NILFS_SS_LOGBGN) -#define NILFS_SEG_LOGEND(sum) ((sum)->flags & NILFS_SS_LOGEND) -#define NILFS_SEG_DSYNC(sum) ((sum)->flags & NILFS_SS_SYNDT) -#define NILFS_SEG_SIMPLEX(sum) \ - (((sum)->flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == \ - (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) - -#define NILFS_SEG_EMPTY(sum) ((sum)->nblocks == (sum)->nsumblk) - /** * struct nilfs_segment_buffer - Segment buffer * @sb_super: back pointer to a superblock struct @@ -141,6 +130,19 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *, struct buffer_head **); void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *); +static inline int nilfs_segbuf_simplex(struct nilfs_segment_buffer *segbuf) +{ + unsigned int flags = segbuf->sb_sum.flags; + + return (flags & (NILFS_SS_LOGBGN | NILFS_SS_LOGEND)) == + (NILFS_SS_LOGBGN | NILFS_SS_LOGEND); +} + +static inline int nilfs_segbuf_empty(struct nilfs_segment_buffer *segbuf) +{ + return segbuf->sb_sum.nblocks == segbuf->sb_sum.nsumblk; +} + static inline void nilfs_segbuf_add_segsum_buffer(struct nilfs_segment_buffer *segbuf, struct buffer_head *bh) diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index c920164..687d090 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -191,6 +191,8 @@ int nilfs_transaction_begin(struct super_block *sb, if (ret > 0) return 0; + vfs_check_frozen(sb, SB_FREEZE_WRITE); + sbi = NILFS_SB(sb); nilfs = sbi->s_nilfs; down_read(&nilfs->ns_segctor_sem); @@ -366,8 +368,7 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) if (nilfs_doing_gc()) flags = NILFS_SS_GC; - err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, - sci->sc_sbi->s_nilfs->ns_cno); + err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno); if (unlikely(err)) return err; @@ -440,17 +441,26 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, struct nilfs_finfo *finfo; struct nilfs_inode_info *ii; struct nilfs_segment_buffer *segbuf; + __u64 cno; if (sci->sc_blk_cnt == 0) return; ii = NILFS_I(inode); + + if (test_bit(NILFS_I_GCINODE, &ii->i_state)) + cno = ii->i_cno; + else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) + cno = 0; + else + cno = sci->sc_cno; + finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr, sizeof(*finfo)); finfo->fi_ino = cpu_to_le64(inode->i_ino); finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt); finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt); - finfo->fi_cno = cpu_to_le64(ii->i_cno); + finfo->fi_cno = cpu_to_le64(cno); segbuf = sci->sc_curseg; segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset + @@ -755,12 +765,12 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi, } } -static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi) +static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, + struct nilfs_root *root) { - struct the_nilfs *nilfs = sbi->s_nilfs; int ret = 0; - if (nilfs_mdt_fetch_dirty(sbi->s_ifile)) + if (nilfs_mdt_fetch_dirty(root->ifile)) ret++; if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) ret++; @@ -785,7 +795,7 @@ static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) struct nilfs_sb_info *sbi = sci->sc_sbi; int ret = 0; - if (nilfs_test_metadata_dirty(sbi)) + if (nilfs_test_metadata_dirty(sbi->s_nilfs, sci->sc_root)) set_bit(NILFS_SC_DIRTY, &sci->sc_flags); spin_lock(&sbi->s_inode_lock); @@ -801,7 +811,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) struct nilfs_sb_info *sbi = sci->sc_sbi; struct the_nilfs *nilfs = sbi->s_nilfs; - nilfs_mdt_clear_dirty(sbi->s_ifile); + nilfs_mdt_clear_dirty(sci->sc_root->ifile); nilfs_mdt_clear_dirty(nilfs->ns_cpfile); nilfs_mdt_clear_dirty(nilfs->ns_sufile); nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); @@ -848,9 +858,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) raw_cp->cp_snapshot_list.ssl_next = 0; raw_cp->cp_snapshot_list.ssl_prev = 0; raw_cp->cp_inodes_count = - cpu_to_le64(atomic_read(&sbi->s_inodes_count)); + cpu_to_le64(atomic_read(&sci->sc_root->inodes_count)); raw_cp->cp_blocks_count = - cpu_to_le64(atomic_read(&sbi->s_blocks_count)); + cpu_to_le64(atomic_read(&sci->sc_root->blocks_count)); raw_cp->cp_nblk_inc = cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); @@ -861,7 +871,8 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) else nilfs_checkpoint_set_minor(raw_cp); - nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); + nilfs_write_inode_common(sci->sc_root->ifile, + &raw_cp->cp_ifile_inode, 1); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); return 0; @@ -886,13 +897,12 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile, } } -static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, - struct inode *ifile) +static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci) { struct nilfs_inode_info *ii; list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { - nilfs_fill_in_file_bmap(ifile, ii); + nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii); set_bit(NILFS_I_COLLECTED, &ii->i_state); } } @@ -1135,7 +1145,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; /* Fall through */ case NILFS_ST_IFILE: - err = nilfs_segctor_scan_file(sci, sbi->s_ifile, + err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile, &nilfs_sc_file_ops); if (unlikely(err)) break; @@ -1599,7 +1609,7 @@ nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out) kunmap_atomic(kaddr, KM_USER0); if (!TestSetPageWriteback(clone_page)) - inc_zone_page_state(clone_page, NR_WRITEBACK); + account_page_writeback(clone_page); unlock_page(clone_page); return 0; @@ -1900,6 +1910,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) set_buffer_uptodate(bh); clear_buffer_dirty(bh); clear_buffer_nilfs_volatile(bh); + clear_buffer_nilfs_redirected(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); @@ -1914,12 +1925,12 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) } } - if (!NILFS_SEG_SIMPLEX(&segbuf->sb_sum)) { - if (NILFS_SEG_LOGBGN(&segbuf->sb_sum)) { + if (!nilfs_segbuf_simplex(segbuf)) { + if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) { set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); sci->sc_lseg_stime = jiffies; } - if (NILFS_SEG_LOGEND(&segbuf->sb_sum)) + if (segbuf->sb_sum.flags & NILFS_SS_LOGEND) clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags); } } @@ -1936,11 +1947,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) nilfs_drop_collected_inodes(&sci->sc_dirty_files); - if (nilfs_doing_gc()) { + if (nilfs_doing_gc()) nilfs_drop_collected_inodes(&sci->sc_gc_inodes); - if (update_sr) - nilfs_commit_gcdat_inode(nilfs); - } else + else nilfs->ns_nongc_ctime = sci->sc_seg_ctime; sci->sc_nblk_inc += sci->sc_nblk_this_inc; @@ -1951,7 +1960,6 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) if (update_sr) { nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start, segbuf->sb_sum.seg_seq, nilfs->ns_cno++); - set_nilfs_sb_dirty(nilfs); clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags); clear_bit(NILFS_SC_DIRTY, &sci->sc_flags); @@ -1977,7 +1985,7 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, struct nilfs_sb_info *sbi) { struct nilfs_inode_info *ii, *n; - __u64 cno = sbi->s_nilfs->ns_cno; + struct inode *ifile = sci->sc_root->ifile; spin_lock(&sbi->s_inode_lock); retry: @@ -1988,14 +1996,14 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, spin_unlock(&sbi->s_inode_lock); err = nilfs_ifile_get_inode_block( - sbi->s_ifile, ii->vfs_inode.i_ino, &ibh); + ifile, ii->vfs_inode.i_ino, &ibh); if (unlikely(err)) { nilfs_warning(sbi->s_super, __func__, "failed to get inode block.\n"); return err; } nilfs_mdt_mark_buffer_dirty(ibh); - nilfs_mdt_mark_dirty(sbi->s_ifile); + nilfs_mdt_mark_dirty(ifile); spin_lock(&sbi->s_inode_lock); if (likely(!ii->i_bh)) ii->i_bh = ibh; @@ -2003,7 +2011,6 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, brelse(ibh); goto retry; } - ii->i_cno = cno; clear_bit(NILFS_I_QUEUED, &ii->i_state); set_bit(NILFS_I_BUSY, &ii->i_state); @@ -2012,8 +2019,6 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, } spin_unlock(&sbi->s_inode_lock); - NILFS_I(sbi->s_ifile)->i_cno = cno; - return 0; } @@ -2022,19 +2027,13 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, { struct nilfs_transaction_info *ti = current->journal_info; struct nilfs_inode_info *ii, *n; - __u64 cno = sbi->s_nilfs->ns_cno; spin_lock(&sbi->s_inode_lock); list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || - test_bit(NILFS_I_DIRTY, &ii->i_state)) { - /* The current checkpoint number (=nilfs->ns_cno) is - changed between check-in and check-out only if the - super root is written out. So, we can update i_cno - for the inodes that remain in the dirty list. */ - ii->i_cno = cno; + test_bit(NILFS_I_DIRTY, &ii->i_state)) continue; - } + clear_bit(NILFS_I_BUSY, &ii->i_state); brelse(ii->i_bh); ii->i_bh = NULL; @@ -2055,12 +2054,13 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) int err; sci->sc_stage.scnt = NILFS_ST_INIT; + sci->sc_cno = nilfs->ns_cno; err = nilfs_segctor_check_in_files(sci, sbi); if (unlikely(err)) goto out; - if (nilfs_test_metadata_dirty(sbi)) + if (nilfs_test_metadata_dirty(nilfs, sci->sc_root)) set_bit(NILFS_SC_DIRTY, &sci->sc_flags); if (nilfs_segctor_clean(sci)) @@ -2082,7 +2082,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) /* Avoid empty segment */ if (sci->sc_stage.scnt == NILFS_ST_DONE && - NILFS_SEG_EMPTY(&sci->sc_curseg->sb_sum)) { + nilfs_segbuf_empty(sci->sc_curseg)) { nilfs_segctor_abort_construction(sci, nilfs, 1); goto out; } @@ -2092,7 +2092,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) goto failed; if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) - nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); + nilfs_segctor_fill_in_file_bmap(sci); if (mode == SC_LSEG_SR && sci->sc_stage.scnt >= NILFS_ST_CPFILE) { @@ -2408,6 +2408,7 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) { struct nilfs_sb_info *sbi = sci->sc_sbi; struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_super_block **sbp; int err = 0; nilfs_segctor_accept(sci); @@ -2423,8 +2424,13 @@ static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode) if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) && nilfs_discontinued(nilfs)) { down_write(&nilfs->ns_sem); - err = nilfs_commit_super( - sbi, nilfs_altsb_need_update(nilfs)); + err = -EIO; + sbp = nilfs_prepare_super(sbi, + nilfs_sb_will_flip(nilfs)); + if (likely(sbp)) { + nilfs_set_log_cursor(sbp[0], nilfs); + err = nilfs_commit_super(sbi, NILFS_SB_COMMIT); + } up_write(&nilfs->ns_sem); } } @@ -2447,9 +2453,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) list_for_each_entry_safe(ii, n, head, i_dirty) { if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) continue; - hlist_del_init(&ii->vfs_inode.i_hash); list_del_init(&ii->i_dirty); - nilfs_clear_gcinode(&ii->vfs_inode); + iput(&ii->vfs_inode); } } @@ -2467,13 +2472,15 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, nilfs_transaction_lock(sbi, &ti, 1); - err = nilfs_init_gcdat_inode(nilfs); + err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); if (unlikely(err)) goto out_unlock; err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); - if (unlikely(err)) + if (unlikely(err)) { + nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat); goto out_unlock; + } sci->sc_freesegs = kbufs[4]; sci->sc_nfreesegs = argv[4].v_nmembs; @@ -2505,7 +2512,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, out_unlock: sci->sc_freesegs = NULL; sci->sc_nfreesegs = 0; - nilfs_clear_gcdat_inode(nilfs); + nilfs_mdt_clear_shadow_map(nilfs->ns_dat); nilfs_transaction_unlock(sbi); return err; } @@ -2667,6 +2674,8 @@ static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) } static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) + __acquires(&sci->sc_state_lock) + __releases(&sci->sc_state_lock) { sci->sc_state |= NILFS_SEGCTOR_QUIT; @@ -2681,7 +2690,8 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) /* * Setup & clean-up functions */ -static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) +static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi, + struct nilfs_root *root) { struct nilfs_sc_info *sci; @@ -2692,6 +2702,9 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) sci->sc_sbi = sbi; sci->sc_super = sbi->s_super; + nilfs_get_root(root); + sci->sc_root = root; + init_waitqueue_head(&sci->sc_wait_request); init_waitqueue_head(&sci->sc_wait_daemon); init_waitqueue_head(&sci->sc_wait_task); @@ -2766,6 +2779,8 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) WARN_ON(!list_empty(&sci->sc_segbufs)); WARN_ON(!list_empty(&sci->sc_write_logs)); + nilfs_put_root(sci->sc_root); + down_write(&sbi->s_nilfs->ns_segctor_sem); del_timer_sync(&sci->sc_timer); @@ -2775,6 +2790,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) /** * nilfs_attach_segment_constructor - attach a segment constructor * @sbi: nilfs_sb_info + * @root: root object of the current filesystem tree * * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, * initializes it, and starts the segment constructor. @@ -2784,9 +2800,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) * * %-ENOMEM - Insufficient memory available. */ -int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) +int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, + struct nilfs_root *root) { - struct the_nilfs *nilfs = sbi->s_nilfs; int err; if (NILFS_SC(sbi)) { @@ -2798,14 +2814,12 @@ int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) nilfs_detach_segment_constructor(sbi); } - sbi->s_sc_info = nilfs_segctor_new(sbi); + sbi->s_sc_info = nilfs_segctor_new(sbi, root); if (!sbi->s_sc_info) return -ENOMEM; - nilfs_attach_writer(nilfs, sbi); err = nilfs_segctor_start_thread(NILFS_SC(sbi)); if (err) { - nilfs_detach_writer(nilfs, sbi); kfree(sbi->s_sc_info); sbi->s_sc_info = NULL; } @@ -2842,5 +2856,4 @@ void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) up_write(&nilfs->ns_segctor_sem); nilfs_dispose_list(sbi, &garbage_list, 1); - nilfs_detach_writer(nilfs, sbi); } diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 01e20db..cd8056e 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h @@ -29,6 +29,8 @@ #include <linux/nilfs2_fs.h> #include "sb.h" +struct nilfs_root; + /** * struct nilfs_recovery_info - Recovery information * @ri_need_recovery: Recovery status @@ -87,6 +89,7 @@ struct nilfs_segsum_pointer { * struct nilfs_sc_info - Segment constructor information * @sc_super: Back pointer to super_block struct * @sc_sbi: Back pointer to nilfs_sb_info struct + * @sc_root: root object of the current filesystem tree * @sc_nblk_inc: Block count of current generation * @sc_dirty_files: List of files to be written * @sc_gc_inodes: List of GC inodes having blocks to be written @@ -107,6 +110,7 @@ struct nilfs_segsum_pointer { * @sc_datablk_cnt: Data block count of a file * @sc_nblk_this_inc: Number of blocks included in the current logical segment * @sc_seg_ctime: Creation time + * @sc_cno: checkpoint number of current log * @sc_flags: Internal flags * @sc_state_lock: spinlock for sc_state and so on * @sc_state: Segctord state flags @@ -128,6 +132,7 @@ struct nilfs_segsum_pointer { struct nilfs_sc_info { struct super_block *sc_super; struct nilfs_sb_info *sc_sbi; + struct nilfs_root *sc_root; unsigned long sc_nblk_inc; @@ -156,7 +161,7 @@ struct nilfs_sc_info { unsigned long sc_datablk_cnt; unsigned long sc_nblk_this_inc; time_t sc_seg_ctime; - + __u64 sc_cno; unsigned long sc_flags; spinlock_t sc_state_lock; @@ -230,17 +235,18 @@ extern void nilfs_flush_segment(struct super_block *, ino_t); extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, void **); -extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); +int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, + struct nilfs_root *root); extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); /* recovery.c */ -extern int nilfs_read_super_root_block(struct super_block *, sector_t, +extern int nilfs_read_super_root_block(struct the_nilfs *, sector_t, struct buffer_head **, int); -extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_sb_info *, +extern int nilfs_search_super_root(struct the_nilfs *, struct nilfs_recovery_info *); -extern int nilfs_recover_logical_segments(struct the_nilfs *, - struct nilfs_sb_info *, - struct nilfs_recovery_info *); +extern int nilfs_salvage_orphan_logs(struct the_nilfs *, + struct nilfs_sb_info *, + struct nilfs_recovery_info *); extern void nilfs_dispose_segment_list(struct list_head *); #endif /* _NILFS_SEGMENT_H */ diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 3c6cc60..1d6f488 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c @@ -505,7 +505,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) { struct buffer_head *header_bh; struct nilfs_sufile_header *header; - struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; + struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); void *kaddr; int ret; @@ -583,7 +583,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, struct nilfs_segment_usage *su; struct nilfs_suinfo *si = buf; size_t susz = NILFS_MDT(sufile)->mi_entry_size; - struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; + struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); void *kaddr; unsigned long nsegs, segusages_per_block; ssize_t n; @@ -635,46 +635,55 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, } /** - * nilfs_sufile_read - read sufile inode - * @sufile: sufile inode + * nilfs_sufile_read - read or get sufile inode + * @sb: super block instance + * @susize: size of a segment usage entry * @raw_inode: on-disk sufile inode + * @inodep: buffer to store the inode */ -int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode) +int nilfs_sufile_read(struct super_block *sb, size_t susize, + struct nilfs_inode *raw_inode, struct inode **inodep) { - struct nilfs_sufile_info *sui = NILFS_SUI(sufile); + struct inode *sufile; + struct nilfs_sufile_info *sui; struct buffer_head *header_bh; struct nilfs_sufile_header *header; void *kaddr; - int ret; + int err; - ret = nilfs_read_inode_common(sufile, raw_inode); - if (ret < 0) - return ret; + sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); + if (unlikely(!sufile)) + return -ENOMEM; + if (!(sufile->i_state & I_NEW)) + goto out; - ret = nilfs_sufile_get_header_block(sufile, &header_bh); - if (!ret) { - kaddr = kmap_atomic(header_bh->b_page, KM_USER0); - header = kaddr + bh_offset(header_bh); - sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); - kunmap_atomic(kaddr, KM_USER0); - brelse(header_bh); - } - return ret; -} + err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui)); + if (err) + goto failed; -/** - * nilfs_sufile_new - create sufile - * @nilfs: nilfs object - * @susize: size of a segment usage entry - */ -struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize) -{ - struct inode *sufile; + nilfs_mdt_set_entry_size(sufile, susize, + sizeof(struct nilfs_sufile_header)); + + err = nilfs_read_inode_common(sufile, raw_inode); + if (err) + goto failed; + + err = nilfs_sufile_get_header_block(sufile, &header_bh); + if (err) + goto failed; - sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO, - sizeof(struct nilfs_sufile_info)); - if (sufile) - nilfs_mdt_set_entry_size(sufile, susize, - sizeof(struct nilfs_sufile_header)); - return sufile; + sui = NILFS_SUI(sufile); + kaddr = kmap_atomic(header_bh->b_page, KM_USER0); + header = kaddr + bh_offset(header_bh); + sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); + kunmap_atomic(kaddr, KM_USER0); + brelse(header_bh); + + unlock_new_inode(sufile); + out: + *inodep = sufile; + return 0; + failed: + iget_failed(sufile); + return err; } diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 15163b8..a943fba 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h @@ -31,7 +31,7 @@ static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) { - return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; + return NILFS_I_NILFS(sufile)->ns_nsegments; } unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); @@ -61,8 +61,8 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, struct buffer_head *); -int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode); -struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize); +int nilfs_sufile_read(struct super_block *sb, size_t susize, + struct nilfs_inode *raw_inode, struct inode **inodep); /** * nilfs_sufile_scrap - make a segment garbage diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 414ef68..f804d41 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -45,16 +45,17 @@ #include <linux/parser.h> #include <linux/random.h> #include <linux/crc32.h> -#include <linux/smp_lock.h> #include <linux/vfs.h> #include <linux/writeback.h> #include <linux/kobject.h> -#include <linux/exportfs.h> #include <linux/seq_file.h> #include <linux/mount.h> #include "nilfs.h" +#include "export.h" #include "mdt.h" #include "alloc.h" +#include "btree.h" +#include "btnode.h" #include "page.h" #include "cpfile.h" #include "ifile.h" @@ -67,13 +68,33 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " "(NILFS)"); MODULE_LICENSE("GPL"); -struct kmem_cache *nilfs_inode_cachep; +static struct kmem_cache *nilfs_inode_cachep; struct kmem_cache *nilfs_transaction_cachep; struct kmem_cache *nilfs_segbuf_cachep; struct kmem_cache *nilfs_btree_path_cache; +static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount); static int nilfs_remount(struct super_block *sb, int *flags, char *data); +static void nilfs_set_error(struct nilfs_sb_info *sbi) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_super_block **sbp; + + down_write(&nilfs->ns_sem); + if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { + nilfs->ns_mount_state |= NILFS_ERROR_FS; + sbp = nilfs_prepare_super(sbi, 0); + if (likely(sbp)) { + sbp[0]->s_state |= cpu_to_le16(NILFS_ERROR_FS); + if (sbp[1]) + sbp[1]->s_state |= cpu_to_le16(NILFS_ERROR_FS); + nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); + } + } + up_write(&nilfs->ns_sem); +} + /** * nilfs_error() - report failure condition on a filesystem * @@ -99,16 +120,7 @@ void nilfs_error(struct super_block *sb, const char *function, va_end(args); if (!(sb->s_flags & MS_RDONLY)) { - struct the_nilfs *nilfs = sbi->s_nilfs; - - down_write(&nilfs->ns_sem); - if (!(nilfs->ns_mount_state & NILFS_ERROR_FS)) { - nilfs->ns_mount_state |= NILFS_ERROR_FS; - nilfs->ns_sbp[0]->s_state |= - cpu_to_le16(NILFS_ERROR_FS); - nilfs_commit_super(sbi, 1); - } - up_write(&nilfs->ns_sem); + nilfs_set_error(sbi); if (nilfs_test_opt(sbi, ERRORS_RO)) { printk(KERN_CRIT "Remounting filesystem read-only\n"); @@ -135,7 +147,7 @@ void nilfs_warning(struct super_block *sb, const char *function, } -struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) +struct inode *nilfs_alloc_inode(struct super_block *sb) { struct nilfs_inode_info *ii; @@ -144,70 +156,55 @@ struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) return NULL; ii->i_bh = NULL; ii->i_state = 0; + ii->i_cno = 0; ii->vfs_inode.i_version = 1; - nilfs_btnode_cache_init(&ii->i_btnode_cache, nilfs->ns_bdi); + nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); return &ii->vfs_inode; } -struct inode *nilfs_alloc_inode(struct super_block *sb) -{ - return nilfs_alloc_inode_common(NILFS_SB(sb)->s_nilfs); -} - void nilfs_destroy_inode(struct inode *inode) { - kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); -} - -static void nilfs_clear_inode(struct inode *inode) -{ - struct nilfs_inode_info *ii = NILFS_I(inode); - - /* - * Free resources allocated in nilfs_read_inode(), here. - */ - BUG_ON(!list_empty(&ii->i_dirty)); - brelse(ii->i_bh); - ii->i_bh = NULL; - - if (test_bit(NILFS_I_BMAP, &ii->i_state)) - nilfs_bmap_clear(ii->i_bmap); + struct nilfs_mdt_info *mdi = NILFS_MDT(inode); - nilfs_btnode_cache_clear(&ii->i_btnode_cache); + if (mdi) { + kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ + kfree(mdi); + } + kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); } -static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) +static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. " - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + if (nilfs_test_opt(sbi, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_FLUSH_FUA); + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); if (err == -EIO && nilfs->ns_sbh[1]) { + /* + * sbp[0] points to newer log than sbp[1], + * so copy sbp[0] to sbp[1] to take over sbp[0]. + */ + memcpy(nilfs->ns_sbp[1], nilfs->ns_sbp[0], + nilfs->ns_sbsize); nilfs_fall_back_super_block(nilfs); goto retry; } } else { struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; + nilfs->ns_sbwcount++; + /* * The latest segment becomes trailable from the position * written in superblock. @@ -216,66 +213,122 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int dupsb) /* update GC protection for recent segments */ if (nilfs->ns_sbh[1]) { - sbp = NULL; - if (dupsb) { + if (flag == NILFS_SB_COMMIT_ALL) { set_buffer_dirty(nilfs->ns_sbh[1]); - if (!sync_dirty_buffer(nilfs->ns_sbh[1])) - sbp = nilfs->ns_sbp[1]; + if (sync_dirty_buffer(nilfs->ns_sbh[1]) < 0) + goto out; } + if (le64_to_cpu(nilfs->ns_sbp[1]->s_last_cno) < + le64_to_cpu(nilfs->ns_sbp[0]->s_last_cno)) + sbp = nilfs->ns_sbp[1]; } - if (sbp) { - spin_lock(&nilfs->ns_last_segment_lock); - nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); - spin_unlock(&nilfs->ns_last_segment_lock); - } - } + spin_lock(&nilfs->ns_last_segment_lock); + nilfs->ns_prot_seq = le64_to_cpu(sbp->s_last_seq); + spin_unlock(&nilfs->ns_last_segment_lock); + } + out: return err; } -int nilfs_commit_super(struct nilfs_sb_info *sbi, int dupsb) +void nilfs_set_log_cursor(struct nilfs_super_block *sbp, + struct the_nilfs *nilfs) +{ + sector_t nfreeblocks; + + /* nilfs->ns_sem must be locked by the caller. */ + nilfs_count_free_blocks(nilfs, &nfreeblocks); + sbp->s_free_blocks_count = cpu_to_le64(nfreeblocks); + + spin_lock(&nilfs->ns_last_segment_lock); + sbp->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); + sbp->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); + sbp->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); + spin_unlock(&nilfs->ns_last_segment_lock); +} + +struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *sbi, + int flip) { struct the_nilfs *nilfs = sbi->s_nilfs; struct nilfs_super_block **sbp = nilfs->ns_sbp; - sector_t nfreeblocks; - time_t t; - int err; - /* nilfs->sem must be locked by the caller. */ + /* nilfs->ns_sem must be locked by the caller. */ if (sbp[0]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { - if (sbp[1] && sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) - nilfs_swap_super_block(nilfs); - else { + if (sbp[1] && + sbp[1]->s_magic == cpu_to_le16(NILFS_SUPER_MAGIC)) { + memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); + } else { printk(KERN_CRIT "NILFS: superblock broke on dev %s\n", sbi->s_super->s_id); - return -EIO; + return NULL; } + } else if (sbp[1] && + sbp[1]->s_magic != cpu_to_le16(NILFS_SUPER_MAGIC)) { + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); } - err = nilfs_count_free_blocks(nilfs, &nfreeblocks); - if (unlikely(err)) { - printk(KERN_ERR "NILFS: failed to count free blocks\n"); - return err; - } - spin_lock(&nilfs->ns_last_segment_lock); - sbp[0]->s_last_seq = cpu_to_le64(nilfs->ns_last_seq); - sbp[0]->s_last_pseg = cpu_to_le64(nilfs->ns_last_pseg); - sbp[0]->s_last_cno = cpu_to_le64(nilfs->ns_last_cno); - spin_unlock(&nilfs->ns_last_segment_lock); + if (flip && sbp[1]) + nilfs_swap_super_block(nilfs); + + return sbp; +} + +int nilfs_commit_super(struct nilfs_sb_info *sbi, int flag) +{ + struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_super_block **sbp = nilfs->ns_sbp; + time_t t; + + /* nilfs->ns_sem must be locked by the caller. */ t = get_seconds(); - nilfs->ns_sbwtime[0] = t; - sbp[0]->s_free_blocks_count = cpu_to_le64(nfreeblocks); + nilfs->ns_sbwtime = t; sbp[0]->s_wtime = cpu_to_le64(t); sbp[0]->s_sum = 0; sbp[0]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, (unsigned char *)sbp[0], nilfs->ns_sbsize)); - if (dupsb && sbp[1]) { - memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); - nilfs->ns_sbwtime[1] = t; + if (flag == NILFS_SB_COMMIT_ALL && sbp[1]) { + sbp[1]->s_wtime = sbp[0]->s_wtime; + sbp[1]->s_sum = 0; + sbp[1]->s_sum = cpu_to_le32(crc32_le(nilfs->ns_crc_seed, + (unsigned char *)sbp[1], + nilfs->ns_sbsize)); } clear_nilfs_sb_dirty(nilfs); - return nilfs_sync_super(sbi, dupsb); + return nilfs_sync_super(sbi, flag); +} + +/** + * nilfs_cleanup_super() - write filesystem state for cleanup + * @sbi: nilfs_sb_info to be unmounted or degraded to read-only + * + * This function restores state flags in the on-disk super block. + * This will set "clean" flag (i.e. NILFS_VALID_FS) unless the + * filesystem was not clean previously. + */ +int nilfs_cleanup_super(struct nilfs_sb_info *sbi) +{ + struct nilfs_super_block **sbp; + int flag = NILFS_SB_COMMIT; + int ret = -EIO; + + sbp = nilfs_prepare_super(sbi, 0); + if (sbp) { + sbp[0]->s_state = cpu_to_le16(sbi->s_nilfs->ns_mount_state); + nilfs_set_log_cursor(sbp[0], sbi->s_nilfs); + if (sbp[1] && sbp[0]->s_last_cno == sbp[1]->s_last_cno) { + /* + * make the "clean" flag also to the opposite + * super block if both super blocks point to + * the same checkpoint. + */ + sbp[1]->s_state = sbp[0]->s_state; + flag = NILFS_SB_COMMIT_ALL; + } + ret = nilfs_commit_super(sbi, flag); + } + return ret; } static void nilfs_put_super(struct super_block *sb) @@ -283,34 +336,29 @@ static void nilfs_put_super(struct super_block *sb) struct nilfs_sb_info *sbi = NILFS_SB(sb); struct the_nilfs *nilfs = sbi->s_nilfs; - lock_kernel(); - nilfs_detach_segment_constructor(sbi); if (!(sb->s_flags & MS_RDONLY)) { down_write(&nilfs->ns_sem); - nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); - nilfs_commit_super(sbi, 1); + nilfs_cleanup_super(sbi); up_write(&nilfs->ns_sem); } - down_write(&nilfs->ns_super_sem); - if (nilfs->ns_current == sbi) - nilfs->ns_current = NULL; - up_write(&nilfs->ns_super_sem); - nilfs_detach_checkpoint(sbi); - put_nilfs(sbi->s_nilfs); + iput(nilfs->ns_sufile); + iput(nilfs->ns_cpfile); + iput(nilfs->ns_dat); + + destroy_nilfs(nilfs); sbi->s_super = NULL; sb->s_fs_info = NULL; - nilfs_put_sbinfo(sbi); - - unlock_kernel(); + kfree(sbi); } static int nilfs_sync_fs(struct super_block *sb, int wait) { struct nilfs_sb_info *sbi = NILFS_SB(sb); struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_super_block **sbp; int err = 0; /* This function is called when super block should be written back */ @@ -318,27 +366,34 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) err = nilfs_construct_segment(sb); down_write(&nilfs->ns_sem); - if (nilfs_sb_dirty(nilfs)) - nilfs_commit_super(sbi, 1); + if (nilfs_sb_dirty(nilfs)) { + sbp = nilfs_prepare_super(sbi, nilfs_sb_will_flip(nilfs)); + if (likely(sbp)) { + nilfs_set_log_cursor(sbp[0], nilfs); + nilfs_commit_super(sbi, NILFS_SB_COMMIT); + } + } up_write(&nilfs->ns_sem); return err; } -int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) +int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, + struct nilfs_root **rootp) { struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_root *root; struct nilfs_checkpoint *raw_cp; struct buffer_head *bh_cp; - int err; + int err = -ENOMEM; - down_write(&nilfs->ns_super_sem); - list_add(&sbi->s_list, &nilfs->ns_supers); - up_write(&nilfs->ns_super_sem); + root = nilfs_find_or_create_root( + nilfs, curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno); + if (!root) + return err; - sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); - if (!sbi->s_ifile) - return -ENOMEM; + if (root->ifile) + goto reuse; /* already attached checkpoint */ down_read(&nilfs->ns_segctor_sem); err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, @@ -354,44 +409,64 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) } goto failed; } - err = nilfs_read_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode); - if (unlikely(err)) + + err = nilfs_ifile_read(sbi->s_super, root, nilfs->ns_inode_size, + &raw_cp->cp_ifile_inode, &root->ifile); + if (err) goto failed_bh; - atomic_set(&sbi->s_inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); - atomic_set(&sbi->s_blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); + + atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); + atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); + + reuse: + *rootp = root; return 0; failed_bh: nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); failed: - nilfs_mdt_destroy(sbi->s_ifile); - sbi->s_ifile = NULL; + nilfs_put_root(root); - down_write(&nilfs->ns_super_sem); - list_del_init(&sbi->s_list); - up_write(&nilfs->ns_super_sem); + return err; +} +static int nilfs_freeze(struct super_block *sb) +{ + struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct the_nilfs *nilfs = sbi->s_nilfs; + int err; + + if (sb->s_flags & MS_RDONLY) + return 0; + + /* Mark super block clean */ + down_write(&nilfs->ns_sem); + err = nilfs_cleanup_super(sbi); + up_write(&nilfs->ns_sem); return err; } -void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) +static int nilfs_unfreeze(struct super_block *sb) { + struct nilfs_sb_info *sbi = NILFS_SB(sb); struct the_nilfs *nilfs = sbi->s_nilfs; - nilfs_mdt_destroy(sbi->s_ifile); - sbi->s_ifile = NULL; - down_write(&nilfs->ns_super_sem); - list_del_init(&sbi->s_list); - up_write(&nilfs->ns_super_sem); + if (sb->s_flags & MS_RDONLY) + return 0; + + down_write(&nilfs->ns_sem); + nilfs_setup_super(sbi, false); + up_write(&nilfs->ns_sem); + return 0; } static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; - struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct the_nilfs *nilfs = sbi->s_nilfs; + struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root; + struct the_nilfs *nilfs = root->nilfs; u64 id = huge_encode_dev(sb->s_bdev->bd_dev); unsigned long long blocks; unsigned long overhead; @@ -427,7 +502,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bfree = nfreeblocks; buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? (buf->f_bfree - nrsvblocks) : 0; - buf->f_files = atomic_read(&sbi->s_inodes_count); + buf->f_files = atomic_read(&root->inodes_count); buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ buf->f_namelen = NILFS_NAME_LEN; buf->f_fsid.val[0] = (u32)id; @@ -440,22 +515,22 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct super_block *sb = vfs->mnt_sb; struct nilfs_sb_info *sbi = NILFS_SB(sb); + struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root; if (!nilfs_test_opt(sbi, BARRIER)) - seq_printf(seq, ",nobarrier"); - if (nilfs_test_opt(sbi, SNAPSHOT)) - seq_printf(seq, ",cp=%llu", - (unsigned long long int)sbi->s_snapshot_cno); + seq_puts(seq, ",nobarrier"); + if (root->cno != NILFS_CPTREE_CURRENT_CNO) + seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); if (nilfs_test_opt(sbi, ERRORS_PANIC)) - seq_printf(seq, ",errors=panic"); + seq_puts(seq, ",errors=panic"); if (nilfs_test_opt(sbi, ERRORS_CONT)) - seq_printf(seq, ",errors=continue"); + seq_puts(seq, ",errors=continue"); if (nilfs_test_opt(sbi, STRICT_ORDER)) - seq_printf(seq, ",order=strict"); + seq_puts(seq, ",order=strict"); if (nilfs_test_opt(sbi, NORECOVERY)) - seq_printf(seq, ",norecovery"); + seq_puts(seq, ",norecovery"); if (nilfs_test_opt(sbi, DISCARD)) - seq_printf(seq, ",discard"); + seq_puts(seq, ",discard"); return 0; } @@ -467,85 +542,45 @@ static const struct super_operations nilfs_sops = { /* .write_inode = nilfs_write_inode, */ /* .put_inode = nilfs_put_inode, */ /* .drop_inode = nilfs_drop_inode, */ - .delete_inode = nilfs_delete_inode, + .evict_inode = nilfs_evict_inode, .put_super = nilfs_put_super, /* .write_super = nilfs_write_super, */ .sync_fs = nilfs_sync_fs, + .freeze_fs = nilfs_freeze, + .unfreeze_fs = nilfs_unfreeze, /* .write_super_lockfs */ /* .unlockfs */ .statfs = nilfs_statfs, .remount_fs = nilfs_remount, - .clear_inode = nilfs_clear_inode, /* .umount_begin */ .show_options = nilfs_show_options }; -static struct inode * -nilfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) -{ - struct inode *inode; - - if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO && - ino != NILFS_SKETCH_INO) - return ERR_PTR(-ESTALE); - - inode = nilfs_iget(sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - if (generation && inode->i_generation != generation) { - iput(inode); - return ERR_PTR(-ESTALE); - } - - return inode; -} - -static struct dentry * -nilfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, - int fh_type) -{ - return generic_fh_to_dentry(sb, fid, fh_len, fh_type, - nilfs_nfs_get_inode); -} - -static struct dentry * -nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, - int fh_type) -{ - return generic_fh_to_parent(sb, fid, fh_len, fh_type, - nilfs_nfs_get_inode); -} - -static const struct export_operations nilfs_export_ops = { - .fh_to_dentry = nilfs_fh_to_dentry, - .fh_to_parent = nilfs_fh_to_parent, - .get_parent = nilfs_get_parent, -}; - enum { Opt_err_cont, Opt_err_panic, Opt_err_ro, - Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, - Opt_discard, Opt_err, + Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, + Opt_discard, Opt_nodiscard, Opt_err, }; static match_table_t tokens = { {Opt_err_cont, "errors=continue"}, {Opt_err_panic, "errors=panic"}, {Opt_err_ro, "errors=remount-ro"}, + {Opt_barrier, "barrier"}, {Opt_nobarrier, "nobarrier"}, {Opt_snapshot, "cp=%u"}, {Opt_order, "order=%s"}, {Opt_norecovery, "norecovery"}, {Opt_discard, "discard"}, + {Opt_nodiscard, "nodiscard"}, {Opt_err, NULL} }; -static int parse_options(char *options, struct super_block *sb) +static int parse_options(char *options, struct super_block *sb, int is_remount) { struct nilfs_sb_info *sbi = NILFS_SB(sb); char *p; substring_t args[MAX_OPT_ARGS]; - int option; if (!options) return 1; @@ -557,6 +592,9 @@ static int parse_options(char *options, struct super_block *sb) token = match_token(p, tokens, args); switch (token) { + case Opt_barrier: + nilfs_set_opt(sbi, BARRIER); + break; case Opt_nobarrier: nilfs_clear_opt(sbi, BARRIER); break; @@ -580,12 +618,12 @@ static int parse_options(char *options, struct super_block *sb) nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); break; case Opt_snapshot: - if (match_int(&args[0], &option) || option <= 0) - return 0; - if (!(sb->s_flags & MS_RDONLY)) + if (is_remount) { + printk(KERN_ERR + "NILFS: \"%s\" option is invalid " + "for remount.\n", p); return 0; - sbi->s_snapshot_cno = option; - nilfs_set_opt(sbi, SNAPSHOT); + } break; case Opt_norecovery: nilfs_set_opt(sbi, NORECOVERY); @@ -593,6 +631,9 @@ static int parse_options(char *options, struct super_block *sb) case Opt_discard: nilfs_set_opt(sbi, DISCARD); break; + case Opt_nodiscard: + nilfs_clear_opt(sbi, DISCARD); + break; default: printk(KERN_ERR "NILFS: Unrecognized mount option \"%s\"\n", p); @@ -610,14 +651,24 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi, NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; } -static int nilfs_setup_super(struct nilfs_sb_info *sbi) +static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount) { struct the_nilfs *nilfs = sbi->s_nilfs; - struct nilfs_super_block *sbp = nilfs->ns_sbp[0]; - int max_mnt_count = le16_to_cpu(sbp->s_max_mnt_count); - int mnt_count = le16_to_cpu(sbp->s_mnt_count); + struct nilfs_super_block **sbp; + int max_mnt_count; + int mnt_count; + + /* nilfs->ns_sem must be locked by the caller. */ + sbp = nilfs_prepare_super(sbi, 0); + if (!sbp) + return -EIO; + + if (!is_mount) + goto skip_mount_setup; + + max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count); + mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); - /* nilfs->sem must be locked by the caller. */ if (nilfs->ns_mount_state & NILFS_ERROR_FS) { printk(KERN_WARNING "NILFS warning: mounting fs with errors\n"); @@ -628,12 +679,17 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) #endif } if (!max_mnt_count) - sbp->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); + sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); + + sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); + sbp[0]->s_mtime = cpu_to_le64(get_seconds()); - sbp->s_mnt_count = cpu_to_le16(mnt_count + 1); - sbp->s_state = cpu_to_le16(le16_to_cpu(sbp->s_state) & ~NILFS_VALID_FS); - sbp->s_mtime = cpu_to_le64(get_seconds()); - return nilfs_commit_super(sbi, 1); +skip_mount_setup: + sbp[0]->s_state = + cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); + /* synchronize sbp[1] with sbp[0] */ + memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); + return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); } struct nilfs_super_block *nilfs_read_super_block(struct super_block *sb, @@ -670,7 +726,165 @@ int nilfs_store_magic_and_option(struct super_block *sb, sbi->s_interval = le32_to_cpu(sbp->s_c_interval); sbi->s_watermark = le32_to_cpu(sbp->s_c_block_max); - return !parse_options(data, sb) ? -EINVAL : 0 ; + return !parse_options(data, sb, 0) ? -EINVAL : 0 ; +} + +int nilfs_check_feature_compatibility(struct super_block *sb, + struct nilfs_super_block *sbp) +{ + __u64 features; + + features = le64_to_cpu(sbp->s_feature_incompat) & + ~NILFS_FEATURE_INCOMPAT_SUPP; + if (features) { + printk(KERN_ERR "NILFS: couldn't mount because of unsupported " + "optional features (%llx)\n", + (unsigned long long)features); + return -EINVAL; + } + features = le64_to_cpu(sbp->s_feature_compat_ro) & + ~NILFS_FEATURE_COMPAT_RO_SUPP; + if (!(sb->s_flags & MS_RDONLY) && features) { + printk(KERN_ERR "NILFS: couldn't mount RDWR because of " + "unsupported optional features (%llx)\n", + (unsigned long long)features); + return -EINVAL; + } + return 0; +} + +static int nilfs_get_root_dentry(struct super_block *sb, + struct nilfs_root *root, + struct dentry **root_dentry) +{ + struct inode *inode; + struct dentry *dentry; + int ret = 0; + + inode = nilfs_iget(sb, root, NILFS_ROOT_INO); + if (IS_ERR(inode)) { + printk(KERN_ERR "NILFS: get root inode failed\n"); + ret = PTR_ERR(inode); + goto out; + } + if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { + iput(inode); + printk(KERN_ERR "NILFS: corrupt root inode.\n"); + ret = -EINVAL; + goto out; + } + + if (root->cno == NILFS_CPTREE_CURRENT_CNO) { + dentry = d_find_alias(inode); + if (!dentry) { + dentry = d_alloc_root(inode); + if (!dentry) { + iput(inode); + ret = -ENOMEM; + goto failed_dentry; + } + } else { + iput(inode); + } + } else { + dentry = d_obtain_alias(inode); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); + goto failed_dentry; + } + } + *root_dentry = dentry; + out: + return ret; + + failed_dentry: + printk(KERN_ERR "NILFS: get root dentry failed\n"); + goto out; +} + +static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, + struct dentry **root_dentry) +{ + struct the_nilfs *nilfs = NILFS_SB(s)->s_nilfs; + struct nilfs_root *root; + int ret; + + down_read(&nilfs->ns_segctor_sem); + ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); + up_read(&nilfs->ns_segctor_sem); + if (ret < 0) { + ret = (ret == -ENOENT) ? -EINVAL : ret; + goto out; + } else if (!ret) { + printk(KERN_ERR "NILFS: The specified checkpoint is " + "not a snapshot (checkpoint number=%llu).\n", + (unsigned long long)cno); + ret = -EINVAL; + goto out; + } + + ret = nilfs_attach_checkpoint(NILFS_SB(s), cno, false, &root); + if (ret) { + printk(KERN_ERR "NILFS: error loading snapshot " + "(checkpoint number=%llu).\n", + (unsigned long long)cno); + goto out; + } + ret = nilfs_get_root_dentry(s, root, root_dentry); + nilfs_put_root(root); + out: + return ret; +} + +static int nilfs_tree_was_touched(struct dentry *root_dentry) +{ + return atomic_read(&root_dentry->d_count) > 1; +} + +/** + * nilfs_try_to_shrink_tree() - try to shrink dentries of a checkpoint + * @root_dentry: root dentry of the tree to be shrunk + * + * This function returns true if the tree was in-use. + */ +static int nilfs_try_to_shrink_tree(struct dentry *root_dentry) +{ + if (have_submounts(root_dentry)) + return true; + shrink_dcache_parent(root_dentry); + return nilfs_tree_was_touched(root_dentry); +} + +int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) +{ + struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; + struct nilfs_root *root; + struct inode *inode; + struct dentry *dentry; + int ret; + + if (cno < 0 || cno > nilfs->ns_cno) + return false; + + if (cno >= nilfs_last_cno(nilfs)) + return true; /* protect recent checkpoints */ + + ret = false; + root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); + if (root) { + inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); + if (inode) { + dentry = d_find_alias(inode); + if (dentry) { + if (nilfs_tree_was_touched(dentry)) + ret = nilfs_try_to_shrink_tree(dentry); + dput(dentry); + } + iput(inode); + } + nilfs_put_root(root); + } + return ret; } /** @@ -678,17 +892,17 @@ int nilfs_store_magic_and_option(struct super_block *sb, * @sb: super_block * @data: mount options * @silent: silent mode flag - * @nilfs: the_nilfs struct * * This function is called exclusively by nilfs->ns_mount_mutex. * So, the recovery process is protected from other simultaneous mounts. */ static int -nilfs_fill_super(struct super_block *sb, void *data, int silent, - struct the_nilfs *nilfs) +nilfs_fill_super(struct super_block *sb, void *data, int silent) { + struct the_nilfs *nilfs; struct nilfs_sb_info *sbi; - struct inode *root; + struct nilfs_root *fsroot; + struct backing_dev_info *bdi; __u64 cno; int err; @@ -697,19 +911,21 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, return -ENOMEM; sb->s_fs_info = sbi; + sbi->s_super = sb; - get_nilfs(nilfs); + nilfs = alloc_nilfs(sb->s_bdev); + if (!nilfs) { + err = -ENOMEM; + goto failed_sbi; + } sbi->s_nilfs = nilfs; - sbi->s_super = sb; - atomic_set(&sbi->s_count, 1); err = init_nilfs(nilfs, sbi, (char *)data); if (err) - goto failed_sbi; + goto failed_nilfs; spin_lock_init(&sbi->s_inode_lock); INIT_LIST_HEAD(&sbi->s_dirty_files); - INIT_LIST_HEAD(&sbi->s_list); /* * Following initialization is overlapped because @@ -725,141 +941,80 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, sb->s_export_op = &nilfs_export_ops; sb->s_root = NULL; sb->s_time_gran = 1; - sb->s_bdi = nilfs->ns_bdi; + + bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; + sb->s_bdi = bdi ? : &default_backing_dev_info; err = load_nilfs(nilfs, sbi); if (err) - goto failed_sbi; + goto failed_nilfs; cno = nilfs_last_cno(nilfs); - - if (sb->s_flags & MS_RDONLY) { - if (nilfs_test_opt(sbi, SNAPSHOT)) { - down_read(&nilfs->ns_segctor_sem); - err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, - sbi->s_snapshot_cno); - up_read(&nilfs->ns_segctor_sem); - if (err < 0) { - if (err == -ENOENT) - err = -EINVAL; - goto failed_sbi; - } - if (!err) { - printk(KERN_ERR - "NILFS: The specified checkpoint is " - "not a snapshot " - "(checkpoint number=%llu).\n", - (unsigned long long)sbi->s_snapshot_cno); - err = -EINVAL; - goto failed_sbi; - } - cno = sbi->s_snapshot_cno; - } - } - - err = nilfs_attach_checkpoint(sbi, cno); + err = nilfs_attach_checkpoint(sbi, cno, true, &fsroot); if (err) { - printk(KERN_ERR "NILFS: error loading a checkpoint" - " (checkpoint number=%llu).\n", (unsigned long long)cno); - goto failed_sbi; + printk(KERN_ERR "NILFS: error loading last checkpoint " + "(checkpoint number=%llu).\n", (unsigned long long)cno); + goto failed_unload; } if (!(sb->s_flags & MS_RDONLY)) { - err = nilfs_attach_segment_constructor(sbi); + err = nilfs_attach_segment_constructor(sbi, fsroot); if (err) goto failed_checkpoint; } - root = nilfs_iget(sb, NILFS_ROOT_INO); - if (IS_ERR(root)) { - printk(KERN_ERR "NILFS: get root inode failed\n"); - err = PTR_ERR(root); - goto failed_segctor; - } - if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { - iput(root); - printk(KERN_ERR "NILFS: corrupt root inode.\n"); - err = -EINVAL; - goto failed_segctor; - } - sb->s_root = d_alloc_root(root); - if (!sb->s_root) { - iput(root); - printk(KERN_ERR "NILFS: get root dentry failed\n"); - err = -ENOMEM; + err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root); + if (err) goto failed_segctor; - } + + nilfs_put_root(fsroot); if (!(sb->s_flags & MS_RDONLY)) { down_write(&nilfs->ns_sem); - nilfs_setup_super(sbi); + nilfs_setup_super(sbi, true); up_write(&nilfs->ns_sem); } - down_write(&nilfs->ns_super_sem); - if (!nilfs_test_opt(sbi, SNAPSHOT)) - nilfs->ns_current = sbi; - up_write(&nilfs->ns_super_sem); - return 0; failed_segctor: nilfs_detach_segment_constructor(sbi); failed_checkpoint: - nilfs_detach_checkpoint(sbi); + nilfs_put_root(fsroot); + + failed_unload: + iput(nilfs->ns_sufile); + iput(nilfs->ns_cpfile); + iput(nilfs->ns_dat); + + failed_nilfs: + destroy_nilfs(nilfs); failed_sbi: - put_nilfs(nilfs); sb->s_fs_info = NULL; - nilfs_put_sbinfo(sbi); + kfree(sbi); return err; } static int nilfs_remount(struct super_block *sb, int *flags, char *data) { struct nilfs_sb_info *sbi = NILFS_SB(sb); - struct nilfs_super_block *sbp; struct the_nilfs *nilfs = sbi->s_nilfs; unsigned long old_sb_flags; struct nilfs_mount_options old_opts; - int was_snapshot, err; - - lock_kernel(); + int err; - down_write(&nilfs->ns_super_sem); old_sb_flags = sb->s_flags; old_opts.mount_opt = sbi->s_mount_opt; - old_opts.snapshot_cno = sbi->s_snapshot_cno; - was_snapshot = nilfs_test_opt(sbi, SNAPSHOT); - if (!parse_options(data, sb)) { + if (!parse_options(data, sb, 1)) { err = -EINVAL; goto restore_opts; } sb->s_flags = (sb->s_flags & ~MS_POSIXACL); err = -EINVAL; - if (was_snapshot) { - if (!(*flags & MS_RDONLY)) { - printk(KERN_ERR "NILFS (device %s): cannot remount " - "snapshot read/write.\n", - sb->s_id); - goto restore_opts; - } else if (sbi->s_snapshot_cno != old_opts.snapshot_cno) { - printk(KERN_ERR "NILFS (device %s): cannot " - "remount to a different snapshot.\n", - sb->s_id); - goto restore_opts; - } - } else { - if (nilfs_test_opt(sbi, SNAPSHOT)) { - printk(KERN_ERR "NILFS (device %s): cannot change " - "a regular mount to a snapshot.\n", - sb->s_id); - goto restore_opts; - } - } if (!nilfs_valid_fs(nilfs)) { printk(KERN_WARNING "NILFS (device %s): couldn't " @@ -880,40 +1035,47 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) * the RDONLY flag and then mark the partition as valid again. */ down_write(&nilfs->ns_sem); - sbp = nilfs->ns_sbp[0]; - if (!(sbp->s_state & le16_to_cpu(NILFS_VALID_FS)) && - (nilfs->ns_mount_state & NILFS_VALID_FS)) - sbp->s_state = cpu_to_le16(nilfs->ns_mount_state); - sbp->s_mtime = cpu_to_le64(get_seconds()); - nilfs_commit_super(sbi, 1); + nilfs_cleanup_super(sbi); up_write(&nilfs->ns_sem); } else { + __u64 features; + struct nilfs_root *root; + /* * Mounting a RDONLY partition read-write, so reread and * store the current valid flag. (It may have been changed * by fsck since we originally mounted the partition.) */ + down_read(&nilfs->ns_sem); + features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & + ~NILFS_FEATURE_COMPAT_RO_SUPP; + up_read(&nilfs->ns_sem); + if (features) { + printk(KERN_WARNING "NILFS (device %s): couldn't " + "remount RDWR because of unsupported optional " + "features (%llx)\n", + sb->s_id, (unsigned long long)features); + err = -EROFS; + goto restore_opts; + } + sb->s_flags &= ~MS_RDONLY; - err = nilfs_attach_segment_constructor(sbi); + root = NILFS_I(sb->s_root->d_inode)->i_root; + err = nilfs_attach_segment_constructor(sbi, root); if (err) goto restore_opts; down_write(&nilfs->ns_sem); - nilfs_setup_super(sbi); + nilfs_setup_super(sbi, true); up_write(&nilfs->ns_sem); } out: - up_write(&nilfs->ns_super_sem); - unlock_kernel(); return 0; restore_opts: sb->s_flags = old_sb_flags; sbi->s_mount_opt = old_opts.mount_opt; - sbi->s_snapshot_cno = old_opts.snapshot_cno; - up_write(&nilfs->ns_super_sem); - unlock_kernel(); return err; } @@ -933,7 +1095,7 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) { char *p, *options = data; substring_t args[MAX_OPT_ARGS]; - int option, token; + int token; int ret = 0; do { @@ -941,16 +1103,18 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) if (p != NULL && *p) { token = match_token(p, tokens, args); if (token == Opt_snapshot) { - if (!(sd->flags & MS_RDONLY)) + if (!(sd->flags & MS_RDONLY)) { ret++; - else { - ret = match_int(&args[0], &option); - if (!ret) { - if (option > 0) - sd->cno = option; - else - ret++; - } + } else { + sd->cno = simple_strtoull(args[0].from, + NULL, 0); + /* + * No need to see the end pointer; + * match_token() has done syntax + * checking. + */ + if (sd->cno == 0) + ret++; } } if (ret) @@ -967,43 +1131,33 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) static int nilfs_set_bdev_super(struct super_block *s, void *data) { - struct nilfs_super_data *sd = data; - - s->s_bdev = sd->bdev; + s->s_bdev = data; s->s_dev = s->s_bdev->bd_dev; return 0; } static int nilfs_test_bdev_super(struct super_block *s, void *data) { - struct nilfs_super_data *sd = data; - - return sd->sbi && s->s_fs_info == (void *)sd->sbi; + return (void *)s->s_bdev == data; } -static int -nilfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry * +nilfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) { struct nilfs_super_data sd; struct super_block *s; fmode_t mode = FMODE_READ; - struct the_nilfs *nilfs; - int err, need_to_close = 1; + struct dentry *root_dentry; + int err, s_new = false; if (!(flags & MS_RDONLY)) mode |= FMODE_WRITE; sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type); if (IS_ERR(sd.bdev)) - return PTR_ERR(sd.bdev); + return ERR_CAST(sd.bdev); - /* - * To get mount instance using sget() vfs-routine, NILFS needs - * much more information than normal filesystems to identify mount - * instance. For snapshot mounts, not only a mount type (ro-mount - * or rw-mount) but also a checkpoint number is required. - */ sd.cno = 0; sd.flags = flags; if (nilfs_identify((char *)data, &sd)) { @@ -1011,101 +1165,91 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, goto failed; } - nilfs = find_or_create_nilfs(sd.bdev); - if (!nilfs) { - err = -ENOMEM; - goto failed; - } - - mutex_lock(&nilfs->ns_mount_mutex); - - if (!sd.cno) { - /* - * Check if an exclusive mount exists or not. - * Snapshot mounts coexist with a current mount - * (i.e. rw-mount or ro-mount), whereas rw-mount and - * ro-mount are mutually exclusive. - */ - down_read(&nilfs->ns_super_sem); - if (nilfs->ns_current && - ((nilfs->ns_current->s_super->s_flags ^ flags) - & MS_RDONLY)) { - up_read(&nilfs->ns_super_sem); - err = -EBUSY; - goto failed_unlock; - } - up_read(&nilfs->ns_super_sem); - } - /* - * Find existing nilfs_sb_info struct + * once the super is inserted into the list by sget, s_umount + * will protect the lockfs code from trying to start a snapshot + * while we are mounting */ - sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); - - /* - * Get super block instance holding the nilfs_sb_info struct. - * A new instance is allocated if no existing mount is present or - * existing instance has been unmounted. - */ - s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); - if (sd.sbi) - nilfs_put_sbinfo(sd.sbi); - + mutex_lock(&sd.bdev->bd_fsfreeze_mutex); + if (sd.bdev->bd_fsfreeze_count > 0) { + mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); + err = -EBUSY; + goto failed; + } + s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev); + mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); if (IS_ERR(s)) { err = PTR_ERR(s); - goto failed_unlock; + goto failed; } if (!s->s_root) { char b[BDEVNAME_SIZE]; + s_new = true; + /* New superblock instance created */ s->s_flags = flags; s->s_mode = mode; strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(sd.bdev)); - err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0, - nilfs); + err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (err) - goto cancel_new; + goto failed_super; s->s_flags |= MS_ACTIVE; - need_to_close = 0; + } else if (!sd.cno) { + int busy = false; + + if (nilfs_tree_was_touched(s->s_root)) { + busy = nilfs_try_to_shrink_tree(s->s_root); + if (busy && (flags ^ s->s_flags) & MS_RDONLY) { + printk(KERN_ERR "NILFS: the device already " + "has a %s mount.\n", + (s->s_flags & MS_RDONLY) ? + "read-only" : "read/write"); + err = -EBUSY; + goto failed_super; + } + } + if (!busy) { + /* + * Try remount to setup mount states if the current + * tree is not mounted and only snapshots use this sb. + */ + err = nilfs_remount(s, &flags, data); + if (err) + goto failed_super; + } } - mutex_unlock(&nilfs->ns_mount_mutex); - put_nilfs(nilfs); - if (need_to_close) - close_bdev_exclusive(sd.bdev, mode); - simple_set_mnt(mnt, s); - return 0; + if (sd.cno) { + err = nilfs_attach_snapshot(s, sd.cno, &root_dentry); + if (err) + goto failed_super; + } else { + root_dentry = dget(s->s_root); + } - failed_unlock: - mutex_unlock(&nilfs->ns_mount_mutex); - put_nilfs(nilfs); - failed: - close_bdev_exclusive(sd.bdev, mode); + if (!s_new) + close_bdev_exclusive(sd.bdev, mode); - return err; + return root_dentry; - cancel_new: - /* Abandoning the newly allocated superblock */ - mutex_unlock(&nilfs->ns_mount_mutex); - put_nilfs(nilfs); + failed_super: deactivate_locked_super(s); - /* - * deactivate_locked_super() invokes close_bdev_exclusive(). - * We must finish all post-cleaning before this call; - * put_nilfs() needs the block device. - */ - return err; + + failed: + if (!s_new) + close_bdev_exclusive(sd.bdev, mode); + return ERR_PTR(err); } struct file_system_type nilfs_fs_type = { .owner = THIS_MODULE, .name = "nilfs2", - .get_sb = nilfs_get_sb, + .mount = nilfs_mount, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; @@ -1119,7 +1263,7 @@ static void nilfs_inode_init_once(void *obj) init_rwsem(&ii->xattr_sem); #endif nilfs_btnode_cache_init_once(&ii->i_btnode_cache); - ii->i_bmap = (struct nilfs_bmap *)&ii->i_bmap_union; + ii->i_bmap = &ii->i_bmap_data; inode_init_once(&ii->vfs_inode); } diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 8c10973..0254be2 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -35,8 +35,7 @@ #include "segbuf.h" -static LIST_HEAD(nilfs_objects); -static DEFINE_SPINLOCK(nilfs_lock); +static int nilfs_valid_sb(struct nilfs_super_block *sbp); void nilfs_set_last_segment(struct the_nilfs *nilfs, sector_t start_blocknr, u64 seq, __u64 cno) @@ -45,20 +44,27 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, nilfs->ns_last_pseg = start_blocknr; nilfs->ns_last_seq = seq; nilfs->ns_last_cno = cno; + + if (!nilfs_sb_dirty(nilfs)) { + if (nilfs->ns_prev_seq == nilfs->ns_last_seq) + goto stay_cursor; + + set_nilfs_sb_dirty(nilfs); + } + nilfs->ns_prev_seq = nilfs->ns_last_seq; + + stay_cursor: spin_unlock(&nilfs->ns_last_segment_lock); } /** - * alloc_nilfs - allocate the_nilfs structure + * alloc_nilfs - allocate a nilfs object * @bdev: block device to which the_nilfs is related * - * alloc_nilfs() allocates memory for the_nilfs and - * initializes its reference count and locks. - * * Return Value: On success, pointer to the_nilfs is returned. * On error, NULL is returned. */ -static struct the_nilfs *alloc_nilfs(struct block_device *bdev) +struct the_nilfs *alloc_nilfs(struct block_device *bdev) { struct the_nilfs *nilfs; @@ -67,92 +73,25 @@ static struct the_nilfs *alloc_nilfs(struct block_device *bdev) return NULL; nilfs->ns_bdev = bdev; - atomic_set(&nilfs->ns_count, 1); atomic_set(&nilfs->ns_ndirtyblks, 0); init_rwsem(&nilfs->ns_sem); - init_rwsem(&nilfs->ns_super_sem); - mutex_init(&nilfs->ns_mount_mutex); - init_rwsem(&nilfs->ns_writer_sem); - INIT_LIST_HEAD(&nilfs->ns_list); - INIT_LIST_HEAD(&nilfs->ns_supers); + INIT_LIST_HEAD(&nilfs->ns_gc_inodes); spin_lock_init(&nilfs->ns_last_segment_lock); - nilfs->ns_gc_inodes_h = NULL; + nilfs->ns_cptree = RB_ROOT; + spin_lock_init(&nilfs->ns_cptree_lock); init_rwsem(&nilfs->ns_segctor_sem); return nilfs; } /** - * find_or_create_nilfs - find or create nilfs object - * @bdev: block device to which the_nilfs is related - * - * find_nilfs() looks up an existent nilfs object created on the - * device and gets the reference count of the object. If no nilfs object - * is found on the device, a new nilfs object is allocated. - * - * Return Value: On success, pointer to the nilfs object is returned. - * On error, NULL is returned. + * destroy_nilfs - destroy nilfs object + * @nilfs: nilfs object to be released */ -struct the_nilfs *find_or_create_nilfs(struct block_device *bdev) +void destroy_nilfs(struct the_nilfs *nilfs) { - struct the_nilfs *nilfs, *new = NULL; - - retry: - spin_lock(&nilfs_lock); - list_for_each_entry(nilfs, &nilfs_objects, ns_list) { - if (nilfs->ns_bdev == bdev) { - get_nilfs(nilfs); - spin_unlock(&nilfs_lock); - if (new) - put_nilfs(new); - return nilfs; /* existing object */ - } - } - if (new) { - list_add_tail(&new->ns_list, &nilfs_objects); - spin_unlock(&nilfs_lock); - return new; /* new object */ - } - spin_unlock(&nilfs_lock); - - new = alloc_nilfs(bdev); - if (new) - goto retry; - return NULL; /* insufficient memory */ -} - -/** - * put_nilfs - release a reference to the_nilfs - * @nilfs: the_nilfs structure to be released - * - * put_nilfs() decrements a reference counter of the_nilfs. - * If the reference count reaches zero, the_nilfs is freed. - */ -void put_nilfs(struct the_nilfs *nilfs) -{ - spin_lock(&nilfs_lock); - if (!atomic_dec_and_test(&nilfs->ns_count)) { - spin_unlock(&nilfs_lock); - return; - } - list_del_init(&nilfs->ns_list); - spin_unlock(&nilfs_lock); - - /* - * Increment of ns_count never occurs below because the caller - * of get_nilfs() holds at least one reference to the_nilfs. - * Thus its exclusion control is not required here. - */ - might_sleep(); - if (nilfs_loaded(nilfs)) { - nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_destroy(nilfs->ns_dat); - nilfs_mdt_destroy(nilfs->ns_gc_dat); - } if (nilfs_init(nilfs)) { - nilfs_destroy_gccache(nilfs); brelse(nilfs->ns_sbh[0]); brelse(nilfs->ns_sbh[1]); } @@ -160,16 +99,17 @@ void put_nilfs(struct the_nilfs *nilfs) } static int nilfs_load_super_root(struct the_nilfs *nilfs, - struct nilfs_sb_info *sbi, sector_t sr_block) + struct super_block *sb, sector_t sr_block) { struct buffer_head *bh_sr; struct nilfs_super_root *raw_sr; struct nilfs_super_block **sbp = nilfs->ns_sbp; + struct nilfs_inode *rawi; unsigned dat_entry_size, segment_usage_size, checkpoint_size; unsigned inode_size; int err; - err = nilfs_read_super_root_block(sbi->s_super, sr_block, &bh_sr, 1); + err = nilfs_read_super_root_block(nilfs, sr_block, &bh_sr, 1); if (unlikely(err)) return err; @@ -181,40 +121,22 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, inode_size = nilfs->ns_inode_size; - err = -ENOMEM; - nilfs->ns_dat = nilfs_dat_new(nilfs, dat_entry_size); - if (unlikely(!nilfs->ns_dat)) + rawi = (void *)bh_sr->b_data + NILFS_SR_DAT_OFFSET(inode_size); + err = nilfs_dat_read(sb, dat_entry_size, rawi, &nilfs->ns_dat); + if (err) goto failed; - nilfs->ns_gc_dat = nilfs_dat_new(nilfs, dat_entry_size); - if (unlikely(!nilfs->ns_gc_dat)) + rawi = (void *)bh_sr->b_data + NILFS_SR_CPFILE_OFFSET(inode_size); + err = nilfs_cpfile_read(sb, checkpoint_size, rawi, &nilfs->ns_cpfile); + if (err) goto failed_dat; - nilfs->ns_cpfile = nilfs_cpfile_new(nilfs, checkpoint_size); - if (unlikely(!nilfs->ns_cpfile)) - goto failed_gc_dat; - - nilfs->ns_sufile = nilfs_sufile_new(nilfs, segment_usage_size); - if (unlikely(!nilfs->ns_sufile)) + rawi = (void *)bh_sr->b_data + NILFS_SR_SUFILE_OFFSET(inode_size); + err = nilfs_sufile_read(sb, segment_usage_size, rawi, + &nilfs->ns_sufile); + if (err) goto failed_cpfile; - nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); - - err = nilfs_dat_read(nilfs->ns_dat, (void *)bh_sr->b_data + - NILFS_SR_DAT_OFFSET(inode_size)); - if (unlikely(err)) - goto failed_sufile; - - err = nilfs_cpfile_read(nilfs->ns_cpfile, (void *)bh_sr->b_data + - NILFS_SR_CPFILE_OFFSET(inode_size)); - if (unlikely(err)) - goto failed_sufile; - - err = nilfs_sufile_read(nilfs->ns_sufile, (void *)bh_sr->b_data + - NILFS_SR_SUFILE_OFFSET(inode_size)); - if (unlikely(err)) - goto failed_sufile; - raw_sr = (struct nilfs_super_root *)bh_sr->b_data; nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime); @@ -222,17 +144,11 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, brelse(bh_sr); return err; - failed_sufile: - nilfs_mdt_destroy(nilfs->ns_sufile); - failed_cpfile: - nilfs_mdt_destroy(nilfs->ns_cpfile); - - failed_gc_dat: - nilfs_mdt_destroy(nilfs->ns_gc_dat); + iput(nilfs->ns_cpfile); failed_dat: - nilfs_mdt_destroy(nilfs->ns_dat); + iput(nilfs->ns_dat); goto failed; } @@ -248,6 +164,37 @@ static void nilfs_clear_recovery_info(struct nilfs_recovery_info *ri) } /** + * nilfs_store_log_cursor - load log cursor from a super block + * @nilfs: nilfs object + * @sbp: buffer storing super block to be read + * + * nilfs_store_log_cursor() reads the last position of the log + * containing a super root from a given super block, and initializes + * relevant information on the nilfs object preparatory for log + * scanning and recovery. + */ +static int nilfs_store_log_cursor(struct the_nilfs *nilfs, + struct nilfs_super_block *sbp) +{ + int ret = 0; + + nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); + nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); + nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); + + nilfs->ns_prev_seq = nilfs->ns_last_seq; + nilfs->ns_seg_seq = nilfs->ns_last_seq; + nilfs->ns_segnum = + nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); + nilfs->ns_cno = nilfs->ns_last_cno + 1; + if (nilfs->ns_segnum >= nilfs->ns_nsegments) { + printk(KERN_ERR "NILFS invalid last segment number.\n"); + ret = -EINVAL; + } + return ret; +} + +/** * load_nilfs - load and recover the nilfs * @nilfs: the_nilfs structure to be released * @sbi: nilfs_sb_info used to recover past segment @@ -264,15 +211,6 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) int valid_fs = nilfs_valid_fs(nilfs); int err; - if (nilfs_loaded(nilfs)) { - if (valid_fs || - ((s_flags & MS_RDONLY) && nilfs_test_opt(sbi, NORECOVERY))) - return 0; - printk(KERN_ERR "NILFS: the filesystem is in an incomplete " - "recovery state.\n"); - return -EINVAL; - } - if (!valid_fs) { printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); if (s_flags & MS_RDONLY) { @@ -285,13 +223,55 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) nilfs_init_recovery_info(&ri); - err = nilfs_search_super_root(nilfs, sbi, &ri); + err = nilfs_search_super_root(nilfs, &ri); if (unlikely(err)) { - printk(KERN_ERR "NILFS: error searching super root.\n"); - goto failed; + struct nilfs_super_block **sbp = nilfs->ns_sbp; + int blocksize; + + if (err != -EINVAL) + goto scan_error; + + if (!nilfs_valid_sb(sbp[1])) { + printk(KERN_WARNING + "NILFS warning: unable to fall back to spare" + "super block\n"); + goto scan_error; + } + printk(KERN_INFO + "NILFS: try rollback from an earlier position\n"); + + /* + * restore super block with its spare and reconfigure + * relevant states of the nilfs object. + */ + memcpy(sbp[0], sbp[1], nilfs->ns_sbsize); + nilfs->ns_crc_seed = le32_to_cpu(sbp[0]->s_crc_seed); + nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); + + /* verify consistency between two super blocks */ + blocksize = BLOCK_SIZE << le32_to_cpu(sbp[0]->s_log_block_size); + if (blocksize != nilfs->ns_blocksize) { + printk(KERN_WARNING + "NILFS warning: blocksize differs between " + "two super blocks (%d != %d)\n", + blocksize, nilfs->ns_blocksize); + goto scan_error; + } + + err = nilfs_store_log_cursor(nilfs, sbp[0]); + if (err) + goto scan_error; + + /* drop clean flag to allow roll-forward and recovery */ + nilfs->ns_mount_state &= ~NILFS_VALID_FS; + valid_fs = 0; + + err = nilfs_search_super_root(nilfs, &ri); + if (err) + goto scan_error; } - err = nilfs_load_super_root(nilfs, sbi, ri.ri_super_root); + err = nilfs_load_super_root(nilfs, sbi->s_super, ri.ri_super_root); if (unlikely(err)) { printk(KERN_ERR "NILFS: error loading super root.\n"); goto failed; @@ -301,11 +281,23 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) goto skip_recovery; if (s_flags & MS_RDONLY) { + __u64 features; + if (nilfs_test_opt(sbi, NORECOVERY)) { printk(KERN_INFO "NILFS: norecovery option specified. " "skipping roll-forward recovery\n"); goto skip_recovery; } + features = le64_to_cpu(nilfs->ns_sbp[0]->s_feature_compat_ro) & + ~NILFS_FEATURE_COMPAT_RO_SUPP; + if (features) { + printk(KERN_ERR "NILFS: couldn't proceed with " + "recovery because of unsupported optional " + "features (%llx)\n", + (unsigned long long)features); + err = -EROFS; + goto failed_unload; + } if (really_read_only) { printk(KERN_ERR "NILFS: write access " "unavailable, cannot proceed.\n"); @@ -320,14 +312,13 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) goto failed_unload; } - err = nilfs_recover_logical_segments(nilfs, sbi, &ri); + err = nilfs_salvage_orphan_logs(nilfs, sbi, &ri); if (err) goto failed_unload; down_write(&nilfs->ns_sem); - nilfs->ns_mount_state |= NILFS_VALID_FS; - nilfs->ns_sbp[0]->s_state = cpu_to_le16(nilfs->ns_mount_state); - err = nilfs_commit_super(sbi, 1); + nilfs->ns_mount_state |= NILFS_VALID_FS; /* set "clean" flag */ + err = nilfs_cleanup_super(sbi); up_write(&nilfs->ns_sem); if (err) { @@ -343,10 +334,14 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) sbi->s_super->s_flags = s_flags; return 0; + scan_error: + printk(KERN_ERR "NILFS: error searching super root.\n"); + goto failed; + failed_unload: - nilfs_mdt_destroy(nilfs->ns_cpfile); - nilfs_mdt_destroy(nilfs->ns_sufile); - nilfs_mdt_destroy(nilfs->ns_dat); + iput(nilfs->ns_cpfile); + iput(nilfs->ns_sufile); + iput(nilfs->ns_dat); failed: nilfs_clear_recovery_info(&ri); @@ -368,8 +363,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) static int nilfs_store_disk_layout(struct the_nilfs *nilfs, struct nilfs_super_block *sbp) { - if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) { - printk(KERN_ERR "NILFS: revision mismatch " + if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { + printk(KERN_ERR "NILFS: unsupported revision " "(superblock rev.=%d.%d, current rev.=%d.%d). " "Please check the version of mkfs.nilfs.\n", le32_to_cpu(sbp->s_rev_level), @@ -509,14 +504,14 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, return -EINVAL; } - if (swp) { + if (!valid[!swp]) printk(KERN_WARNING "NILFS warning: broken superblock. " "using spare superblock.\n"); + if (swp) nilfs_swap_super_block(nilfs); - } - nilfs->ns_sbwtime[0] = le64_to_cpu(sbp[0]->s_wtime); - nilfs->ns_sbwtime[1] = valid[!swp] ? le64_to_cpu(sbp[1]->s_wtime) : 0; + nilfs->ns_sbwcount = 0; + nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); nilfs->ns_prot_seq = le64_to_cpu(sbp[valid[1] & !swp]->s_last_seq); *sbpp = sbp[0]; return 0; @@ -531,12 +526,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, * * init_nilfs() performs common initialization per block device (e.g. * reading the super block, getting disk layout information, initializing - * shared fields in the_nilfs). It takes on some portion of the jobs - * typically done by a fill_super() routine. This division arises from - * the nature that multiple NILFS instances may be simultaneously - * mounted on a device. - * For multiple mounts on the same device, only the first mount - * invokes these tasks. + * shared fields in the_nilfs). * * Return Value: On success, 0 is returned. On error, a negative error * code is returned. @@ -545,30 +535,12 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) { struct super_block *sb = sbi->s_super; struct nilfs_super_block *sbp; - struct backing_dev_info *bdi; int blocksize; int err; down_write(&nilfs->ns_sem); - if (nilfs_init(nilfs)) { - /* Load values from existing the_nilfs */ - sbp = nilfs->ns_sbp[0]; - err = nilfs_store_magic_and_option(sb, sbp, data); - if (err) - goto out; - - blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); - if (sb->s_blocksize != blocksize && - !sb_set_blocksize(sb, blocksize)) { - printk(KERN_ERR "NILFS: blocksize %d unfit to device\n", - blocksize); - err = -EINVAL; - } - sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); - goto out; - } - blocksize = sb_min_blocksize(sb, BLOCK_SIZE); + blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); if (!blocksize) { printk(KERN_ERR "NILFS: unable to set blocksize\n"); err = -EINVAL; @@ -582,7 +554,18 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) if (err) goto failed_sbh; + err = nilfs_check_feature_compatibility(sb, sbp); + if (err) + goto failed_sbh; + blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); + if (blocksize < NILFS_MIN_BLOCK_SIZE || + blocksize > NILFS_MAX_BLOCK_SIZE) { + printk(KERN_ERR "NILFS: couldn't mount because of unsupported " + "filesystem blocksize %d\n", blocksize); + err = -EINVAL; + goto failed_sbh; + } if (sb->s_blocksize != blocksize) { int hw_blocksize = bdev_logical_block_size(sb->s_bdev); @@ -604,6 +587,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) when reloading fails. */ } nilfs->ns_blocksize_bits = sb->s_blocksize_bits; + nilfs->ns_blocksize = blocksize; err = nilfs_store_disk_layout(nilfs, sbp); if (err) @@ -613,29 +597,7 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); - bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; - nilfs->ns_bdi = bdi ? : &default_backing_dev_info; - - /* Finding last segment */ - nilfs->ns_last_pseg = le64_to_cpu(sbp->s_last_pseg); - nilfs->ns_last_cno = le64_to_cpu(sbp->s_last_cno); - nilfs->ns_last_seq = le64_to_cpu(sbp->s_last_seq); - - nilfs->ns_seg_seq = nilfs->ns_last_seq; - nilfs->ns_segnum = - nilfs_get_segnum_of_block(nilfs, nilfs->ns_last_pseg); - nilfs->ns_cno = nilfs->ns_last_cno + 1; - if (nilfs->ns_segnum >= nilfs->ns_nsegments) { - printk(KERN_ERR "NILFS invalid last segment number.\n"); - err = -EINVAL; - goto failed_sbh; - } - /* Dummy values */ - nilfs->ns_free_segments_count = - nilfs->ns_nsegments - (nilfs->ns_segnum + 1); - - /* Initialize gcinode cache */ - err = nilfs_init_gccache(nilfs); + err = nilfs_store_log_cursor(nilfs, sbp); if (err) goto failed_sbh; @@ -673,8 +635,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, - BLKDEV_IFL_BARRIER); + GFP_NOFS, 0); if (ret < 0) return ret; nblocks = 0; @@ -684,7 +645,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, BLKDEV_IFL_BARRIER); + GFP_NOFS, 0); return ret; } @@ -711,79 +672,92 @@ int nilfs_near_disk_full(struct the_nilfs *nilfs) return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs; } -/** - * nilfs_find_sbinfo - find existing nilfs_sb_info structure - * @nilfs: nilfs object - * @rw_mount: mount type (non-zero value for read/write mount) - * @cno: checkpoint number (zero for read-only mount) - * - * nilfs_find_sbinfo() returns the nilfs_sb_info structure which - * @rw_mount and @cno (in case of snapshots) matched. If no instance - * was found, NULL is returned. Although the super block instance can - * be unmounted after this function returns, the nilfs_sb_info struct - * is kept on memory until nilfs_put_sbinfo() is called. - */ -struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs, - int rw_mount, __u64 cno) +struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno) { - struct nilfs_sb_info *sbi; - - down_read(&nilfs->ns_super_sem); - /* - * The SNAPSHOT flag and sb->s_flags are supposed to be - * protected with nilfs->ns_super_sem. - */ - sbi = nilfs->ns_current; - if (rw_mount) { - if (sbi && !(sbi->s_super->s_flags & MS_RDONLY)) - goto found; /* read/write mount */ - else - goto out; - } else if (cno == 0) { - if (sbi && (sbi->s_super->s_flags & MS_RDONLY)) - goto found; /* read-only mount */ - else - goto out; + struct rb_node *n; + struct nilfs_root *root; + + spin_lock(&nilfs->ns_cptree_lock); + n = nilfs->ns_cptree.rb_node; + while (n) { + root = rb_entry(n, struct nilfs_root, rb_node); + + if (cno < root->cno) { + n = n->rb_left; + } else if (cno > root->cno) { + n = n->rb_right; + } else { + atomic_inc(&root->count); + spin_unlock(&nilfs->ns_cptree_lock); + return root; + } } + spin_unlock(&nilfs->ns_cptree_lock); - list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { - if (nilfs_test_opt(sbi, SNAPSHOT) && - sbi->s_snapshot_cno == cno) - goto found; /* snapshot mount */ - } - out: - up_read(&nilfs->ns_super_sem); return NULL; - - found: - atomic_inc(&sbi->s_count); - up_read(&nilfs->ns_super_sem); - return sbi; } -int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, - int snapshot_mount) +struct nilfs_root * +nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) { - struct nilfs_sb_info *sbi; - int ret = 0; + struct rb_node **p, *parent; + struct nilfs_root *root, *new; + + root = nilfs_lookup_root(nilfs, cno); + if (root) + return root; + + new = kmalloc(sizeof(*root), GFP_KERNEL); + if (!new) + return NULL; + + spin_lock(&nilfs->ns_cptree_lock); - down_read(&nilfs->ns_super_sem); - if (cno == 0 || cno > nilfs->ns_cno) - goto out_unlock; + p = &nilfs->ns_cptree.rb_node; + parent = NULL; - list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { - if (sbi->s_snapshot_cno == cno && - (!snapshot_mount || nilfs_test_opt(sbi, SNAPSHOT))) { - /* exclude read-only mounts */ - ret++; - break; + while (*p) { + parent = *p; + root = rb_entry(parent, struct nilfs_root, rb_node); + + if (cno < root->cno) { + p = &(*p)->rb_left; + } else if (cno > root->cno) { + p = &(*p)->rb_right; + } else { + atomic_inc(&root->count); + spin_unlock(&nilfs->ns_cptree_lock); + kfree(new); + return root; } } - /* for protecting recent checkpoints */ - if (cno >= nilfs_last_cno(nilfs)) - ret++; - out_unlock: - up_read(&nilfs->ns_super_sem); - return ret; + new->cno = cno; + new->ifile = NULL; + new->nilfs = nilfs; + atomic_set(&new->count, 1); + atomic_set(&new->inodes_count, 0); + atomic_set(&new->blocks_count, 0); + + rb_link_node(&new->rb_node, parent, p); + rb_insert_color(&new->rb_node, &nilfs->ns_cptree); + + spin_unlock(&nilfs->ns_cptree_lock); + + return new; +} + +void nilfs_put_root(struct nilfs_root *root) +{ + if (atomic_dec_and_test(&root->count)) { + struct the_nilfs *nilfs = root->nilfs; + + spin_lock(&nilfs->ns_cptree_lock); + rb_erase(&root->rb_node, &nilfs->ns_cptree); + spin_unlock(&nilfs->ns_cptree_lock); + if (root->ifile) + iput(root->ifile); + + kfree(root); + } } diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index 1ab9745..69226e1 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h @@ -26,6 +26,7 @@ #include <linux/types.h> #include <linux/buffer_head.h> +#include <linux/rbtree.h> #include <linux/fs.h> #include <linux/blkdev.h> #include <linux/backing-dev.h> @@ -45,21 +46,13 @@ enum { /** * struct the_nilfs - struct to supervise multiple nilfs mount points * @ns_flags: flags - * @ns_count: reference count - * @ns_list: list head for nilfs_list * @ns_bdev: block device - * @ns_bdi: backing dev info - * @ns_writer: back pointer to writable nilfs_sb_info * @ns_sem: semaphore for shared states - * @ns_super_sem: semaphore for global operations across super block instances - * @ns_mount_mutex: mutex protecting mount process of nilfs - * @ns_writer_sem: semaphore protecting ns_writer attach/detach - * @ns_current: back pointer to current mount * @ns_sbh: buffer heads of on-disk super blocks * @ns_sbp: pointers to super block data - * @ns_sbwtime: previous write time of super blocks + * @ns_sbwtime: previous write time of super block + * @ns_sbwcount: write count of super block * @ns_sbsize: size of valid data in super block - * @ns_supers: list of nilfs super block structs * @ns_seg_seq: segment sequence counter * @ns_segnum: index number of the latest full segment. * @ns_nextnum: index number of the full segment index to be used next @@ -73,15 +66,16 @@ enum { * @ns_last_seq: sequence value of the latest segment * @ns_last_cno: checkpoint number of the latest segment * @ns_prot_seq: least sequence number of segments which must not be reclaimed - * @ns_free_segments_count: counter of free segments + * @ns_prev_seq: base sequence number used to decide if advance log cursor * @ns_segctor_sem: segment constructor semaphore * @ns_dat: DAT file inode * @ns_cpfile: checkpoint file inode * @ns_sufile: segusage file inode - * @ns_gc_dat: shadow inode of the DAT file inode for GC + * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root) + * @ns_cptree_lock: lock protecting @ns_cptree * @ns_gc_inodes: dummy inodes to keep live blocks - * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks * @ns_blocksize_bits: bit length of block size + * @ns_blocksize: block size * @ns_nsegments: number of segments in filesystem * @ns_blocks_per_segment: number of blocks per segment * @ns_r_segments_percentage: reserved segments percentage @@ -93,22 +87,9 @@ enum { */ struct the_nilfs { unsigned long ns_flags; - atomic_t ns_count; - struct list_head ns_list; struct block_device *ns_bdev; - struct backing_dev_info *ns_bdi; - struct nilfs_sb_info *ns_writer; struct rw_semaphore ns_sem; - struct rw_semaphore ns_super_sem; - struct mutex ns_mount_mutex; - struct rw_semaphore ns_writer_sem; - - /* - * components protected by ns_super_sem - */ - struct nilfs_sb_info *ns_current; - struct list_head ns_supers; /* * used for @@ -119,7 +100,8 @@ struct the_nilfs { */ struct buffer_head *ns_sbh[2]; struct nilfs_super_block *ns_sbp[2]; - time_t ns_sbwtime[2]; + time_t ns_sbwtime; + unsigned ns_sbwcount; unsigned ns_sbsize; unsigned ns_mount_state; @@ -149,7 +131,7 @@ struct the_nilfs { u64 ns_last_seq; __u64 ns_last_cno; u64 ns_prot_seq; - unsigned long ns_free_segments_count; + u64 ns_prev_seq; struct rw_semaphore ns_segctor_sem; @@ -160,14 +142,17 @@ struct the_nilfs { struct inode *ns_dat; struct inode *ns_cpfile; struct inode *ns_sufile; - struct inode *ns_gc_dat; - /* GC inode list and hash table head */ + /* Checkpoint tree */ + struct rb_root ns_cptree; + spinlock_t ns_cptree_lock; + + /* GC inode list */ struct list_head ns_gc_inodes; - struct hlist_head *ns_gc_inodes_h; /* Disk layout information (static) */ unsigned int ns_blocksize_bits; + unsigned int ns_blocksize; unsigned long ns_nsegments; unsigned long ns_blocks_per_segment; unsigned long ns_r_segments_percentage; @@ -178,9 +163,6 @@ struct the_nilfs { u32 ns_crc_seed; }; -#define NILFS_GCINODE_HASH_BITS 8 -#define NILFS_GCINODE_HASH_SIZE (1<<NILFS_GCINODE_HASH_BITS) - #define THE_NILFS_FNS(bit, name) \ static inline void set_nilfs_##name(struct the_nilfs *nilfs) \ { \ @@ -201,65 +183,67 @@ THE_NILFS_FNS(DISCONTINUED, discontinued) THE_NILFS_FNS(GC_RUNNING, gc_running) THE_NILFS_FNS(SB_DIRTY, sb_dirty) +/** + * struct nilfs_root - nilfs root object + * @cno: checkpoint number + * @rb_node: red-black tree node + * @count: refcount of this structure + * @nilfs: nilfs object + * @ifile: inode file + * @root: root inode + * @inodes_count: number of inodes + * @blocks_count: number of blocks (Reserved) + */ +struct nilfs_root { + __u64 cno; + struct rb_node rb_node; + + atomic_t count; + struct the_nilfs *nilfs; + struct inode *ifile; + + atomic_t inodes_count; + atomic_t blocks_count; +}; + +/* Special checkpoint number */ +#define NILFS_CPTREE_CURRENT_CNO 0 + /* Minimum interval of periodical update of superblocks (in seconds) */ #define NILFS_SB_FREQ 10 -#define NILFS_ALTSB_FREQ 60 /* spare superblock */ static inline int nilfs_sb_need_update(struct the_nilfs *nilfs) { u64 t = get_seconds(); - return t < nilfs->ns_sbwtime[0] || - t > nilfs->ns_sbwtime[0] + NILFS_SB_FREQ; + return t < nilfs->ns_sbwtime || t > nilfs->ns_sbwtime + NILFS_SB_FREQ; } -static inline int nilfs_altsb_need_update(struct the_nilfs *nilfs) +static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) { - u64 t = get_seconds(); - struct nilfs_super_block **sbp = nilfs->ns_sbp; - return sbp[1] && t > nilfs->ns_sbwtime[1] + NILFS_ALTSB_FREQ; + int flip_bits = nilfs->ns_sbwcount & 0x0FL; + return (flip_bits != 0x08 && flip_bits != 0x0F); } void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); -struct the_nilfs *find_or_create_nilfs(struct block_device *); -void put_nilfs(struct the_nilfs *); +struct the_nilfs *alloc_nilfs(struct block_device *bdev); +void destroy_nilfs(struct the_nilfs *nilfs); int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); +struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); +struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs, + __u64 cno); +void nilfs_put_root(struct nilfs_root *root); struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64); -int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); int nilfs_near_disk_full(struct the_nilfs *); void nilfs_fall_back_super_block(struct the_nilfs *); void nilfs_swap_super_block(struct the_nilfs *); -static inline void get_nilfs(struct the_nilfs *nilfs) -{ - /* Caller must have at least one reference of the_nilfs. */ - atomic_inc(&nilfs->ns_count); -} - -static inline void -nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) -{ - down_write(&nilfs->ns_writer_sem); - nilfs->ns_writer = sbi; - up_write(&nilfs->ns_writer_sem); -} - -static inline void -nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) -{ - down_write(&nilfs->ns_writer_sem); - if (sbi == nilfs->ns_writer) - nilfs->ns_writer = NULL; - up_write(&nilfs->ns_writer_sem); -} - -static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) +static inline void nilfs_get_root(struct nilfs_root *root) { - if (atomic_dec_and_test(&sbi->s_count)) - kfree(sbi); + atomic_inc(&root->count); } static inline int nilfs_valid_fs(struct the_nilfs *nilfs) |