From 65e5cd0a151d53d3d79ef4d81783d1dbc01d4b61 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 14 May 2013 15:47:43 +0900 Subject: f2fs: fix inconsistency of block count during recovery Currently f2fs recovers the dentry of fsynced files. When power-off-recovery is conducted, this newly recovered inode should increase node block count as well as inode block count. This patch resolves this inconsistency that results in: 1. create a file 2. write data 3. fsync 4. reboot without sync 5. mount and recover the file 6. node block count is 1 and inode block count is 2 : fall into the inconsistent state 7. unlink the file : trigger the following BUG_ON ------------[ cut here ]------------ kernel BUG at /home/zeus/f2fs_test/src/fs/f2fs/f2fs.h:716! Call Trace: [] ? get_node_page+0x50/0x1a0 [f2fs] [] remove_inode_page+0x8c/0x100 [f2fs] [] ? f2fs_evict_inode+0x180/0x2d0 [f2fs] [] f2fs_evict_inode+0x1be/0x2d0 [f2fs] [] evict+0xa7/0x1a0 [] iput+0x105/0x190 [] d_kill+0xe0/0x120 [] dput+0xe7/0x1e0 [] __fput+0x19d/0x2d0 [] ____fput+0xe/0x10 [] task_work_run+0xb5/0xe0 [] do_notify_resume+0x71/0xb0 [] int_signal+0x12/0x17 Reported-and-Tested-by: Chris Fries Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3df43b4..9641534 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1492,6 +1492,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) new_ni = old_ni; new_ni.ino = ino; + if (!inc_valid_node_count(sbi, NULL, 1)) + WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR); inc_valid_inode_count(sbi); -- cgit v0.10.2 From 650495dedc34daf8590c708a5b48f82ed2787b75 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 May 2013 08:38:35 +0900 Subject: f2fs: fix the inconsistent state of data pages In get_lock_data_page, if there is a data race between get_dnode_of_data for node and grab_cache_page for data, f2fs is able to face with the following BUG_ON(dn.data_blkaddr == NEW_ADDR). 
kernel BUG at /home/zeus/f2fs_test/src/fs/f2fs/data.c:251! [] get_lock_data_page+0x1ec/0x210 [f2fs] Call Trace: [] f2fs_readdir+0x89/0x210 [f2fs] [] ? fillonedir+0x100/0x100 [] ? fillonedir+0x100/0x100 [] vfs_readdir+0xb8/0xe0 [] sys_getdents+0x8f/0x110 [] system_call_fastpath+0x16/0x1b This bug can occur when the block address of the data block is changed after f2fs_put_dnode(). In order to avoid that, this patch fixes the lock order of node and data blocks in which the node block lock is covered by the data block lock. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 91ff93b..05fb5c6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -233,18 +233,23 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) struct page *page; int err; +repeat: + page = grab_cache_page(mapping, index); + if (!page) + return ERR_PTR(-ENOMEM); + set_new_dnode(&dn, inode, NULL, NULL, 0); err = get_dnode_of_data(&dn, index, LOOKUP_NODE); - if (err) + if (err) { + f2fs_put_page(page, 1); return ERR_PTR(err); + } f2fs_put_dnode(&dn); - if (dn.data_blkaddr == NULL_ADDR) + if (dn.data_blkaddr == NULL_ADDR) { + f2fs_put_page(page, 1); return ERR_PTR(-ENOENT); -repeat: - page = grab_cache_page(mapping, index); - if (!page) - return ERR_PTR(-ENOMEM); + } if (PageUptodate(page)) return page; -- cgit v0.10.2 From addbe45b005d73f876d55bcfc16f4a6ce52a55e3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 May 2013 10:49:13 +0900 Subject: f2fs: remove redundant assignment We don't need to assign a value redundantly. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 60c8a50..2941987 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -126,7 +126,6 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) entry = get_fsync_inode(head, ino_of_node(page)); if (entry) { - entry->blkaddr = blkaddr; if (IS_INODE(page) && is_dent_dnode(page)) set_inode_flag(F2FS_I(entry->inode), FI_INC_LINK); @@ -150,10 +149,10 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) kmem_cache_free(fsync_entry_slab, entry); goto unlock_out; } - list_add_tail(&entry->list, head); - entry->blkaddr = blkaddr; } + entry->blkaddr = blkaddr; + if (IS_INODE(page)) { err = recover_inode(entry->inode, page); if (err == -ENOENT) { -- cgit v0.10.2 From 8c26d7d5717adf7f06d98c4416852d09566edd7c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 May 2013 16:12:18 +0900 Subject: f2fs: fix por_doing variable coverage The reason of using sbi->por_doing is to alleviate data writes during the recovery. The find_fsync_dnodes() produces some dirty dentry pages, so we should cover it too with sbi->por_doing. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2941987..4d89514 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -381,6 +381,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) INIT_LIST_HEAD(&inode_list); /* step #1: find fsynced inode numbers */ + sbi->por_doing = 1; err = find_fsync_dnodes(sbi, &inode_list); if (err) goto out; @@ -389,13 +390,12 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) goto out; /* step #2: recover data */ - sbi->por_doing = 1; err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); - sbi->por_doing = 0; BUG_ON(!list_empty(&inode_list)); out: destroy_fsync_dnodes(sbi, &inode_list); kmem_cache_destroy(fsync_entry_slab); + sbi->por_doing = 0; write_checkpoint(sbi, false); return err; } -- cgit v0.10.2 From 74d0b917ef7789097e12d60fc054efa427ce9171 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 May 2013 16:40:02 +0900 Subject: f2fs: fix BUG_ON during f2fs_evict_inode(dir) During the dentry recovery routine, recover_inode() triggers __f2fs_add_link with its directory inode. In the following scenario, a bug is captured. 1. dir = f2fs_iget(pino) 2. __f2fs_add_link(dir, name) 3. iput(dir) -> f2fs_evict_inode() faces with BUG_ON(atomic_read(fi->dirty_dents)) Kernel BUG at ffffffffa01c0676 [verbose debug info unavailable] [] f2fs_evict_inode+0x276/0x300 [f2fs] Call Trace: [] evict+0xb0/0x1b0 [] iput+0x105/0x190 [] recover_fsync_data+0x3bc/0x1070 [f2fs] [] ? io_schedule+0xaa/0xd0 [] ? __wait_on_bit_lock+0x7b/0xc0 [] ? __lock_page+0x67/0x70 [] ? kmem_cache_alloc+0x31/0x140 [] ? __d_instantiate+0x92/0xf0 [] ? security_d_instantiate+0x1b/0x30 [] ? d_instantiate+0x54/0x70 This means that we should flush all the dentry pages between iget and iput(). But, during the recovery routine, it is unallowed due to consistency, so we have to wait the whole recovery process. 
And then, write_checkpoint flushes all the dirty dentry blocks, and nicely we can put the stale dir inodes from the dirty_dir_inode_list. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b1de01d..3d11449 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -514,6 +514,29 @@ void remove_dirty_dir_inode(struct inode *inode) } out: spin_unlock(&sbi->dir_inode_lock); + + /* Only from the recovery routine */ + if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) + iput(inode); +} + +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct list_head *head = &sbi->dir_inode_list; + struct list_head *this; + struct inode *inode = NULL; + + spin_lock(&sbi->dir_inode_lock); + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode->i_ino == ino) { + inode = entry->inode; + break; + } + } + spin_unlock(&sbi->dir_inode_lock); + return inode; } void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20aab02..ef6cac8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -846,6 +846,7 @@ enum { FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ + FI_DELAY_IPUT, /* used for the recovery */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1012,6 +1013,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); void remove_dirty_dir_inode(struct inode *); +struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); void sync_dirty_dir_inodes(struct f2fs_sb_info *); void write_checkpoint(struct f2fs_sb_info *, bool); void init_orphan_info(struct f2fs_sb_info *); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 4d89514..23f5803 100644 --- 
a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -42,6 +42,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode) { struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); struct f2fs_inode *raw_inode = &(raw_node->i); + nid_t pino = le32_to_cpu(raw_inode->i_pino); struct qstr name; struct f2fs_dir_entry *de; struct page *page; @@ -51,10 +52,14 @@ static int recover_dentry(struct page *ipage, struct inode *inode) if (!is_dent_dnode(ipage)) goto out; - dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino)); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto out; + dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); + if (!dir) { + dir = f2fs_iget(inode->i_sb, pino); + if (IS_ERR(dir)) { + err = PTR_ERR(dir); + goto out; + } + set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); } name.len = le32_to_cpu(raw_inode->i_namelen); @@ -67,7 +72,6 @@ static int recover_dentry(struct page *ipage, struct inode *inode) } else { err = __f2fs_add_link(dir, &name, inode); } - iput(dir); out: kunmap(ipage); return err; -- cgit v0.10.2 From 0a364af18f27b86869149c4d128262ec1e0ccb25 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 May 2013 08:57:43 +0900 Subject: f2fs: remove unnecessary por_doing check This por_doing check is totally not related to the recovery process. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 47abc97..729b285 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -149,8 +149,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); - if (!sbi->por_doing) - d_instantiate(dentry, inode); + d_instantiate(dentry, inode); unlock_new_inode(inode); return 0; out: -- cgit v0.10.2 From 1646cfac952ff87fcbc18a77164472aa61d08094 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 09:42:28 +0900 Subject: f2fs: skip get_node_page if locked node page is passed If get_dnode_of_data gets a locked node page, let's skip redundant get_node_page calls. 
This is for further enhancement. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9641534..f63f0a4 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -408,10 +408,13 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) level = get_node_path(index, offset, noffset); nids[0] = dn->inode->i_ino; - npage[0] = get_node_page(sbi, nids[0]); - if (IS_ERR(npage[0])) - return PTR_ERR(npage[0]); + npage[0] = dn->inode_page; + if (!npage[0]) { + npage[0] = get_node_page(sbi, nids[0]); + if (IS_ERR(npage[0])) + return PTR_ERR(npage[0]); + } parent = npage[0]; if (level != 0) nids[1] = get_nid(parent, offset[0], true); -- cgit v0.10.2 From 64aa7ed98db489d1c41ef140876ada38498678ab Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 09:55:50 +0900 Subject: f2fs: change get_new_data_page to pass a locked node page This patch is for passing a locked node page to get_dnode_of_data. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 05fb5c6..af74549 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -280,8 +280,8 @@ repeat: * Also, caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). 
*/ -struct page *get_new_data_page(struct inode *inode, pgoff_t index, - bool new_i_size) +struct page *get_new_data_page(struct inode *inode, + struct page *npage, pgoff_t index, bool new_i_size) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct address_space *mapping = inode->i_mapping; @@ -289,18 +289,20 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, struct dnode_of_data dn; int err; - set_new_dnode(&dn, inode, NULL, NULL, 0); + set_new_dnode(&dn, inode, npage, npage, 0); err = get_dnode_of_data(&dn, index, ALLOC_NODE); if (err) return ERR_PTR(err); if (dn.data_blkaddr == NULL_ADDR) { if (reserve_new_block(&dn)) { - f2fs_put_dnode(&dn); + if (!npage) + f2fs_put_dnode(&dn); return ERR_PTR(-ENOSPC); } } - f2fs_put_dnode(&dn); + if (!npage) + f2fs_put_dnode(&dn); repeat: page = grab_cache_page(mapping, index); if (!page) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1ac6b93..7db6e58 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -287,7 +287,7 @@ static int make_empty_dir(struct inode *inode, struct inode *parent) struct f2fs_dir_entry *de; void *kaddr; - dentry_page = get_new_data_page(inode, 0, true); + dentry_page = get_new_data_page(inode, NULL, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -448,7 +448,7 @@ start: bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); for (block = bidx; block <= (bidx + nblock - 1); block++) { - dentry_page = get_new_data_page(dir, block, true); + dentry_page = get_new_data_page(dir, NULL, block, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ef6cac8..cbae2b6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1027,7 +1027,7 @@ int reserve_new_block(struct dnode_of_data *); void update_extent_cache(block_t, struct dnode_of_data *); struct page *find_data_page(struct inode *, pgoff_t, bool); struct page *get_lock_data_page(struct inode *, pgoff_t); -struct page *get_new_data_page(struct inode 
*, pgoff_t, bool); +struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); int do_write_data_page(struct page *); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1cae864..b8e34db 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -387,7 +387,7 @@ static void fill_zero(struct inode *inode, pgoff_t index, f2fs_balance_fs(sbi); ilock = mutex_lock_op(sbi); - page = get_new_data_page(inode, index, false); + page = get_new_data_page(inode, NULL, index, false); mutex_unlock_op(sbi, ilock); if (!IS_ERR(page)) { -- cgit v0.10.2 From 44a83ff6a81d84ab83bcb43a49ff1ba6c7e17cd1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 10:10:29 +0900 Subject: f2fs: update inode page after creation I found a bug when testing power-off-recovery as follows. [Bug Scenario] 1. create a file 2. fsync the file 3. reboot w/o any sync 4. try to recover the file - found its fsync mark - found its dentry mark : try to recover its dentry - get its file name - get its parent inode number : here we got zero value The reason why we get the wrong parent inode number is that we didn't synchronize the inode page with its newly created inode information perfectly. Especially, previous f2fs stores fi->i_pino and writes it to the cached node page in a wrong order, which incurs the zero-valued i_pino during the recovery. So, this patch modifies the creation flow to fix the synchronization order of inode page with its inode. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index af74549..c320f7f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -279,6 +279,7 @@ repeat: * * Also, caller should grab and release a mutex by calling mutex_lock_op() and * mutex_unlock_op(). + * Note that, npage is set only by make_empty_dir. 
*/ struct page *get_new_data_page(struct inode *inode, struct page *npage, pgoff_t index, bool new_i_size) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 7db6e58..fc1dacf 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -264,15 +264,10 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_put_page(page, 1); } -void init_dent_inode(const struct qstr *name, struct page *ipage) +static void init_dent_inode(const struct qstr *name, struct page *ipage) { struct f2fs_node *rn; - if (IS_ERR(ipage)) - return; - - wait_on_page_writeback(ipage); - /* copy name info. to this inode page */ rn = (struct f2fs_node *)page_address(ipage); rn->i.i_namelen = cpu_to_le32(name->len); @@ -280,14 +275,15 @@ void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -static int make_empty_dir(struct inode *inode, struct inode *parent) +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) { struct page *dentry_page; struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; void *kaddr; - dentry_page = get_new_data_page(inode, NULL, 0, true); + dentry_page = get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -317,42 +313,47 @@ static int make_empty_dir(struct inode *inode, struct inode *parent) return 0; } -static int init_inode_metadata(struct inode *inode, +static struct page *init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *name) { + struct page *page; + int err; + if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { - int err; - err = new_inode_page(inode, name); - if (err) - return err; + page = new_inode_page(inode, name); + if (IS_ERR(page)) + return page; if (S_ISDIR(inode->i_mode)) { - err = make_empty_dir(inode, dir); - if (err) { - remove_inode_page(inode); - return err; - } + err = make_empty_dir(inode, dir, page); + if (err) + goto error; } err = f2fs_init_acl(inode, dir); - if (err) { - 
remove_inode_page(inode); - return err; - } + if (err) + goto error; + + wait_on_page_writeback(page); } else { - struct page *ipage; - ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); - if (IS_ERR(ipage)) - return PTR_ERR(ipage); - set_cold_node(inode, ipage); - init_dent_inode(name, ipage); - f2fs_put_page(ipage, 1); + page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); + if (IS_ERR(page)) + return page; + + wait_on_page_writeback(page); + set_cold_node(inode, page); } - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + + init_dent_inode(name, page); + + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) inc_nlink(inode); - update_inode_page(inode); - } - return 0; + return page; + +error: + f2fs_put_page(page, 1); + remove_inode_page(inode); + return ERR_PTR(err); } static void update_parent_metadata(struct inode *dir, struct inode *inode, @@ -423,6 +424,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in struct page *dentry_page = NULL; struct f2fs_dentry_block *dentry_blk = NULL; int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; int err = 0; int i; @@ -465,12 +467,13 @@ start: ++level; goto start; add_dentry: - err = init_inode_metadata(inode, dir, name); - if (err) - goto fail; - wait_on_page_writeback(dentry_page); + page = init_inode_metadata(inode, dir, name); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } de = &dentry_blk->dentry[bit_pos]; de->hash_code = dentry_hash; de->name_len = cpu_to_le16(namelen); @@ -481,10 +484,12 @@ add_dentry: test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); set_page_dirty(dentry_page); - update_parent_metadata(dir, inode, current_depth); - - /* update parent inode number before releasing dentry page */ + /* we don't need to mark_inode_dirty now */ F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, current_depth); fail: kunmap(dentry_page); 
f2fs_put_page(dentry_page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cbae2b6..9360a03 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -914,7 +914,6 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); ino_t f2fs_inode_by_name(struct inode *, struct qstr *); void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); -void init_dent_inode(const struct qstr *, struct page *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); @@ -949,7 +948,7 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); int remove_inode_page(struct inode *); -int new_inode_page(struct inode *, const struct qstr *); +struct page *new_inode_page(struct inode *, const struct qstr *); struct page *new_node_page(struct dnode_of_data *, unsigned int); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f63f0a4..b41482d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -806,19 +806,15 @@ int remove_inode_page(struct inode *inode) return 0; } -int new_inode_page(struct inode *inode, const struct qstr *name) +struct page *new_inode_page(struct inode *inode, const struct qstr *name) { - struct page *page; struct dnode_of_data dn; /* allocate inode page for new inode */ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - page = new_node_page(&dn, 0); - init_dent_inode(name, page); - if (IS_ERR(page)) - return PTR_ERR(page); - f2fs_put_page(page, 1); - return 0; + + /* caller should f2fs_put_page(page, 1); */ + return new_node_page(&dn, 0); } struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) -- cgit v0.10.2 From 
f356fe0cba0e3523e538987916bd2acedd4e6f41 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 May 2013 15:04:49 +0900 Subject: f2fs: add debug msgs in the recovery routine This patch adds some trivial debugging messages in the recovery process. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b41482d..5a59780 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1495,7 +1495,6 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) WARN_ON(1); set_node_addr(sbi, &new_ni, NEW_ADDR); inc_valid_inode_count(sbi); - f2fs_put_page(ipage, 1); return 0; } diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 23f5803..6ad4e53 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -49,9 +49,6 @@ static int recover_dentry(struct page *ipage, struct inode *inode) struct inode *dir; int err = 0; - if (!is_dent_dnode(ipage)) - goto out; - dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); if (!dir) { dir = f2fs_iget(inode->i_sb, pino); @@ -73,6 +70,9 @@ static int recover_dentry(struct page *ipage, struct inode *inode) err = __f2fs_add_link(dir, &name, inode); } out: + f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " + "ino = %x, name = %s, dir = %lx, err = %d", + ino_of_node(ipage), raw_inode->i_name, dir->i_ino, err); kunmap(ipage); return err; } @@ -83,6 +83,9 @@ static int recover_inode(struct inode *inode, struct page *node_page) struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; struct f2fs_inode *raw_inode = &(raw_node->i); + if (!IS_INODE(node_page)) + return 0; + inode->i_mode = le16_to_cpu(raw_inode->i_mode); i_size_write(inode, le64_to_cpu(raw_inode->i_size)); inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); @@ -92,7 +95,12 @@ static int recover_inode(struct inode *inode, struct page *node_page) inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); - return recover_dentry(node_page, inode); + if 
(is_dent_dnode(node_page)) + return recover_dentry(node_page, inode); + + f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", + ino_of_node(node_page), raw_inode->i_name); + return 0; } static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) @@ -123,7 +131,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) lock_page(page); if (cp_ver != cpver_of_node(page)) - goto unlock_out; + break; if (!is_fsync_dnode(page)) goto next; @@ -137,40 +145,33 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) if (IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) - goto unlock_out; + break; } /* add this fsync inode to the list */ entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); if (!entry) { err = -ENOMEM; - goto unlock_out; + break; } entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); if (IS_ERR(entry->inode)) { err = PTR_ERR(entry->inode); kmem_cache_free(fsync_entry_slab, entry); - goto unlock_out; + break; } list_add_tail(&entry->list, head); } entry->blkaddr = blkaddr; - if (IS_INODE(page)) { - err = recover_inode(entry->inode, page); - if (err == -ENOENT) { - goto next; - } else if (err) { - err = -EINVAL; - goto unlock_out; - } - } + err = recover_inode(entry->inode, page); + if (err && err != -ENOENT) + break; next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); } -unlock_out: unlock_page(page); out: __free_pages(page, 0); @@ -248,7 +249,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, struct dnode_of_data dn; struct f2fs_summary sum; struct node_info ni; - int err = 0; + int err = 0, recovered = 0; int ilock; start = start_bidx_of_node(ofs_of_node(page)); @@ -293,6 +294,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, /* write dummy data page */ recover_data_page(sbi, NULL, &sum, src, dest); update_extent_cache(dest, &dn); + recovered++; } 
dn.ofs_in_node++; } @@ -310,6 +312,10 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); f2fs_put_dnode(&dn); mutex_unlock_op(sbi, ilock); + + f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " + "recovered_data = %d blocks", + inode->i_ino, recovered); return 0; } -- cgit v0.10.2 From bfe35965ecdc6038314d03456b94d9ba451c289d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 16 May 2013 20:03:12 +0200 Subject: f2fs, lockdep: annotate mutex_lock_all() Majianpeng reported a lockdep splat for f2fs. It turns out mutex_lock_all() acquires an array of locks (in global/local lock style). Any such operation is always serialized using cp_mutex, therefore there is no fs_lock[] lock-order issue; tell lockdep about this using the mutex_lock_nest_lock() primitive. Reported-by: majianpeng Signed-off-by: Peter Zijlstra Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9360a03..9182b27 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -495,9 +495,17 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void mutex_lock_all(struct f2fs_sb_info *sbi) { - int i = 0; - for (; i < NR_GLOBAL_LOCKS; i++) - mutex_lock(&sbi->fs_lock[i]); + int i; + + for (i = 0; i < NR_GLOBAL_LOCKS; i++) { + /* + * This is the only time we take multiple fs_lock[] + * instances; the order is immaterial since we + * always hold cp_mutex, which serializes multiple + * such operations. + */ + mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); + } } static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) -- cgit v0.10.2 From 81fb5e874675517c57e9edd913065f1e17ebd362 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Tue, 14 May 2013 18:20:28 +0800 Subject: f2fs: remove unecessary variable and code Code cleanup without behavior changed. 
Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d8e84e4..3a0d027 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -126,17 +126,16 @@ void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno, offset = 0; + unsigned int segno = -1; unsigned int total_segs = TOTAL_SEGS(sbi); mutex_lock(&dirty_i->seglist_lock); while (1) { segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - offset); + segno + 1); if (segno >= total_segs) break; __set_test_and_free(sbi, segno); - offset = segno + 1; } mutex_unlock(&dirty_i->seglist_lock); } @@ -144,17 +143,16 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned int segno, offset = 0; + unsigned int segno = -1; unsigned int total_segs = TOTAL_SEGS(sbi); mutex_lock(&dirty_i->seglist_lock); while (1) { segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, - offset); + segno + 1); if (segno >= total_segs) break; - offset = segno + 1; if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) dirty_i->nr_dirty[PRE]--; @@ -364,11 +362,11 @@ next: static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); - unsigned int segno = curseg->segno; + unsigned int segno = curseg->segno + 1; struct free_segmap_info *free_i = FREE_I(sbi); - if (segno + 1 < TOTAL_SEGS(sbi) && (segno + 1) % sbi->segs_per_sec) - return !test_bit(segno + 1, free_i->free_segmap); + if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); return 0; } @@ -495,7 +493,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) int dir = ALLOC_LEFT; write_sum_page(sbi, 
curseg->sum_blk, - GET_SUM_BLOCK(sbi, curseg->segno)); + GET_SUM_BLOCK(sbi, segno)); if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) dir = ALLOC_RIGHT; -- cgit v0.10.2 From 145b04e5ed66033b5a3d315394dd1384e3f5f70a Mon Sep 17 00:00:00 2001 From: majianpeng Date: Tue, 14 May 2013 20:06:46 +0800 Subject: f2fs: use list_for_each_entry rather than list_for_each_entry_safe We can do this, since now we use a global mutex, f2fs_stat_mutex to protect its list operations. Signed-off-by: Jianpeng Ma [Jaegeuk Kim: add description] Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8d99437..0d6c6aa 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -175,12 +175,12 @@ get_cache: static int stat_show(struct seq_file *s, void *v) { - struct f2fs_stat_info *si, *next; + struct f2fs_stat_info *si; int i = 0; int j; mutex_lock(&f2fs_stat_mutex); - list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { + list_for_each_entry(si, &f2fs_stat_list, stat_list) { char devname[BDEVNAME_SIZE]; update_general_status(si->sbi); -- cgit v0.10.2 From 9851e6e18943f2537acb44a4eb51c6958e8dbc3e Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 28 Apr 2013 09:04:18 +0900 Subject: f2fs: reorganize f2fs_vm_page_mkwrite Few things can be changed in the default mkwrite function 1) Make file_update_time at the start before acquiring any lock 2) the condition page_offset(page) >= i_size_read(inode) should be changed to page_offset(page) > i_size_read 3) Move wait_on_page_writeback. 
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b8e34db..9937ba1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -63,9 +63,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, f2fs_put_dnode(&dn); mutex_unlock_op(sbi, ilock); + file_update_time(vma->vm_file); lock_page(page); if (page->mapping != inode->i_mapping || - page_offset(page) >= i_size_read(inode) || + page_offset(page) > i_size_read(inode) || !PageUptodate(page)) { unlock_page(page); err = -EFAULT; @@ -76,10 +77,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, * check to see if the page is mapped already (no holes) */ if (PageMappedToDisk(page)) - goto out; - - /* fill the page */ - wait_on_page_writeback(page); + goto mapped; /* page is wholly or partially inside EOF */ if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { @@ -90,7 +88,9 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, set_page_dirty(page); SetPageUptodate(page); - file_update_time(vma->vm_file); +mapped: + /* fill the page */ + wait_on_page_writeback(page); out: sb_end_pagefault(inode->i_sb); return block_page_mkwrite_return(err); -- cgit v0.10.2 From 9a55ed656c9afbe41316ab2373bc063359b7683f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 10:23:40 +0900 Subject: f2fs: remove unnecessary kmap/kunmap operations The allocated page used by the recovery is not on HIGHMEM, so that we don't need to use kmap/kunmap. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 6ad4e53..f91ff0f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -40,11 +40,11 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, static int recover_dentry(struct page *ipage, struct inode *inode) { - struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); + void *kaddr = page_address(ipage); + struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; struct f2fs_inode *raw_inode = &(raw_node->i); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct qstr name; - struct f2fs_dir_entry *de; struct page *page; struct inode *dir; int err = 0; @@ -62,8 +62,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode) name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; - de = f2fs_find_entry(dir, &name, &page); - if (de) { + if (f2fs_find_entry(dir, &name, &page)) { kunmap(page); f2fs_put_page(page, 0); } else { @@ -73,7 +72,6 @@ out: f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " "ino = %x, name = %s, dir = %lx, err = %d", ino_of_node(ipage), raw_inode->i_name, dir->i_ino, err); - kunmap(ipage); return err; } -- cgit v0.10.2 From 45856aff0d9091f4836e333951c66eca382a8573 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 10:26:09 +0900 Subject: f2fs: fix to unlock page before exit If we got an error after lock_page, we should unlock it before exit. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index f91ff0f..3a4b51c 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -347,7 +347,7 @@ static int recover_data(struct f2fs_sb_info *sbi, lock_page(page); if (cp_ver != cpver_of_node(page)) - goto unlock_out; + break; entry = get_fsync_inode(head, ino_of_node(page)); if (!entry) @@ -355,7 +355,7 @@ static int recover_data(struct f2fs_sb_info *sbi, err = do_recover_data(sbi, entry->inode, page, blkaddr); if (err) - goto out; + break; if (entry->blkaddr == blkaddr) { iput(entry->inode); @@ -366,7 +366,6 @@ next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); } -unlock_out: unlock_page(page); out: __free_pages(page, 0); -- cgit v0.10.2 From 2c2c149f7dabd5a4d41cae5d2c2ce1d130acf72c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 14:48:49 +0900 Subject: f2fs: don't do checkpoint if error is occurred If we meet an error during the dentry recovery, we should not conduct a checkpoint. Otherwise, some erroneous dentry blocks would overwrite the existing blocks that contain the remaining recovery information. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 3a4b51c..5148d90 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -403,6 +403,7 @@ out: destroy_fsync_dnodes(sbi, &inode_list); kmem_cache_destroy(fsync_entry_slab); sbi->por_doing = 0; - write_checkpoint(sbi, false); + if (!err) + write_checkpoint(sbi, false); return err; } -- cgit v0.10.2 From 6f85b3520325a67ee4ac33e75bbcdbc25c79ce69 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 May 2013 16:15:22 +0900 Subject: f2fs: avoid RECLAIM_FS-ON-W: deadlock This patch tries to avoid the following deadlock condition in which the reclaim path can trigger f2fs_balance_fs again. ================================= [ INFO: inconsistent lock state ] --------------------------------- inconsistent {RECLAIM_FS-ON-W} -> {IN-RECLAIM_FS-W} usage.
kswapd0/41 [HC0[0]:SC0[0]:HE1:SE1] takes: (&sbi->gc_mutex){+.+.?.}, at: f2fs_balance_fs+0xe6/0x100 [f2fs] {RECLAIM_FS-ON-W} state was registered at: [] mark_held_locks+0xb9/0x140 [] lockdep_trace_alloc+0x85/0xf0 [] __alloc_pages_nodemask+0x7c/0x9b0 [] alloc_pages_current+0xb8/0x180 [] __page_cache_alloc+0xaf/0xd0 [] find_or_create_page+0x4c/0xb0 [] find_data_page+0x14e/0x210 [f2fs] [] f2fs_gc+0x9eb/0xd90 [f2fs] [] f2fs_balance_fs+0xee/0x100 [f2fs] [] f2fs_setattr+0x6c/0x200 [f2fs] [] notify_change+0x1db/0x3a0 [] do_truncate+0x60/0xa0 [] vfs_truncate+0x185/0x1b0 [] do_sys_truncate+0x5c/0xa0 [] SyS_truncate+0xe/0x10 [] system_call_fastpath+0x16/0x1b Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c320f7f..1644fff 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -199,7 +199,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) if (dn.data_blkaddr == NEW_ADDR) return ERR_PTR(-EINVAL); - page = grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); if (!page) return ERR_PTR(-ENOMEM); @@ -234,7 +234,7 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) int err; repeat: - page = grab_cache_page(mapping, index); + page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); if (!page) return ERR_PTR(-ENOMEM); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 91ac7f9..a18946e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -130,8 +130,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE | - __GFP_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; -- cgit v0.10.2 From 77888c1e42e8c76e16204cd99c19a01829421402 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim 
Date: Mon, 20 May 2013 20:28:47 +0900 Subject: f2fs: add f2fs_readonly() Introduce a simple macro function for readability. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9182b27..6594ce1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -887,6 +887,11 @@ static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) return 0; } +static inline int f2fs_readonly(struct super_block *sb) +{ + return sb->s_flags & MS_RDONLY; +} + /* * file.c */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9937ba1..316bcfe 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -114,7 +114,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) .for_reclaim = 0, }; - if (inode->i_sb->s_flags & MS_RDONLY) + if (f2fs_readonly(inode->i_sb)) return 0; trace_f2fs_sync_file_enter(inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8555f7d..3ac305d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -170,7 +170,7 @@ static int f2fs_freeze(struct super_block *sb) { int err; - if (sb->s_flags & MS_RDONLY) + if (f2fs_readonly(sb)) return 0; err = f2fs_sync_fs(sb, 1); -- cgit v0.10.2 From b638f0c4b8fca9d2f82805a2d6601b09283e0d32 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 May 2013 10:17:56 +0900 Subject: f2fs: fix wrong condition check While an orphan inode has zero link_count, f2fs_gc is able to select the inode for foreground gc. - f2fs_gc - do_garbage_collect - gc_data_segment : f2fs_iget is failed : get_valid_blocks() != 0, so that retry --> here we got the infinite loop. This patch resolved this issue. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index a18946e..b44a4c1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -109,12 +109,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) ret = do_read_inode(inode); if (ret) goto bad_inode; - - if (!sbi->por_doing && inode->i_nlink == 0) { - ret = -ENOENT; - goto bad_inode; - } - make_now: if (ino == F2FS_NODE_INO(sbi)) { inode->i_mapping->a_ops = &f2fs_node_aops; -- cgit v0.10.2 From b292dcab068e141d8a820b77cbcc88d98c610eb4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 May 2013 08:02:02 +0900 Subject: f2fs: reuse the locked dnode page and its inode This patch fixes the following deadlock bug during the recovery. INFO: task mount:1322 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. mount D ffffffff81125870 0 1322 1266 0x00000000 ffff8801207e39d8 0000000000000046 ffff88012ab1dee0 0000000000000046 ffff8801207e3a08 ffff880115903f40 ffff8801207e3fd8 ffff8801207e3fd8 ffff8801207e3fd8 ffff880115903f40 ffff8801207e39d8 ffff88012fc94520 Call Trace: [] ? __lock_page+0x70/0x70 [] schedule+0x29/0x70 [] io_schedule+0x8f/0xd0 [] sleep_on_page+0xe/0x20 [] __wait_on_bit_lock+0x5a/0xc0 [] __lock_page+0x67/0x70 [] ? autoremove_wake_function+0x40/0x40 [] find_lock_page+0x67/0x80 [] find_or_create_page+0x3f/0xb0 [] ? sync_inode_page+0xa8/0xd0 [f2fs] [] get_node_page+0x67/0x180 [f2fs] [] recover_fsync_data+0xacb/0xff0 [f2fs] [] ? _raw_spin_unlock+0x3e/0x40 [] f2fs_fill_super+0x7d4/0x850 [f2fs] [] mount_bdev+0x1c9/0x210 [] ? validate_superblock+0x180/0x180 [f2fs] [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x43/0x1b0 [] ? __alloc_percpu+0x10/0x20 [] vfs_kern_mount+0x76/0x120 [] do_mount+0x237/0xa10 [] ? strndup_user+0x5b/0x80 [] SyS_mount+0x90/0xe0 [] system_call_fastpath+0x16/0x1b The bug is triggered when check_index_in_prev_nodes tries to get the direct node page by calling get_node_page. 
At this point, if the direct node page is already locked by get_dnode_of_data, its caller, we got a deadlock condition. This patch adds additional condition check for the reuse of locked direct node pages prior to the get_node_page call. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6594ce1..7b05029 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -900,6 +900,7 @@ void truncate_data_blocks(struct dnode_of_data *); void f2fs_truncate(struct inode *); int f2fs_setattr(struct dentry *, struct iattr *); int truncate_hole(struct inode *, pgoff_t, pgoff_t); +int truncate_data_blocks_range(struct dnode_of_data *, int); long f2fs_ioctl(struct file *, unsigned int, unsigned long); long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 316bcfe..deefd25 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -168,7 +168,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) return 0; } -static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) +int truncate_data_blocks_range(struct dnode_of_data *dn, int count) { int nr_free = 0, ofs = dn->ofs_in_node; struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 5148d90..eceb665 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -189,14 +189,14 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, } static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, - block_t blkaddr) + block_t blkaddr, struct dnode_of_data *dn) { struct seg_entry *sentry; unsigned int segno = GET_SEGNO(sbi, blkaddr); unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); struct f2fs_summary sum; - nid_t ino; + nid_t ino, nid; void *kaddr; struct inode *inode; struct page *node_page; @@ -224,10 +224,26 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, f2fs_put_page(sum_page, 1); } + /* Use the 
locked dnode page and inode */ + nid = le32_to_cpu(sum.nid); + if (dn->inode->i_ino == nid) { + struct dnode_of_data tdn = *dn; + tdn.nid = nid; + tdn.node_page = dn->inode_page; + tdn.ofs_in_node = sum.ofs_in_node; + truncate_data_blocks_range(&tdn, 1); + return; + } else if (dn->nid == nid) { + struct dnode_of_data tdn = *dn; + tdn.ofs_in_node = sum.ofs_in_node; + truncate_data_blocks_range(&tdn, 1); + return; + } + /* Get the node page */ - node_page = get_node_page(sbi, le32_to_cpu(sum.nid)); + node_page = get_node_page(sbi, nid); bidx = start_bidx_of_node(ofs_of_node(node_page)) + - le16_to_cpu(sum.ofs_in_node); + le16_to_cpu(sum.ofs_in_node); ino = ino_of_node(node_page); f2fs_put_page(node_page, 1); @@ -285,7 +301,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* Check the previous node page having this index */ - check_index_in_prev_nodes(sbi, dest); + check_index_in_prev_nodes(sbi, dest, &dn); set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); -- cgit v0.10.2 From 39cf72cf09c8f36a383919e7675bdb15bd4db53b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 May 2013 08:20:01 +0900 Subject: f2fs: fix to handle do_recover_data errors This patch adds error handling codes of check_index_in_prev_nodes and its caller, do_recover_data. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index eceb665..dcd8e86 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -188,7 +188,7 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, } } -static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, +static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, block_t blkaddr, struct dnode_of_data *dn) { struct seg_entry *sentry; @@ -205,7 +205,7 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, sentry = get_seg_entry(sbi, segno); if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) - return; + return 0; /* Get the previous summary */ for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { @@ -232,16 +232,18 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, tdn.node_page = dn->inode_page; tdn.ofs_in_node = sum.ofs_in_node; truncate_data_blocks_range(&tdn, 1); - return; + return 0; } else if (dn->nid == nid) { struct dnode_of_data tdn = *dn; tdn.ofs_in_node = sum.ofs_in_node; truncate_data_blocks_range(&tdn, 1); - return; + return 0; } /* Get the node page */ node_page = get_node_page(sbi, nid); + if (IS_ERR(node_page)) + return PTR_ERR(node_page); bidx = start_bidx_of_node(ofs_of_node(node_page)) + le16_to_cpu(sum.ofs_in_node); ino = ino_of_node(node_page); @@ -250,10 +252,11 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, /* Deallocate previous index in the node page */ inode = f2fs_iget(sbi->sb, ino); if (IS_ERR(inode)) - return; + return PTR_ERR(inode); truncate_hole(inode, bidx, bidx + 1); iput(inode); + return 0; } static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, @@ -301,7 +304,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, } /* Check the previous node page having this index */ - check_index_in_prev_nodes(sbi, dest, &dn); + err = check_index_in_prev_nodes(sbi, dest, &dn); + if (err) + goto err; set_summary(&sum, dn.nid, dn.ofs_in_node, 
ni.version); @@ -324,13 +329,14 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, set_page_dirty(dn.node_page); recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); +err: f2fs_put_dnode(&dn); mutex_unlock_op(sbi, ilock); f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " - "recovered_data = %d blocks", - inode->i_ino, recovered); - return 0; + "recovered_data = %d blocks, err = %d", + inode->i_ino, recovered, err); + return err; } static int recover_data(struct f2fs_sb_info *sbi, -- cgit v0.10.2 From 93ff10d690ca536fdbd6b5d5d97e4ab54b2a421f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 May 2013 12:03:47 +0900 Subject: f2fs: should not make_bad_inode on f2fs_link failure If -ENOSPC is met during f2fs_link, we should not make the inode as bad. The inode is still alive. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 729b285..71aa305 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -191,7 +191,6 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); - make_bad_inode(inode); iput(inode); return err; } -- cgit v0.10.2 From 6f6fd833e1857e79a363fb20497237367bcfb7ee Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 22 May 2013 12:06:26 +0900 Subject: f2fs: use ihold Use the following helper function committed by Al. commit 7de9c6ee3ecffd99e1628e81a5ea5468f7581a1f Author: Al Viro Date: Sat Oct 23 11:11:40 2010 -0400 new helper: ihold() ... 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 71aa305..efe0a12 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -172,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_balance_fs(sbi); inode->i_ctime = CURRENT_TIME; - atomic_inc(&inode->i_count); + ihold(inode); set_inode_flag(F2FS_I(inode), FI_INC_LINK); ilock = mutex_lock_op(sbi); -- cgit v0.10.2 From f28c06fa6f3d3215a1ba5e62ebc5ce7229d7a895 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 May 2013 13:02:13 +0300 Subject: f2fs: dereferencing an ERR_PTR There is an error path where "dir" is an ERR_PTR. Signed-off-by: Dan Carpenter Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index dcd8e86..0dd2ce1 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -71,7 +71,8 @@ static int recover_dentry(struct page *ipage, struct inode *inode) out: f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " "ino = %x, name = %s, dir = %lx, err = %d", - ino_of_node(ipage), raw_inode->i_name, dir->i_ino, err); + ino_of_node(ipage), raw_inode->i_name, + IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } -- cgit v0.10.2 From a9841c4dbbdd8a2fb919ea305ffa95ab5ec80af2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 May 2013 12:41:04 +0900 Subject: f2fs: align data types between on-disk and in-memory block addresses The on-disk block address is defined as __le32, but in-memory block address, block_t, does as u64. Let's synchronize them to 32 bits. Reported-by: Dan Carpenter Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7b05029..92fd4e9 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -37,7 +37,10 @@ typecheck(unsigned long long, b) && \ ((long long)((a) - (b)) > 0)) -typedef u64 block_t; +typedef u32 block_t; /* + * should not change u32, since it is the on-disk block + * address format, __le32. 
+ */ typedef u32 nid_t; struct f2fs_mount_info { diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index df6fab8..383d5e3 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -20,8 +20,8 @@ #define F2FS_BLKSIZE 4096 /* support only 4KB block */ #define F2FS_MAX_EXTENSION 64 /* # of extension entries */ -#define NULL_ADDR 0x0U -#define NEW_ADDR -1U +#define NULL_ADDR ((block_t)0) /* used as block_t addresses */ +#define NEW_ADDR ((block_t)-1) /* used as block_t addresses */ #define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) #define F2FS_NODE_INO(sbi) (sbi->node_ino_num) -- cgit v0.10.2 From 35b09d82c3cf3fc0b8b6d923e7fd82ff7926aafc Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 23 May 2013 22:57:53 +0900 Subject: f2fs: push some variables to debug part Some, counters are needed only for the statistical information while debugging. So, those can be controlled using CONFIG_F2FS_STAT_FS, pushing the usage for few variables under this flag. Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3d11449..01ddc91 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -478,7 +478,9 @@ retry: } } list_add_tail(&new->list, head); +#ifdef CONFIG_F2FS_STAT_FS sbi->n_dirty_dirs++; +#endif BUG_ON(!S_ISDIR(inode->i_mode)); out: @@ -508,7 +510,9 @@ void remove_dirty_dir_inode(struct inode *inode) if (entry->inode == inode) { list_del(&entry->list); kmem_cache_free(inode_entry_slab, entry); +#ifdef CONFIG_F2FS_STAT_FS sbi->n_dirty_dirs--; +#endif break; } } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1644fff..93917e3 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -68,7 +68,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, struct buffer_head *bh_result) { struct f2fs_inode_info *fi = F2FS_I(inode); +#ifdef CONFIG_F2FS_STAT_FS struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); +#endif pgoff_t start_fofs, end_fofs; block_t 
start_blkaddr; @@ -78,7 +80,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, return 0; } +#ifdef CONFIG_F2FS_STAT_FS sbi->total_hit_ext++; +#endif start_fofs = fi->ext.fofs; end_fofs = fi->ext.fofs + fi->ext.len - 1; start_blkaddr = fi->ext.blk_addr; @@ -96,7 +100,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, else bh_result->b_size = UINT_MAX; +#ifdef CONFIG_F2FS_STAT_FS sbi->read_hit_ext++; +#endif read_unlock(&fi->ext.ext_lock); return 1; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 92fd4e9..40b137a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -372,7 +372,6 @@ struct f2fs_sb_info { /* for directory inode management */ struct list_head dir_inode_list; /* dir inode list */ spinlock_t dir_inode_lock; /* for dir inode list lock */ - unsigned int n_dirty_dirs; /* # of dir inodes */ /* basic file system units */ unsigned int log_sectors_per_block; /* log2 sectors per block */ @@ -409,12 +408,15 @@ struct f2fs_sb_info { * for stat information. * one is for the LFS mode, and the other is for the SSR mode. 
*/ +#ifdef CONFIG_F2FS_STAT_FS struct f2fs_stat_info *stat_info; /* FS status information */ unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ - unsigned int last_victim[2]; /* last victim segment # */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ int bg_gc; /* background gc calls */ + unsigned int n_dirty_dirs; /* # of dir inodes */ +#endif + unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ }; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 1496159..25b083c 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -76,7 +76,9 @@ static int gc_thread_func(void *data) else wait_ms = increase_sleep_time(wait_ms); +#ifdef CONFIG_F2FS_STAT_FS sbi->bg_gc++; +#endif /* if return value is not zero, no victim was selected */ if (f2fs_gc(sbi)) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3a0d027..be668ff 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -610,7 +610,10 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, else new_curseg(sbi, type, false); out: +#ifdef CONFIG_F2FS_STAT_FS sbi->segment_count[curseg->alloc_type]++; +#endif + return; } void allocate_new_segments(struct f2fs_sb_info *sbi) @@ -846,7 +849,9 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); +#ifdef CONFIG_F2FS_STAT_FS sbi->block_count[curseg->alloc_type]++; +#endif /* * SIT information should be updated before segment allocation, -- cgit v0.10.2 From 4777f86b7c0a587dde275a5c1ff3022b2e601313 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 23 May 2013 22:58:07 +0900 Subject: f2fs: remove unneeded initializations in f2fs_parent_dir There is no need to initialize few pointers in f2fs_parent_dir as the values are not checked and instead directly initialized values are used. 
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index fc1dacf..b278bfb 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -215,9 +215,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) { - struct page *page = NULL; - struct f2fs_dir_entry *de = NULL; - struct f2fs_dentry_block *dentry_blk = NULL; + struct page *page; + struct f2fs_dir_entry *de; + struct f2fs_dentry_block *dentry_blk; page = get_lock_data_page(dir, 0); if (IS_ERR(page)) -- cgit v0.10.2 From a06a2416038d317a6430e453f5bc5fd81834554d Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Thu, 23 May 2013 22:58:40 +0900 Subject: f2fs: optimize several routines in node.h There are various functions with common code which could be separated out to make common routines. So, made new routines and in order to retain the same call path and no major changes, written some macros to access those routines. 
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0a2d72f..a503661 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -275,25 +275,20 @@ static inline nid_t get_nid(struct page *p, int off, bool i) * - Mark cold node blocks in their node footer * - Mark cold data pages in page cache */ -static inline int is_cold_file(struct inode *inode) +static inline int is_file(struct inode *inode, int type) { - return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; + return F2FS_I(inode)->i_advise & type; } -static inline void set_cold_file(struct inode *inode) +static inline void set_file(struct inode *inode, int type) { - F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; + F2FS_I(inode)->i_advise |= type; } -static inline int is_cp_file(struct inode *inode) -{ - return F2FS_I(inode)->i_advise & FADVISE_CP_BIT; -} - -static inline void set_cp_file(struct inode *inode) -{ - F2FS_I(inode)->i_advise |= FADVISE_CP_BIT; -} +#define is_cold_file(inode) is_file(inode, FADVISE_COLD_BIT) +#define is_cp_file(inode) is_file(inode, FADVISE_CP_BIT) +#define set_cold_file(inode) set_file(inode, FADVISE_COLD_BIT) +#define set_cp_file(inode) set_file(inode, FADVISE_CP_BIT) static inline int is_cold_data(struct page *page) { @@ -310,29 +305,16 @@ static inline void clear_cold_data(struct page *page) ClearPageChecked(page); } -static inline int is_cold_node(struct page *page) +static inline int is_node(struct page *page, int type) { void *kaddr = page_address(page); struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - return flag & (0x1 << COLD_BIT_SHIFT); + return le32_to_cpu(rn->footer.flag) & (1 << type); } -static inline unsigned char is_fsync_dnode(struct page *page) -{ - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - return flag & (0x1 << FSYNC_BIT_SHIFT); -} - -static 
inline unsigned char is_dent_dnode(struct page *page) -{ - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - return flag & (0x1 << DENT_BIT_SHIFT); -} +#define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) +#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) +#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) static inline void set_cold_node(struct inode *inode, struct page *page) { @@ -346,26 +328,15 @@ static inline void set_cold_node(struct inode *inode, struct page *page) rn->footer.flag = cpu_to_le32(flag); } -static inline void set_fsync_mark(struct page *page, int mark) -{ - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; - unsigned int flag = le32_to_cpu(rn->footer.flag); - if (mark) - flag |= (0x1 << FSYNC_BIT_SHIFT); - else - flag &= ~(0x1 << FSYNC_BIT_SHIFT); - rn->footer.flag = cpu_to_le32(flag); -} - -static inline void set_dentry_mark(struct page *page, int mark) +static inline void set_mark(struct page *page, int mark, int type) { - void *kaddr = page_address(page); - struct f2fs_node *rn = (struct f2fs_node *)kaddr; + struct f2fs_node *rn = (struct f2fs_node *)page_address(page); unsigned int flag = le32_to_cpu(rn->footer.flag); if (mark) - flag |= (0x1 << DENT_BIT_SHIFT); + flag |= (0x1 << type); else - flag &= ~(0x1 << DENT_BIT_SHIFT); + flag &= ~(0x1 << type); rn->footer.flag = cpu_to_le32(flag); } +#define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) +#define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) -- cgit v0.10.2 From 7a267f8d7463346a139e49c8beac1b8bfe32ef97 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 26 May 2013 11:05:32 +0900 Subject: f2fs: return proper error from start_gc_thread when there is an error from kthread_run, then return proper error rather than returning -ENOMEM. 
Signed-off-by: Namjae Jeon Signed-off-by: Amit Sahrawat Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 25b083c..ddc2c67 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -91,23 +91,28 @@ int start_gc_thread(struct f2fs_sb_info *sbi) { struct f2fs_gc_kthread *gc_th; dev_t dev = sbi->sb->s_bdev->bd_dev; + int err = 0; if (!test_opt(sbi, BG_GC)) - return 0; + goto out; gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); - if (!gc_th) - return -ENOMEM; + if (!gc_th) { + err = -ENOMEM; + goto out; + } sbi->gc_thread = gc_th; init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); if (IS_ERR(gc_th->f2fs_gc_task)) { + err = PTR_ERR(gc_th->f2fs_gc_task); kfree(gc_th); sbi->gc_thread = NULL; - return -ENOMEM; } - return 0; + +out: + return err; } void stop_gc_thread(struct f2fs_sb_info *sbi) -- cgit v0.10.2 From 3b10b1fd2b6bc82eeb346ff6a6621d065908ea6d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 May 2013 10:32:01 +0900 Subject: f2fs: iput only if whole data blocks are flushed If some unwritten blocks remain from the recovery, we should not call iput on that directory inode. Otherwise, we can lose some dentry blocks after the recovery.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 01ddc91..0d3701d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -501,8 +501,10 @@ void remove_dirty_dir_inode(struct inode *inode) return; spin_lock(&sbi->dir_inode_lock); - if (atomic_read(&F2FS_I(inode)->dirty_dents)) - goto out; + if (atomic_read(&F2FS_I(inode)->dirty_dents)) { + spin_unlock(&sbi->dir_inode_lock); + return; + } list_for_each(this, head) { struct dir_inode_entry *entry; @@ -516,7 +518,6 @@ void remove_dirty_dir_inode(struct inode *inode) break; } } -out: spin_unlock(&sbi->dir_inode_lock); /* Only from the recovery routine */ -- cgit v0.10.2 From 6b8213d9a4ca0d7a02a38757068ba79cd96206f0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 May 2013 09:19:22 +0900 Subject: f2fs: fix dentry recovery routine The error scenario is: 1. create /a (1.a link /a /b) 2. sync 3. unlink /a 4. create /a 5. fsync /a 6. Sudden power-off When f2fs recovers the fsynced dentry, /a, we discover an existing dentry at f2fs_find_entry() in recover_dentry(). In such a case, we should unlink the existing dentry and its inode and then recover the newly created dentry.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 0dd2ce1..539ca32 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -44,9 +44,10 @@ static int recover_dentry(struct page *ipage, struct inode *inode) struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; struct f2fs_inode *raw_inode = &(raw_node->i); nid_t pino = le32_to_cpu(raw_inode->i_pino); + struct f2fs_dir_entry *de; struct qstr name; struct page *page; - struct inode *dir; + struct inode *dir, *einode; int err = 0; dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); @@ -61,13 +62,26 @@ static int recover_dentry(struct page *ipage, struct inode *inode) name.len = le32_to_cpu(raw_inode->i_namelen); name.name = raw_inode->i_name; - - if (f2fs_find_entry(dir, &name, &page)) { +retry: + de = f2fs_find_entry(dir, &name, &page); + if (de && inode->i_ino == le32_to_cpu(de->ino)) { kunmap(page); f2fs_put_page(page, 0); - } else { - err = __f2fs_add_link(dir, &name, inode); + goto out; + } + if (de) { + einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); + if (IS_ERR(einode)) { + WARN_ON(1); + if (PTR_ERR(einode) == -ENOENT) + err = -EEXIST; + goto out; + } + f2fs_delete_entry(de, page, einode); + iput(einode); + goto retry; } + err = __f2fs_add_link(dir, &name, inode); out: f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " "ino = %x, name = %s, dir = %lx, err = %d", -- cgit v0.10.2 From afc3eda2a897b402e59f42f22eb89bba52297dd3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 May 2013 09:59:27 +0900 Subject: f2fs: fix incorrect iputs during the dentry recovery - iget/iput flow in the dentry recovery process 1. *dir* = f2fs_iget 2. set FI_DELAY_IPUT to *dir* 3. add *dir* to the dirty_dir_list - __f2fs_add_link - recover_dentry) 4. 
iput *dir* by remove_dirty_dir_inode - sync_dirty_dir_inodes - write_checkpoint If *dir*'s i_count is not 1 (i.e., root dir), remove_dirty_dir_inode is called later and then iput is triggered again due to the FI_DELAY_IPUT flag. So, let's unset the flag properly once iput is triggered. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0d3701d..6f56e57 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -521,8 +521,10 @@ void remove_dirty_dir_inode(struct inode *inode) spin_unlock(&sbi->dir_inode_lock); /* Only from the recovery routine */ - if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) + if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { + clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); iput(inode); + } } struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) -- cgit v0.10.2 From 83d5d6f66b375f21bee4c2e17178f7c073a66301 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 May 2013 12:25:47 +0900 Subject: f2fs: cover cp_file information with ilock If a file is linked with other files, it should be checkpointed at every fsync call. For this, we use set_cp_file() with FADVISE_CP_BIT, but previously we didn't cover the flag by the global lock. This patch makes sure that the inode page stores this correctly. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b278bfb..67e2d13 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -346,8 +346,14 @@ static struct page *init_inode_metadata(struct inode *inode, init_dent_inode(name, page); - if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) + /* + * This file should be checkpointed during fsync. + * We lost i_pino from now on.
+ */ + if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { + set_cp_file(inode); inc_nlink(inode); + } return page; error: diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index efe0a12..1fe1502 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -181,12 +181,6 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, if (err) goto out; - /* - * This file should be checkpointed during fsync. - * We lost i_pino from now on. - */ - set_cp_file(inode); - d_instantiate(dentry, inode); return 0; out: -- cgit v0.10.2 From 1e03e38b35b8e72d65fd5d931627bd6ff02926c1 Mon Sep 17 00:00:00 2001 From: Jason Hrycay Date: Fri, 31 May 2013 12:45:11 -0500 Subject: f2fs: handle errors from get_node_page calls Add check for error pointers returned from get_node_page in order to avoid dereferencing a bad address on the next use. Signed-off-by: Jason Hrycay Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 0b02dce..ae61f35 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -218,6 +218,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, return -ENODATA; page = get_node_page(sbi, fi->i_xattr_nid); + if (IS_ERR(page)) + return PTR_ERR(page); base_addr = page_address(page); list_for_each_xattr(entry, base_addr) { @@ -268,6 +270,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) return 0; page = get_node_page(sbi, fi->i_xattr_nid); + if (IS_ERR(page)) + return PTR_ERR(page); base_addr = page_address(page); list_for_each_xattr(entry, base_addr) { -- cgit v0.10.2 From b2b3460a9404136e0a99b9f7cb56e08ec41ea933 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 1 Jun 2013 16:20:26 +0900 Subject: f2fs: reorganise the function get_victim_by_default Fix the function get_victim_by_default, where it checks for the condition that p.min_segno != NULL_SEGNO as shown: if (p.min_segno != NULL_SEGNO) goto got_it; and if above condition is true then got_it: if (p.min_segno != NULL_SEGNO) { So this 
condition is being checked twice. Hence move the goto statement after the if condition so that duplication of condition check is avoided. Also this function makes a call to get_max_cost() to compute the max cost based on the f2fs_sbi_info and victim policy. Since get_max_cost depends on three parameters of victim_sel_policy => alloc_mode, gc_mode & ofs_unit, once this victim policy is initialised, these values will not change till the execution time of get_victim_by_default() & also f2fs_sbi_info structure parameters will not change. Hence making calls to get_max_cost() in the while loop does not seem to be a good point. Instead we can call it once at the beginning and store the results in a local variable, which later can serve our purpose for comparing the cost with max cost inside the while loop. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ddc2c67..3a9df36 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -241,14 +241,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct victim_sel_policy p; - unsigned int secno; + unsigned int secno, max_cost; int nsearched = 0; p.alloc_mode = alloc_mode; select_policy(sbi, gc_type, type, &p); p.min_segno = NULL_SEGNO; - p.min_cost = get_max_cost(sbi, &p); + p.min_cost = max_cost = get_max_cost(sbi, &p); mutex_lock(&dirty_i->seglist_lock); @@ -287,7 +287,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_cost = cost; } - if (cost == get_max_cost(sbi, &p)) + if (cost == max_cost) continue; if (nsearched++ >= MAX_VICTIM_SEARCH) { @@ -295,8 +295,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, break; } } -got_it: if (p.min_segno != NULL_SEGNO) { +got_it: if (p.alloc_mode == LFS) { secno = GET_SECNO(sbi, p.min_segno); if (gc_type == FG_GC) -- cgit v0.10.2 From 5deb82671ae344b28b4e744020afcbc76df1779b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 5
Jun 2013 17:42:45 +0900 Subject: f2fs: fix iget/iput of dir during recovery It is possible that iput is skipped after iget during the recovery. In recover_dentry(), dir = f2fs_iget(); ... if (de && inode->i_ino == le32_to_cpu(de->ino)) goto out; In this case, this dir is not able to be added in dirty_dir_inode_list. The actual linking is done only when set_page_dirty() is called. So let's add this newly got inode into the list explicitly, and put it at the end of the recovery routine. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6f56e57..9a77509 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -450,13 +450,30 @@ fail_no_cp: return -EINVAL; } -void set_dirty_dir_page(struct inode *inode, struct page *page) +static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct list_head *head = &sbi->dir_inode_list; - struct dir_inode_entry *new; struct list_head *this; + list_for_each(this, head) { + struct dir_inode_entry *entry; + entry = list_entry(this, struct dir_inode_entry, list); + if (entry->inode == inode) + return -EEXIST; + } + list_add_tail(&new->list, head); +#ifdef CONFIG_F2FS_STAT_FS + sbi->n_dirty_dirs++; +#endif + return 0; +} + +void set_dirty_dir_page(struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; + if (!S_ISDIR(inode->i_mode)) return; retry: @@ -469,25 +486,31 @@ retry: INIT_LIST_HEAD(&new->list); spin_lock(&sbi->dir_inode_lock); - list_for_each(this, head) { - struct dir_inode_entry *entry; - entry = list_entry(this, struct dir_inode_entry, list); - if (entry->inode == inode) { - kmem_cache_free(inode_entry_slab, new); - goto out; - } - } - list_add_tail(&new->list, head); -#ifdef CONFIG_F2FS_STAT_FS - sbi->n_dirty_dirs++; -#endif + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); - BUG_ON(!S_ISDIR(inode->i_mode)); 
-out: inc_page_count(sbi, F2FS_DIRTY_DENTS); inode_inc_dirty_dents(inode); SetPagePrivate(page); + spin_unlock(&sbi->dir_inode_lock); +} + +void add_dirty_dir_inode(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct dir_inode_entry *new; +retry: + new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); + if (!new) { + cond_resched(); + goto retry; + } + new->inode = inode; + INIT_LIST_HEAD(&new->list); + spin_lock(&sbi->dir_inode_lock); + if (__add_dirty_inode(inode, new)) + kmem_cache_free(inode_entry_slab, new); spin_unlock(&sbi->dir_inode_lock); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 40b137a..d6e63da 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1030,6 +1030,7 @@ void remove_orphan_inode(struct f2fs_sb_info *, nid_t); int recover_orphan_inodes(struct f2fs_sb_info *); int get_valid_checkpoint(struct f2fs_sb_info *); void set_dirty_dir_page(struct inode *, struct page *); +void add_dirty_dir_inode(struct inode *); void remove_dirty_dir_inode(struct inode *); struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); void sync_dirty_dir_inodes(struct f2fs_sb_info *); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 539ca32..ddde14f 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -58,6 +58,7 @@ static int recover_dentry(struct page *ipage, struct inode *inode) goto out; } set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); + add_dirty_dir_inode(dir); } name.len = le32_to_cpu(raw_inode->i_namelen); -- cgit v0.10.2 From 8ae8f1627f39bae505b90cade50cd8a911b8bda6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 3 Jun 2013 19:46:19 +0900 Subject: f2fs: support xattr security labels This patch adds the support of security labels for f2fs, which will be used by Linus Security Models (LSMs). 
Quote from http://en.wikipedia.org/wiki/Linux_Security_Modules: "Linux Security Modules (LSM) is a framework that allows the Linux kernel to support a variety of computer security models while avoiding favoritism toward any single security implementation. The framework is licensed under the terms of the GNU General Public License and is standard part of the Linux kernel since Linux 2.6. AppArmor, SELinux, Smack and TOMOYO Linux are the currently accepted modules in the official kernel.". Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index fd27e7e..e06e099 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -51,3 +51,15 @@ config F2FS_FS_POSIX_ACL Linux website . If you don't know what Access Control Lists are, say N + +config F2FS_FS_SECURITY + bool "F2FS Security Labels" + depends on F2FS_FS_XATTR + help + Security labels provide an access control facility to support Linux + Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO + Linux. This option enables an extended attribute handler for file + security labels in the f2fs filesystem, so that it requires enabling + the extended attribute support in advance. + + If you are not using a security module, say N. 
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 44abc2f..b7826ec 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -250,7 +250,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) } } - error = f2fs_setxattr(inode, name_index, "", value, size); + error = f2fs_setxattr(inode, name_index, "", value, size, NULL); kfree(value); if (!error) diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 67e2d13..eaea5b5 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -13,6 +13,7 @@ #include "f2fs.h" #include "node.h" #include "acl.h" +#include "xattr.h" static unsigned long dir_blocks(struct inode *inode) { @@ -334,6 +335,10 @@ static struct page *init_inode_metadata(struct inode *inode, if (err) goto error; + err = f2fs_init_security(inode, dir, name, page); + if (err) + goto error; + wait_on_page_writeback(page); } else { page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d6e63da..4f2c209 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -968,7 +968,7 @@ int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); int truncate_inode_blocks(struct inode *, pgoff_t); int remove_inode_page(struct inode *); struct page *new_inode_page(struct inode *, const struct qstr *); -struct page *new_node_page(struct dnode_of_data *, unsigned int); +struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); void ra_node_page(struct f2fs_sb_info *, nid_t); struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); struct page *get_node_page_ra(struct page *, int); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 5a59780..b02440c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -433,7 +433,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) } dn->nid = nids[i]; - npage[i] = new_node_page(dn, noffset[i]); + npage[i] = new_node_page(dn, noffset[i], NULL); if (IS_ERR(npage[i])) { alloc_nid_failed(sbi, nids[i]); err = PTR_ERR(npage[i]); @@ -814,10 +814,11 @@ 
struct page *new_inode_page(struct inode *inode, const struct qstr *name) set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); /* caller should f2fs_put_page(page, 1); */ - return new_node_page(&dn, 0); + return new_node_page(&dn, 0, NULL); } -struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) +struct page *new_node_page(struct dnode_of_data *dn, + unsigned int ofs, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct address_space *mapping = sbi->node_inode->i_mapping; @@ -850,7 +851,10 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) set_cold_node(dn->inode, page); dn->node_page = page; - sync_inode_page(dn); + if (ipage) + update_inode(dn->inode, ipage); + else + sync_inode_page(dn); set_page_dirty(page); if (ofs == 0) inc_valid_inode_count(sbi); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index ae61f35..3ab07ec 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -20,6 +20,7 @@ */ #include #include +#include #include "f2fs.h" #include "xattr.h" @@ -43,6 +44,10 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, prefix = XATTR_TRUSTED_PREFIX; prefix_len = XATTR_TRUSTED_PREFIX_LEN; break; + case F2FS_XATTR_INDEX_SECURITY: + prefix = XATTR_SECURITY_PREFIX; + prefix_len = XATTR_SECURITY_PREFIX_LEN; + break; default: return -EINVAL; } @@ -50,7 +55,7 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, total_len = prefix_len + name_len + 1; if (list && total_len <= list_size) { memcpy(list, prefix, prefix_len); - memcpy(list+prefix_len, name, name_len); + memcpy(list + prefix_len, name, name_len); list[prefix_len + name_len] = '\0'; } return total_len; @@ -70,13 +75,14 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, if (!capable(CAP_SYS_ADMIN)) return -EPERM; break; + case F2FS_XATTR_INDEX_SECURITY: + break; default: return -EINVAL; } if (strcmp(name, "") == 0) return -EINVAL; - return 
f2fs_getxattr(dentry->d_inode, type, name, - buffer, size); + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); } static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, @@ -93,13 +99,15 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, if (!capable(CAP_SYS_ADMIN)) return -EPERM; break; + case F2FS_XATTR_INDEX_SECURITY: + break; default: return -EINVAL; } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_setxattr(dentry->d_inode, type, name, value, size); + return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); } static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, @@ -145,6 +153,31 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, return 0; } +#ifdef CONFIG_F2FS_FS_SECURITY +static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, + void *page) +{ + const struct xattr *xattr; + int err = 0; + + for (xattr = xattr_array; xattr->name != NULL; xattr++) { + err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, + xattr->name, xattr->value, + xattr->value_len, (struct page *)page); + if (err < 0) + break; + } + return err; +} + +int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return security_inode_init_security(inode, dir, qstr, + &f2fs_initxattrs, ipage); +} +#endif + const struct xattr_handler f2fs_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, .flags = F2FS_XATTR_INDEX_USER, @@ -169,6 +202,14 @@ const struct xattr_handler f2fs_xattr_advise_handler = { .set = f2fs_xattr_advise_set, }; +const struct xattr_handler f2fs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = F2FS_XATTR_INDEX_SECURITY, + .list = f2fs_xattr_generic_list, + .get = f2fs_xattr_generic_get, + .set = f2fs_xattr_generic_set, +}; + static const struct xattr_handler *f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, #ifdef 
CONFIG_F2FS_FS_POSIX_ACL @@ -176,6 +217,9 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = { [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, #endif [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, +#endif [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, }; @@ -186,6 +230,9 @@ const struct xattr_handler *f2fs_xattr_handlers[] = { &f2fs_xattr_acl_default_handler, #endif &f2fs_xattr_trusted_handler, +#ifdef CONFIG_F2FS_FS_SECURITY + &f2fs_xattr_security_handler, +#endif &f2fs_xattr_advise_handler, NULL, }; @@ -300,7 +347,7 @@ cleanup: } int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len) + const void *value, size_t value_len, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct f2fs_inode_info *fi = F2FS_I(inode); @@ -339,7 +386,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); mark_inode_dirty(inode); - page = new_node_page(&dn, XATTR_NODE_OFFSET); + page = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); if (IS_ERR(page)) { alloc_nid_failed(sbi, fi->i_xattr_nid); fi->i_xattr_nid = 0; @@ -439,7 +486,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, inode->i_ctime = CURRENT_TIME; clear_inode_flag(fi, FI_ACL_MODE); } - update_inode_page(inode); + if (ipage) + update_inode(inode, ipage); + else + update_inode_page(inode); mutex_unlock_op(sbi, ilock); return 0; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 49c9558..3c0817b 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -112,21 +112,19 @@ extern const struct xattr_handler f2fs_xattr_trusted_handler; extern const struct xattr_handler f2fs_xattr_acl_access_handler; extern const struct xattr_handler f2fs_xattr_acl_default_handler; extern const struct xattr_handler 
f2fs_xattr_advise_handler; +extern const struct xattr_handler f2fs_xattr_security_handler; extern const struct xattr_handler *f2fs_xattr_handlers[]; -extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name, - const void *value, size_t value_len); -extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name, - void *buffer, size_t buffer_size); -extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, - size_t buffer_size); - +extern int f2fs_setxattr(struct inode *, int, const char *, + const void *, size_t, struct page *); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else #define f2fs_xattr_handlers NULL static inline int f2fs_setxattr(struct inode *inode, int name_index, - const char *name, const void *value, size_t value_len) + const char *name, const void *value, size_t value_len) { return -EOPNOTSUPP; } @@ -142,4 +140,14 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, } #endif +#ifdef CONFIG_F2FS_FS_SECURITY +extern int f2fs_init_security(struct inode *, struct inode *, + const struct qstr *, struct page *); +#else +static inline int f2fs_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr, struct page *ipage) +{ + return 0; +} +#endif #endif /* __F2FS_XATTR_H__ */ -- cgit v0.10.2 From 5fb08372a689360b7db51b0cfb9a068fddf279a2 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Fri, 7 Jun 2013 14:16:53 +0800 Subject: f2fs: set sb->s_fs_info before calling parse_options() In f2fs_fill_super(), set sb->s_fs_info before calling parse_options(), then we can get f2fs_sb_info via F2FS_SB(sb) in parse_options(). So that the second argument "sbi" of func parse_options() is no longer needed. 
Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 3ac305d..4fdcdff 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -303,9 +303,9 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi, - char *options) +static int parse_options(struct super_block *sb, char *options) { + struct f2fs_sb_info *sbi = F2FS_SB(sb); substring_t args[MAX_OPT_ARGS]; char *p; int arg = 0; @@ -541,6 +541,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) if (err) goto free_sb_buf; } + sb->s_fs_info = sbi; /* init some FS parameters */ sbi->active_logs = NR_CURSEG_TYPE; @@ -553,7 +554,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) set_opt(sbi, POSIX_ACL); #endif /* parse mount options */ - err = parse_options(sb, sbi, (char *)data); + err = parse_options(sb, (char *)data); if (err) goto free_sb_buf; @@ -565,7 +566,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = f2fs_xattr_handlers; sb->s_export_op = &f2fs_export_ops; sb->s_magic = F2FS_SUPER_MAGIC; - sb->s_fs_info = sbi; sb->s_time_gran = 1; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); -- cgit v0.10.2 From 2d4d9fb591fe83d9f0559afaa9736ebc8edad0aa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Jun 2013 16:33:07 +0900 Subject: f2fs: fix i_blocks translation on various types of files Basically an inode manages the number of allocated blocks with inode->i_blocks which is represented in a unit of sectors, not file system blocks. But, f2fs has used i_blocks in a unit of file system blocks, and f2fs_getattr translates it to the number of sectors when fstat is called. However, previously f2fs_file_inode_operations only has this, so this patch adds it to all the types of inode_operations. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4f2c209..c344a4d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -903,6 +903,7 @@ static inline int f2fs_readonly(struct super_block *sb) int f2fs_sync_file(struct file *, loff_t, loff_t, int); void truncate_data_blocks(struct dnode_of_data *); void f2fs_truncate(struct inode *); +int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); int f2fs_setattr(struct dentry *, struct iattr *); int truncate_hole(struct inode *, pgoff_t, pgoff_t); int truncate_data_blocks_range(struct dnode_of_data *, int); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index deefd25..8d2fce9 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -291,7 +291,7 @@ void f2fs_truncate(struct inode *inode) } } -static int f2fs_getattr(struct vfsmount *mnt, +int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 1fe1502..810444e 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -490,6 +490,7 @@ const struct inode_operations f2fs_dir_inode_operations = { .rmdir = f2fs_rmdir, .mknod = f2fs_mknod, .rename = f2fs_rename, + .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, #ifdef CONFIG_F2FS_FS_XATTR @@ -504,6 +505,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, + .getattr = f2fs_getattr, .setattr = f2fs_setattr, #ifdef CONFIG_F2FS_FS_XATTR .setxattr = generic_setxattr, @@ -514,6 +516,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { }; const struct inode_operations f2fs_special_inode_operations = { + .getattr = f2fs_getattr, .setattr = f2fs_setattr, .get_acl = f2fs_get_acl, #ifdef CONFIG_F2FS_FS_XATTR -- cgit v0.10.2 From 699489bbbea4fc3b9b735d69941cf4fca91ce1d5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Jun 2013 
22:08:23 +0900 Subject: f2fs: sync dir->i_size with its block allocation If a new dentry block is allocated and its i_size is updated, we should update its inode block together in order to sync i_size and its block allocation. Otherwise, we can lose an additional dentry block due to the inconsistent i_size. Erroneous Scenario ------------------- In the recovery routine, - recover_dentry | - __f2fs_add_link | | - get_new_data_page | | | - i_size_write(new_i_size) | | | - mark_inode_dirty_sync(dir) | | - update_parent_metadata | | | - mark_inode_dirty(dir) | - write_checkpoint - sync_dirty_dir_inodes - filemap_flush(dentry_blocks) - f2fs_write_data_page - skip to write the last dentry block due to index < i_size In the above flow, new_i_size is not updated to its inode block so that the last dentry block will be lost accordingly. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 93917e3..5b145fc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -339,6 +339,8 @@ repeat: if (new_i_size && i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); + /* Only the directory inode sets new_i_size */ + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); mark_inode_dirty_sync(inode); } return page; diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index eaea5b5..69ca049 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -370,22 +370,20 @@ error: static void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { - bool need_dir_update = false; - if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { if (S_ISDIR(inode->i_mode)) { inc_nlink(dir); - need_dir_update = true; + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); } clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); } dir->i_mtime = dir->i_ctime = CURRENT_TIME; if (F2FS_I(dir)->i_current_depth != current_depth) { F2FS_I(dir)->i_current_depth = current_depth; - need_dir_update = true; + set_inode_flag(F2FS_I(dir),
FI_UPDATE_DIR); } - if (need_dir_update) + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) update_inode_page(dir); else mark_inode_dirty(dir); @@ -502,6 +500,7 @@ add_dentry: update_parent_metadata(dir, inode, current_depth); fail: + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); kunmap(dentry_page); f2fs_put_page(dentry_page, 1); return err; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c344a4d..27edf59 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -859,6 +859,7 @@ enum { FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ + FI_UPDATE_DIR, /* should update inode block for consistency */ FI_DELAY_IPUT, /* used for the recovery */ }; -- cgit v0.10.2 From 6a3e8ef0de1e548d1cf9bcf51d9b7b6f4141fec5 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 8 Jun 2013 21:25:28 +0900 Subject: f2fs: use the F2FS specific flags in f2fs_ioctl() In f2fs_ioctl() function, it is using generic flags. Since F2FS specific flags are defined. So lets use those flags. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8d2fce9..85b665d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -575,10 +575,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) int ret; switch (cmd) { - case FS_IOC_GETFLAGS: + case F2FS_IOC_GETFLAGS: flags = fi->i_flags & FS_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); - case FS_IOC_SETFLAGS: + case F2FS_IOC_SETFLAGS: { unsigned int oldflags; -- cgit v0.10.2 From d7cc950b4c910e4440485be784493880a0d09086 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sat, 8 Jun 2013 21:25:40 +0900 Subject: f2fs: optimise the truncate_data_blocks_range() range The function truncate_data_blocks_range() decrements the valid block count of inode via dec_valid_block_count(). 
Since this function updates the i_blocks field of inode, we can update this field once we have calculated total the number of blocks to be freed. Therefore we can decrement valid blocks outside of the for loop. if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); set_page_dirty(dn->node_page); sync_inode_page(dn); } 'nr_free' tells the total number of blocks freed. So, we can just directly pass this value to dec_valid_block_count() and update the i_blocks. Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 85b665d..2f649b8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -185,10 +185,10 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count) update_extent_cache(NULL_ADDR, dn); invalidate_blocks(sbi, blkaddr); - dec_valid_block_count(sbi, dn->inode, 1); nr_free++; } if (nr_free) { + dec_valid_block_count(sbi, dn->inode, nr_free); set_page_dirty(dn->node_page); sync_inode_page(dn); } -- cgit v0.10.2 From b3783873cc2214542d3da9a1aa800b20919d5889 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 10 Jun 2013 09:17:01 +0900 Subject: f2fs: avoid freqeunt write_inode calls If update_inode is called, we don't need to do write_inode. So, let's use a *dirty* flag for each inode. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 27edf59..a05aa65 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -856,6 +856,7 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ + FI_DIRTY_INODE, /* indicate inode is dirty or not */ FI_INC_LINK, /* need to increment i_nlink */ FI_ACL_MODE, /* indicate acl mode */ FI_NO_ALLOC, /* should not allocate any blocks */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2f649b8..fda226f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -147,6 +147,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } else { /* if there is no written node page, write its inode page */ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { + mark_inode_dirty_sync(inode); ret = f2fs_write_inode(inode, NULL); if (ret) goto out; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b44a4c1..2b2d45d1 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -192,6 +192,7 @@ void update_inode(struct inode *inode, struct page *node_page) set_cold_node(inode, node_page); set_page_dirty(node_page); + clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); } int update_inode_page(struct inode *inode) @@ -217,6 +218,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) inode->i_ino == F2FS_META_INO(sbi)) return 0; + if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) + return 0; + if (wbc) f2fs_balance_fs(sbi); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4fdcdff..ba56549 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -112,6 +112,17 @@ static int f2fs_drop_inode(struct inode *inode) return generic_drop_inode(inode); } +/* + * f2fs_dirty_inode() is called from __mark_inode_dirty() + * + * We should call set_dirty_inode to write the dirty inode through write_inode. 
+ */ +static void f2fs_dirty_inode(struct inode *inode, int flags) +{ + set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); + return; +} + static void f2fs_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); @@ -249,6 +260,7 @@ static struct super_operations f2fs_sops = { .drop_inode = f2fs_drop_inode, .destroy_inode = f2fs_destroy_inode, .write_inode = f2fs_write_inode, + .dirty_inode = f2fs_dirty_inode, .show_options = f2fs_show_options, .evict_inode = f2fs_evict_inode, .put_super = f2fs_put_super, -- cgit v0.10.2 From e79efe3b69d6454eb8ec734a24d49f0f4c7d26f5 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Thu, 13 Jun 2013 16:59:27 +0800 Subject: f2fs: remove unnecessary parameter "offset" from __add_sum_entry() We can get the value directly from pointer "curseg". Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index be668ff..77f31c0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -255,11 +255,11 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) * This function should be resided under the curseg_mutex lock */ static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, - struct f2fs_summary *sum, unsigned short offset) + struct f2fs_summary *sum) { struct curseg_info *curseg = CURSEG_I(sbi, type); void *addr = curseg->sum_blk; - addr += offset * sizeof(struct f2fs_summary); + addr += curseg->next_blkoff * sizeof(struct f2fs_summary); memcpy(addr, sum, sizeof(struct f2fs_summary)); return; } @@ -845,7 +845,7 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, * because, this function updates a summary entry in the * current summary block. 
*/ - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); mutex_lock(&sit_i->sentry_lock); __refresh_next_blkoff(sbi, curseg); @@ -946,7 +946,7 @@ void recover_data_page(struct f2fs_sb_info *sbi, curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); @@ -983,7 +983,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, } curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & (sbi->blocks_per_seg - 1); - __add_sum_entry(sbi, type, sum, curseg->next_blkoff); + __add_sum_entry(sbi, type, sum); /* change the current log to the next block addr in advance */ if (next_segno != segno) { -- cgit v0.10.2 From 8d8451af6875f8841dc20987d1363405020a9172 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Thu, 13 Jun 2013 16:59:28 +0800 Subject: f2fs: make locate_dirty_segment() as static It's used only locally and could be static. Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a05aa65..3e7cb33 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -996,7 +996,6 @@ void destroy_node_manager_caches(void); */ void f2fs_balance_fs(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); -void locate_dirty_segment(struct f2fs_sb_info *, unsigned int); void clear_prefree_segments(struct f2fs_sb_info *); int npages_for_summary_flush(struct f2fs_sb_info *); void allocate_new_segments(struct f2fs_sb_info *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 77f31c0..b15debc 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -94,7 +94,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, * Adding dirty entry into seglist is not critical operation. * If a given segment is one of current working segments, it won't be added. 
*/ -void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) +static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned short valid_blocks; -- cgit v0.10.2 From b25958b6ecf1dce087e62b9aa27cf8f2fe9b5c86 Mon Sep 17 00:00:00 2001 From: Haicheng Li Date: Thu, 13 Jun 2013 16:59:29 +0800 Subject: f2fs: optimize do_write_data_page() Since "need_inplace_update() == true" is a very rare case, use unlikely() to give the compiler a chance to optimize the code. Signed-off-by: Haicheng Li Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5b145fc..6d4a743 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -497,8 +497,9 @@ int do_write_data_page(struct page *page) * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (old_blk_addr != NEW_ADDR && !is_cold_data(page) && - need_inplace_update(inode)) { + if (unlikely(old_blk_addr != NEW_ADDR && + !is_cold_data(page) && + need_inplace_update(inode))) { rewrite_data_page(F2FS_SB(inode->i_sb), page, old_blk_addr); } else { -- cgit v0.10.2 From 354a3399dc6f7e556d04e1c731cd50e08eeb44bd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 14 Jun 2013 08:52:35 +0900 Subject: f2fs: recover wrong pino after checkpoint during fsync If a file is linked, f2fs loses its parent inode number so that fsync calls for the linked file should do checkpoint all the time. But, if we can recover its parent inode number after the checkpoint, we can adjust roll-forward mechanism for the further fsync calls, which is able to improve the fsync performance significantly. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 69ca049..4f21452 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -356,7 +356,7 @@ static struct page *init_inode_metadata(struct inode *inode, * We lost i_pino from now on.
*/ if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { - set_cp_file(inode); + file_lost_pino(inode); inc_nlink(inode); } return page; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3e7cb33..863a5e91 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -151,7 +151,7 @@ struct extent_info { * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. */ #define FADVISE_COLD_BIT 0x01 -#define FADVISE_CP_BIT 0x02 +#define FADVISE_LOST_PINO_BIT 0x02 struct f2fs_inode_info { struct inode vfs_inode; /* serve a vfs inode */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index fda226f..d2d2b7d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -102,6 +102,24 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { .remap_pages = generic_file_remap_pages, }; +static int get_parent_ino(struct inode *inode, nid_t *pino) +{ + struct dentry *dentry; + + inode = igrab(inode); + dentry = d_find_any_alias(inode); + iput(inode); + if (!dentry) + return 0; + + inode = igrab(dentry->d_parent->d_inode); + dput(dentry); + + *pino = inode->i_ino; + iput(inode); + return 1; +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -134,7 +152,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; - else if (is_cp_file(inode)) + else if (file_wrong_pino(inode)) need_cp = true; else if (!space_for_roll_forward(sbi)) need_cp = true; @@ -142,8 +160,19 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) need_cp = true; if (need_cp) { + nid_t pino; + /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + F2FS_I(inode)->i_pino = pino; + file_got_pino(inode); + mark_inode_dirty_sync(inode); + ret = f2fs_write_inode(inode, NULL); + if (ret) + goto out; + } 
} else { /* if there is no written node page, write its inode page */ while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 810444e..64c0716 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -112,7 +112,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, int count = le32_to_cpu(sbi->raw_super->extension_count); for (i = 0; i < count; i++) { if (is_multimedia_file(name, extlist[i])) { - set_cold_file(inode); + file_set_cold(inode); break; } } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index a503661..c65fb4f 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -285,10 +285,17 @@ static inline void set_file(struct inode *inode, int type) F2FS_I(inode)->i_advise |= type; } -#define is_cold_file(inode) is_file(inode, FADVISE_COLD_BIT) -#define is_cp_file(inode) is_file(inode, FADVISE_CP_BIT) -#define set_cold_file(inode) set_file(inode, FADVISE_COLD_BIT) -#define set_cp_file(inode) set_file(inode, FADVISE_CP_BIT) +static inline void clear_file(struct inode *inode, int type) +{ + F2FS_I(inode)->i_advise &= ~type; +} + +#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) +#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) +#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) +#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) +#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) +#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) static inline int is_cold_data(struct page *page) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b15debc..0e1a60a 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -796,7 +796,7 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) if (S_ISDIR(inode->i_mode)) return CURSEG_HOT_DATA; - else if (is_cold_data(page) || is_cold_file(inode)) + else if (is_cold_data(page) || file_is_cold(inode)) return CURSEG_COLD_DATA; else return 
CURSEG_WARM_DATA; -- cgit v0.10.2 From 696c018c7718f5e33e1107da19c4d64a25018878 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 16 Jun 2013 09:48:48 +0900 Subject: f2fs: add remount_fs callback support Add the f2fs_remount function call which will be used during the filesystem remounting. This function will help us to change the mount options specific to f2fs. Also modify the f2fs background_gc mount option, which will allow the user to dynamically turn on/off the garbage collection in f2fs based on the background_gc value. If background_gc=on, garbage collection will be turned on & if background_gc=off, garbage collection will be turned off. By default the garbage collection is on in f2fs. Change Log: v2: Incorporated the review comments by Gu Zheng. Removing the restore part for VFS flags Updating comments with proper flag conditions Display GC background option as ON/OFF Revised conditions to stop GC in case of remount v1: Initial changes for adding remount_fs callback support. Cc: Gu Zheng Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Reviewed-by: Gu Zheng [Jaegeuk Kim: change /** with /* for the coding style] Signed-off-by: Jaegeuk Kim diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index bd3c56c..b91e2f2 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -98,8 +98,13 @@ Cleaning Overhead MOUNT OPTIONS ================================================================================ -background_gc_off Turn off cleaning operations, namely garbage collection, - triggered in background when I/O subsystem is idle. +background_gc=%s Turn on/off cleaning operations, namely garbage + collection, triggered in background when I/O subsystem is + idle. If background_gc=on, it will turn on the garbage + collection and if background_gc=off, garbage collection + will be turned off. + Default value for this option is on. So garbage + collection is on by default.
disable_roll_forward Disable the roll-forward recovery routine discard Issue discard/TRIM commands when a segment is cleaned. no_heap Disable heap-style segment allocation which finds free diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ba56549..75c7dc3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -34,7 +34,7 @@ static struct kmem_cache *f2fs_inode_cachep; enum { - Opt_gc_background_off, + Opt_gc_background, Opt_disable_roll_forward, Opt_discard, Opt_noheap, @@ -46,7 +46,7 @@ enum { }; static match_table_t f2fs_tokens = { - {Opt_gc_background_off, "background_gc_off"}, + {Opt_gc_background, "background_gc=%s"}, {Opt_disable_roll_forward, "disable_roll_forward"}, {Opt_discard, "discard"}, {Opt_noheap, "no_heap"}, @@ -76,6 +76,91 @@ static void init_once(void *foo) inode_init_once(&fi->vfs_inode); } +static int parse_options(struct super_block *sb, char *options) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + substring_t args[MAX_OPT_ARGS]; + char *p, *name; + int arg = 0; + + if (!options) + return 0; + + while ((p = strsep(&options, ",")) != NULL) { + int token; + if (!*p) + continue; + /* + * Initialize args struct so we know whether arg was + * found; some options take optional arguments. 
+ */ + args[0].to = args[0].from = NULL; + token = match_token(p, f2fs_tokens, args); + + switch (token) { + case Opt_gc_background: + name = match_strdup(&args[0]); + + if (!name) + return -ENOMEM; + if (!strncmp(name, "on", 2)) + set_opt(sbi, BG_GC); + else if (!strncmp(name, "off", 3)) + clear_opt(sbi, BG_GC); + else { + kfree(name); + return -EINVAL; + } + kfree(name); + break; + case Opt_disable_roll_forward: + set_opt(sbi, DISABLE_ROLL_FORWARD); + break; + case Opt_discard: + set_opt(sbi, DISCARD); + break; + case Opt_noheap: + set_opt(sbi, NOHEAP); + break; +#ifdef CONFIG_F2FS_FS_XATTR + case Opt_nouser_xattr: + clear_opt(sbi, XATTR_USER); + break; +#else + case Opt_nouser_xattr: + f2fs_msg(sb, KERN_INFO, + "nouser_xattr options not supported"); + break; +#endif +#ifdef CONFIG_F2FS_FS_POSIX_ACL + case Opt_noacl: + clear_opt(sbi, POSIX_ACL); + break; +#else + case Opt_noacl: + f2fs_msg(sb, KERN_INFO, "noacl options not supported"); + break; +#endif + case Opt_active_logs: + if (args->from && match_int(args, &arg)) + return -EINVAL; + if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) + return -EINVAL; + sbi->active_logs = arg; + break; + case Opt_disable_ext_identify: + set_opt(sbi, DISABLE_EXT_IDENTIFY); + break; + default: + f2fs_msg(sb, KERN_ERR, + "Unrecognized mount option \"%s\" or missing value", + p); + return -EINVAL; + } + } + return 0; +} + static struct inode *f2fs_alloc_inode(struct super_block *sb) { struct f2fs_inode_info *fi; @@ -225,10 +310,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) { struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); - if (test_opt(sbi, BG_GC)) - seq_puts(seq, ",background_gc_on"); + if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) + seq_printf(seq, ",background_gc=%s", "on"); else - seq_puts(seq, ",background_gc_off"); + seq_printf(seq, ",background_gc=%s", "off"); if (test_opt(sbi, DISABLE_ROLL_FORWARD)) seq_puts(seq, ",disable_roll_forward"); if (test_opt(sbi, DISCARD)) @@ 
-255,6 +340,58 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) return 0; } +static int f2fs_remount(struct super_block *sb, int *flags, char *data) +{ + struct f2fs_sb_info *sbi = F2FS_SB(sb); + struct f2fs_mount_info org_mount_opt; + int err, active_logs; + + /* + * Save the old mount options in case we + * need to restore them. + */ + org_mount_opt = sbi->mount_opt; + active_logs = sbi->active_logs; + + /* parse mount options */ + err = parse_options(sb, data); + if (err) + goto restore_opts; + + /* + * Previous and new state of filesystem is RO, + * so no point in checking GC conditions. + */ + if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) + goto skip; + + /* + * We stop the GC thread if FS is mounted as RO + * or if background_gc = off is passed in mount + * option. Also sync the filesystem. + */ + if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { + if (sbi->gc_thread) { + stop_gc_thread(sbi); + f2fs_sync_fs(sb, 1); + } + } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + err = start_gc_thread(sbi); + if (err) + goto restore_opts; + } +skip: + /* Update the POSIXACL Flag */ + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + (test_opt(sbi, POSIX_ACL) ? 
MS_POSIXACL : 0); + return 0; + +restore_opts: + sbi->mount_opt = org_mount_opt; + sbi->active_logs = active_logs; + return err; +} + static struct super_operations f2fs_sops = { .alloc_inode = f2fs_alloc_inode, .drop_inode = f2fs_drop_inode, @@ -268,6 +405,7 @@ static struct super_operations f2fs_sops = { .freeze_fs = f2fs_freeze, .unfreeze_fs = f2fs_unfreeze, .statfs = f2fs_statfs, + .remount_fs = f2fs_remount, }; static struct inode *f2fs_nfs_get_inode(struct super_block *sb, @@ -315,79 +453,6 @@ static const struct export_operations f2fs_export_ops = { .get_parent = f2fs_get_parent, }; -static int parse_options(struct super_block *sb, char *options) -{ - struct f2fs_sb_info *sbi = F2FS_SB(sb); - substring_t args[MAX_OPT_ARGS]; - char *p; - int arg = 0; - - if (!options) - return 0; - - while ((p = strsep(&options, ",")) != NULL) { - int token; - if (!*p) - continue; - /* - * Initialize args struct so we know whether arg was - * found; some options take optional arguments. - */ - args[0].to = args[0].from = NULL; - token = match_token(p, f2fs_tokens, args); - - switch (token) { - case Opt_gc_background_off: - clear_opt(sbi, BG_GC); - break; - case Opt_disable_roll_forward: - set_opt(sbi, DISABLE_ROLL_FORWARD); - break; - case Opt_discard: - set_opt(sbi, DISCARD); - break; - case Opt_noheap: - set_opt(sbi, NOHEAP); - break; -#ifdef CONFIG_F2FS_FS_XATTR - case Opt_nouser_xattr: - clear_opt(sbi, XATTR_USER); - break; -#else - case Opt_nouser_xattr: - f2fs_msg(sb, KERN_INFO, - "nouser_xattr options not supported"); - break; -#endif -#ifdef CONFIG_F2FS_FS_POSIX_ACL - case Opt_noacl: - clear_opt(sbi, POSIX_ACL); - break; -#else - case Opt_noacl: - f2fs_msg(sb, KERN_INFO, "noacl options not supported"); - break; -#endif - case Opt_active_logs: - if (args->from && match_int(args, &arg)) - return -EINVAL; - if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) - return -EINVAL; - sbi->active_logs = arg; - break; - case Opt_disable_ext_identify: - set_opt(sbi, 
DISABLE_EXT_IDENTIFY); - break; - default: - f2fs_msg(sb, KERN_ERR, - "Unrecognized mount option \"%s\" or missing value", - p); - return -EINVAL; - } - } - return 0; -} - static loff_t max_file_size(unsigned bits) { loff_t result = ADDRS_PER_INODE; @@ -686,10 +751,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) "Cannot recover all fsync data errno=%ld", err); } - /* After POR, we can run background GC thread */ - err = start_gc_thread(sbi); - if (err) - goto fail; + /* + * If filesystem is not mounted as read-only then + * do start the gc_thread. + */ + if (!(sb->s_flags & MS_RDONLY)) { + /* After POR, we can run background GC thread.*/ + err = start_gc_thread(sbi); + if (err) + goto fail; + } err = f2fs_build_stats(sbi); if (err) -- cgit v0.10.2 From 7e586fa0244578320fcced9cc08c6b124f727c35 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Jun 2013 20:47:19 +0900 Subject: f2fs: fix crc endian conversion While calculating CRC for the checkpoint block, we use __u32, but when storing the crc value to the disk, we use __le32. Let's fix the inconsistency. 
Reported-and-Tested-by: Oded Gabbay Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9a77509..66a6b85 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -357,8 +357,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, unsigned long blk_size = sbi->blocksize; struct f2fs_checkpoint *cp_block; unsigned long long cur_version = 0, pre_version = 0; - unsigned int crc = 0; size_t crc_offset; + __u32 crc = 0; /* Read the 1st cp block in this CP pack */ cp_page_1 = get_meta_page(sbi, cp_addr); @@ -369,7 +369,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp1; - crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp1; @@ -384,7 +384,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, if (crc_offset >= blk_size) goto invalid_cp2; - crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); + crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); if (!f2fs_crc_valid(crc, cp_block, crc_offset)) goto invalid_cp2; @@ -648,7 +648,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) block_t start_blk; struct page *cp_page; unsigned int data_sum_blocks, orphan_blocks; - unsigned int crc32 = 0; + __u32 crc32 = 0; void *kaddr; int i; @@ -717,8 +717,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); - *(__le32 *)((unsigned char *)ckpt + - le32_to_cpu(ckpt->checksum_offset)) + *((__le32 *)((unsigned char *)ckpt + + le32_to_cpu(ckpt->checksum_offset))) = cpu_to_le32(crc32); start_blk = __start_cp_addr(sbi); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 863a5e91..467d42d 100644 --- a/fs/f2fs/f2fs.h +++ 
b/fs/f2fs/f2fs.h @@ -47,14 +47,25 @@ struct f2fs_mount_info { unsigned int opt; }; -static inline __u32 f2fs_crc32(void *buff, size_t len) +#define CRCPOLY_LE 0xedb88320 + +static inline __u32 f2fs_crc32(void *buf, size_t len) { - return crc32_le(F2FS_SUPER_MAGIC, buff, len); + unsigned char *p = (unsigned char *)buf; + __u32 crc = F2FS_SUPER_MAGIC; + int i; + + while (len--) { + crc ^= *p++; + for (i = 0; i < 8; i++) + crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); + } + return crc; } -static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size) +static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) { - return f2fs_crc32(buff, buff_size) == blk_crc; + return f2fs_crc32(buf, buf_size) == blk_crc; } /* -- cgit v0.10.2 From 060dd67b3c0d451ea2c41e6a87811b4736a984e4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 24 Jun 2013 07:47:23 +0900 Subject: f2fs: fix an endian conversion bug detected by sparse This patch should fix the following bug reported by kbuild test robot. 
fs/f2fs/recovery.c:233:33: sparse: incorrect type in assignment (different base types) parse warnings: (new ones prefixed by >>) >> recovery.c:233: sparse: incorrect type in assignment (different base types) recovery.c:233: expected unsigned int [unsigned] [assigned] ofs_in_node recovery.c:233: got restricted __le16 [assigned] [usertype] ofs_in_node >> recovery.c:238: sparse: incorrect type in assignment (different base types) recovery.c:238: expected unsigned int [unsigned] ofs_in_node recovery.c:238: got restricted __le16 [assigned] [usertype] ofs_in_node Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ddde14f..9db8239 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -246,12 +246,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, struct dnode_of_data tdn = *dn; tdn.nid = nid; tdn.node_page = dn->inode_page; - tdn.ofs_in_node = sum.ofs_in_node; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); truncate_data_blocks_range(&tdn, 1); return 0; } else if (dn->nid == nid) { struct dnode_of_data tdn = *dn; - tdn.ofs_in_node = sum.ofs_in_node; + tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); truncate_data_blocks_range(&tdn, 1); return 0; } -- cgit v0.10.2 From 8736fbf00372dcc0bc7b04b86d737eb5db31fff6 Mon Sep 17 00:00:00 2001 From: Namjae Jeon Date: Sun, 16 Jun 2013 09:49:11 +0900 Subject: f2fs: optimize the init_dirty_segmap function Optimize the while loop condition Since this condition will always be true and while loop will be terminated by the following condition in code: if (segno >= TOTAL_SEGS(sbi)) break; Hence we can replace the while loop condition with while(1) instead of always checking for segno to be less than Total segs. Also we do not need to use TOTAL_SEGS() everytime. We can store this value in a local variable since this value is constant. 
Signed-off-by: Namjae Jeon Signed-off-by: Pankaj Kumar Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 0e1a60a..3ac4d29 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1582,13 +1582,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int segno = 0, offset = 0; + unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); unsigned short valid_blocks; - while (segno < TOTAL_SEGS(sbi)) { + while (1) { /* find dirty segment based on free segmap */ - segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset); - if (segno >= TOTAL_SEGS(sbi)) + segno = find_next_inuse(free_i, total_segs, offset); + if (segno >= total_segs) break; offset = segno + 1; valid_blocks = get_valid_blocks(sbi, segno, 0); -- cgit v0.10.2 From 6cc4af56066d8e9c62584cf61c6ce50fd0ab139a Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 20 Jun 2013 17:52:39 +0800 Subject: f2fs: code cleanup and simplify in func {find/add}_gc_inode This patch simplifies list operations in find_gc_inode and add_gc_inode. Just simple code cleanup. 
Signed-off-by: Gu Zheng [Jaegeuk Kim: add description] Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 3a9df36..35f9b1a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -321,28 +321,21 @@ static const struct victim_selection default_v_ops = { static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) { - struct list_head *this; struct inode_entry *ie; - list_for_each(this, ilist) { - ie = list_entry(this, struct inode_entry, list); + list_for_each_entry(ie, ilist, list) if (ie->inode->i_ino == ino) return ie->inode; - } return NULL; } static void add_gc_inode(struct inode *inode, struct list_head *ilist) { - struct list_head *this; - struct inode_entry *new_ie, *ie; + struct inode_entry *new_ie; - list_for_each(this, ilist) { - ie = list_entry(this, struct inode_entry, list); - if (ie->inode == inode) { - iput(inode); - return; - } + if (inode == find_gc_inode(inode->i_ino, ilist)) { + iput(inode); + return; } repeat: new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); -- cgit v0.10.2 From 763bfe1bc575dcce56dc5c570dc005d94911705f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Jun 2013 09:59:40 +0900 Subject: f2fs: remove reusing any prefree segments This patch removes check_prefree_segments initially designed to enhance the performance by narrowing the range of LBA usage across the whole block device. When allocating a new segment, previous f2fs tries to find proper prefree segments, and then, if it finds a segment, it reuses the segment for further data or node block allocation. However, I found that this was a totally wrong approach since the prefree segments have several data or node blocks that will be used by the roll-forward mechanism operated after sudden-power-off. Let's assume the following scenario.
/* write 8MB with fsync */ for (i = 0; i < 2048; i++) { offset = i * 4096; write(fd, offset, 4KB); fsync(fd); } In this case, naive segment allocation sequence will be like: data segment: x, x+1, x+2, x+3 node segment: y, y+1, y+2, y+3. But, if we can reuse prefree segments, the sequence can be like: data segment: x, x+1, y, y+1 node segment: y, y+1, y+2, y+3. Because, y, y+1, and y+2 became prefree segments one by one, and those are reused by data allocation. After conducting this workload, we should consider how to recover the latest inode with its data. If we reuse the prefree segments such as y or y+1, we lost the old node blocks so that f2fs even cannot start roll-forward recovery. Therefore, I suggest that we should remove reusing prefree segments. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3ac4d29..a86d125 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -309,56 +309,6 @@ static void write_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } -static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type) -{ - struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); - unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; - unsigned int segno; - unsigned int ofs = 0; - - /* - * If there is not enough reserved sections, - * we should not reuse prefree segments. - */ - if (has_not_enough_free_secs(sbi, 0)) - return NULL_SEGNO; - - /* - * NODE page should not reuse prefree segment, - * since those information is used for SPOR. 
- */ - if (IS_NODESEG(type)) - return NULL_SEGNO; -next: - segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs); - ofs += sbi->segs_per_sec; - - if (segno < TOTAL_SEGS(sbi)) { - int i; - - /* skip intermediate segments in a section */ - if (segno % sbi->segs_per_sec) - goto next; - - /* skip if the section is currently used */ - if (sec_usage_check(sbi, GET_SECNO(sbi, segno))) - goto next; - - /* skip if whole section is not prefree */ - for (i = 1; i < sbi->segs_per_sec; i++) - if (!test_bit(segno + i, prefree_segmap)) - goto next; - - /* skip if whole section was not free at the last checkpoint */ - for (i = 0; i < sbi->segs_per_sec; i++) - if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks) - goto next; - - return segno; - } - return NULL_SEGNO; -} - static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); @@ -597,11 +547,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, goto out; } - curseg->next_segno = check_prefree_segments(sbi, type); - - if (curseg->next_segno != NULL_SEGNO) - change_curseg(sbi, type, false); - else if (type == CURSEG_WARM_NODE) + if (type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) new_curseg(sbi, type, false); -- cgit v0.10.2 From 5ebefc5b409a194a09da7ad1962b4bfce10a6859 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 27 Jun 2013 09:28:54 +0800 Subject: f2fs: remove the unused argument "sbi" of func destroy_fsync_dnodes() As destroy_fsync_dnodes() is a simple list-cleanup func, so delete the unused and unrelated f2fs_sb_info argument of it. 
Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9db8239..d56d951 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -192,8 +192,7 @@ out: return err; } -static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, - struct list_head *head) +static void destroy_fsync_dnodes(struct list_head *head) { struct fsync_inode_entry *entry, *tmp; @@ -438,7 +437,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); BUG_ON(!list_empty(&inode_list)); out: - destroy_fsync_dnodes(sbi, &inode_list); + destroy_fsync_dnodes(&inode_list); kmem_cache_destroy(fsync_entry_slab); sbi->por_doing = 0; if (!err) -- cgit v0.10.2 From a1dd3c13ce65b726fddfe72b9d2f1009db983ce6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Jun 2013 13:04:08 +0900 Subject: f2fs: fix to recover i_size from roll-forward If user requests many data writes and fsync together, the last updated i_size should be stored to the inode block consistently. But, previous write_end just marks the inode as dirty and doesn't update its metadata into its inode block. After that, fsync just writes the inode block with newly updated data index excluding inode metadata updates. So, this patch introduces a write_end that also updates the inode block when the i_size is changed.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 6d4a743..e88f46f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -701,6 +701,27 @@ err: return err; } +static int f2fs_write_end(struct file *file, + struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata) +{ + struct inode *inode = page->mapping->host; + + SetPageUptodate(page); + set_page_dirty(page); + + if (pos + copied > i_size_read(inode)) { + i_size_write(inode, pos + copied); + mark_inode_dirty(inode); + update_inode_page(inode); + } + + unlock_page(page); + page_cache_release(page); + return copied; +} + static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { @@ -757,7 +778,7 @@ const struct address_space_operations f2fs_dblock_aops = { .writepage = f2fs_write_data_page, .writepages = f2fs_write_data_pages, .write_begin = f2fs_write_begin, - .write_end = nobh_write_end, + .write_end = f2fs_write_end, .set_page_dirty = f2fs_set_data_page_dirty, .invalidatepage = f2fs_invalidate_data_page, .releasepage = f2fs_release_data_page, -- cgit v0.10.2