From 34ba94bac938be14ffe2a639a4688b81a37d0f58 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Oct 2014 13:19:53 -0700 Subject: f2fs: do not make dirty any inmemory pages This patch let inmemory pages be clean all the time. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8e58c4c..84f20e9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1052,10 +1052,7 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) - register_inmem_page(inode, page); - else - set_page_dirty(page); + set_page_dirty(page); if (pos + copied > i_size_read(inode)) { i_size_write(inode, pos + copied); @@ -1138,6 +1135,12 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); SetPageUptodate(page); + + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) { + register_inmem_page(inode, page); + return 1; + } + mark_inode_dirty(inode); if (!PageDirty(page)) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8171e80..28f24ea 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -269,6 +269,7 @@ struct f2fs_inode_info { struct extent_info ext; /* in-memory extent cache entry */ struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 923cb76..9d4a7ab 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -178,7 +178,8 @@ void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; - + int err; +retry: new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ @@ -187,6 +188,16 @@ void register_inmem_page(struct inode *inode, struct 
page *page) /* increase reference count with clean state */ mutex_lock(&fi->inmem_lock); + err = radix_tree_insert(&fi->inmem_root, page->index, new); + if (err == -EEXIST) { + mutex_unlock(&fi->inmem_lock); + kmem_cache_free(inmem_entry_slab, new); + return; + } else if (err) { + mutex_unlock(&fi->inmem_lock); + kmem_cache_free(inmem_entry_slab, new); + goto retry; + } get_page(page); list_add_tail(&new->list, &fi->inmem_pages); mutex_unlock(&fi->inmem_lock); @@ -216,6 +227,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) do_write_data_page(cur->page, &fio); submit_bio = true; } + radix_tree_delete(&fi->inmem_root, cur->page->index); f2fs_put_page(cur->page, 1); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41d6f70..76b14c8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -373,6 +373,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_advise = 0; rwlock_init(&fi->ext.ext_lock); init_rwsem(&fi->i_sem); + INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); -- cgit v0.10.2 From cbcb2872e37ba0511f21b3ab5d65973b2055440c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Oct 2014 13:39:06 -0700 Subject: f2fs: invalidate inmemory page If user truncates file's data, we should truncate inmemory pages too. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 84f20e9..5b80ada 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1116,6 +1116,9 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) return; + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + invalidate_inmem_page(inode, page); + if (PageDirty(page)) inode_dec_dirty_pages(inode); ClearPagePrivate(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 28f24ea..d41d1b7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1297,6 +1297,7 @@ void destroy_node_manager_caches(void); * segment.c */ void register_inmem_page(struct inode *, struct page *); +void invalidate_inmem_page(struct inode *, struct page *); void commit_inmem_pages(struct inode *, bool); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9d4a7ab..902c4c3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -203,6 +203,22 @@ retry: mutex_unlock(&fi->inmem_lock); } +void invalidate_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct inmem_pages *cur; + + mutex_lock(&fi->inmem_lock); + cur = radix_tree_lookup(&fi->inmem_root, page->index); + if (cur) { + radix_tree_delete(&fi->inmem_root, cur->page->index); + f2fs_put_page(cur->page, 0); + list_del(&cur->list); + kmem_cache_free(inmem_entry_slab, cur); + } + mutex_unlock(&fi->inmem_lock); +} + void commit_inmem_pages(struct inode *inode, bool abort) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); -- cgit v0.10.2 From c08a690b46919e6b531c1a2bb74389323e5f5b1c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Oct 2014 10:16:54 -0700 Subject: f2fs: should truncate any allocated block for inline_data write When trying to write inline_data, we should truncate any data block allocated and pointed by the inode 
block. We should also consider the case where the data index is not 0. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 88036fd..e3abcfb 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -166,6 +166,14 @@ int f2fs_write_inline_data(struct inode *inode, return err; ipage = dn.inode_page; + /* Release any data block if it is allocated */ + if (!f2fs_has_inline_data(inode)) { + int count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); + truncate_data_blocks_range(&dn, count); + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + stat_inc_inline_inode(inode); + } + f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); @@ -174,13 +182,6 @@ int f2fs_write_inline_data(struct inode *inode, memcpy(dst_addr, src_addr, size); kunmap(page); - /* Release the first data block if it is allocated */ - if (!f2fs_has_inline_data(inode)) { - truncate_data_blocks_range(&dn, 1); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_inc_inline_inode(inode); - } - set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); sync_inode_page(&dn); f2fs_put_dnode(&dn); -- cgit v0.10.2 From 1ce86bf6f882381013e12b16bbb3921608c0f238 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Oct 2014 10:24:34 -0700 Subject: f2fs: fix race condition on truncation with inline_data Let's consider the following scenario. blkaddr[0] inline_data i_size i_blocks writepage truncate NEW X 4096 2 dirty page #0 NEW X 0 change i_size NEW X 0 2 f2fs_write_inline_data NEW X 0 2 get_dnode_of_data NEW X 0 2 truncate_data_blocks_range NULL O 0 1 memcpy(inline_data) NULL O 0 1 f2fs_put_dnode NULL O 0 1 f2fs_truncate NULL O 0 1 get_dnode_of_data NULL O 0 1 *invalid block addr* This patch adds a check of the inline_data flag during f2fs_truncate so that it does not refer to corrupted block indices. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8e68bb6..543d8c6 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -473,6 +473,12 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) return err; } + /* writepage can convert inline_data under get_donde_of_data */ + if (f2fs_has_inline_data(inode)) { + f2fs_put_dnode(&dn); + goto done; + } + count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); count -= dn.ofs_in_node; -- cgit v0.10.2 From a78186ebe516b6d7df43636603f0998803ab356a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Oct 2014 17:57:29 -0700 Subject: f2fs: use highmem for directory pages This patch fixes to use highmem for directory pages. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0deead4..52d6f54 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -156,7 +156,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 860313a..6d7381b 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -33,7 +33,8 @@ #define F2FS_META_INO(sbi) (sbi->meta_ino_num) /* This flag is used by node and meta inodes, and by recovery */ -#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) +#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) /* * For further optimization on multi-head logs, on-disk layout supports maximum -- cgit v0.10.2 From 9ba69cf9877384baebd16c6fb51ceccd13677b37 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Oct 2014 20:33:55 -0700 Subject: f2fs: avoid to allocate when inline_data was written The 
scenario is like this. inline_data i_size page write_begin/vm_page_mkwrite X 30 dirty_page X 30 write to #4096 position X 30 get_dnode_of_data wait for get_dnode_of_data O 30 write inline_data O 30 get_dnode_of_data O 30 reserve data block .. In this case, we have #0 = NEW_ADDR and inline_data as well. We should not allow this condition for further access. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 5b80ada..973fd77 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -257,9 +257,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) bool need_put = dn->inode_page ? false : true; int err; - /* if inode_page exists, index should be zero */ - f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index); - err = get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; @@ -951,7 +948,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; + struct page *page, *ipage; pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT; struct dnode_of_data dn; int err = 0; @@ -979,13 +976,26 @@ repeat: goto inline_data; f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, index); - f2fs_unlock_op(sbi); - if (err) { + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + goto unlock_fail; + + if (f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + f2fs_unlock_op(sbi); f2fs_put_page(page, 0); - goto fail; + goto repeat; } + + set_new_dnode(&dn, inode, ipage, NULL, 0); + err = f2fs_reserve_block(&dn, index); + if (err) + goto unlock_fail; + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + inline_data: lock_page(page); if (unlikely(page->mapping != mapping)) { @@ -1038,6 +1048,10 @@ out: SetPageUptodate(page); clear_cold_data(page); return 0; + +unlock_fail: + f2fs_unlock_op(sbi); + f2fs_put_page(page, 0); fail: 
f2fs_write_failed(mapping, pos + len); return err; diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 543d8c6..456df07 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,12 +35,13 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; + struct page *ipage; int err; f2fs_balance_fs(sbi); sb_start_pagefault(inode->i_sb); - +retry: /* force to convert with normal data indices */ err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); if (err) @@ -48,11 +49,28 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, /* block allocation */ f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + f2fs_unlock_op(sbi); + goto out; + } + + if (f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + f2fs_unlock_op(sbi); + goto retry; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, page->index); - f2fs_unlock_op(sbi); - if (err) + if (err) { + f2fs_unlock_op(sbi); goto out; + } + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); file_update_time(vma->vm_file); lock_page(page); -- cgit v0.10.2 From 13fd8f89f6623b348dfb7ed6dce50fab733f4c91 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 19 Oct 2014 01:43:23 -0700 Subject: f2fs: fix to call f2fs_unlock_op This patch fixes to call f2fs_unlock_op, which was missing before. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 456df07..80d9a04 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -494,7 +494,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) /* writepage can convert inline_data under get_donde_of_data */ if (f2fs_has_inline_data(inode)) { f2fs_put_dnode(&dn); - goto done; + goto unlock_done; } count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); @@ -510,6 +510,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); +unlock_done: if (lock) f2fs_unlock_op(sbi); done: -- cgit v0.10.2 From 4a257ed677cb68a2096358e58b61efacf85d5f16 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Oct 2014 11:43:30 -0700 Subject: f2fs: avoid build warning This patch removes a build warning. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 902c4c3..2c1e608 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1715,7 +1715,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * #2, flush sit entries to sit page. */ list_for_each_entry_safe(ses, tmp, head, set_list) { - struct page *page; + struct page *page = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start_segno = ses->start_segno; unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, -- cgit v0.10.2 From af41d3ee00bf97bfc3bd64bce6d7a92e85955e98 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Oct 2014 14:14:16 -0700 Subject: f2fs: avoid infinite loop at cp_error This patch avoids an infinite loop in sync_dirty_dir_inodes when -EIO is detected. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd10a03..ca514d5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -731,6 +731,9 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) struct dir_inode_entry *entry; struct inode *inode; retry: + if (unlikely(f2fs_cp_error(sbi))) + return; + spin_lock(&sbi->dir_inode_lock); head = &sbi->dir_inode_list; -- cgit v0.10.2 From 34d67debe02b3b2b035b5bdce0fab75800f9a344 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:15:19 +0800 Subject: f2fs: add infra struct and helper for inline dir This patch defines macro/inline dentry structure, and adds some helpers for inline dir infrastructure. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d41d1b7..4fa0df5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -46,8 +46,9 @@ #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 #define F2FS_MOUNT_INLINE_XATTR 0x00000080 #define F2FS_MOUNT_INLINE_DATA 0x00000100 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 -#define F2FS_MOUNT_NOBARRIER 0x00000400 +#define F2FS_MOUNT_INLINE_DENTRY 0x00000200 +#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 +#define F2FS_MOUNT_NOBARRIER 0x00000800 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -1058,6 +1059,7 @@ enum { FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ FI_APPEND_WRITE, /* inode has appended data */ FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ @@ -1104,6 +1106,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_XATTR); if (ri->i_inline & F2FS_INLINE_DATA) set_inode_flag(fi, FI_INLINE_DATA); + if (ri->i_inline & F2FS_INLINE_DENTRY) + 
set_inode_flag(fi, FI_INLINE_DENTRY); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1115,6 +1119,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_XATTR; if (is_inode_flag_set(fi, FI_INLINE_DATA)) ri->i_inline |= F2FS_INLINE_DATA; + if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) + ri->i_inline |= F2FS_INLINE_DENTRY; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1165,6 +1171,17 @@ static inline void *inline_data_addr(struct page *page) return (void *)&(ri->i_addr[1]); } +static inline int f2fs_has_inline_dentry(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); +} + +static inline void *inline_dentry_addr(struct page *page) +{ + struct f2fs_inode *ri = F2FS_INODE(page); + return (void *)&(ri->i_addr[1]); +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 6d7381b..63f8303 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -171,6 +171,7 @@ struct f2fs_extent { #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ +#define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) @@ -436,6 +437,24 @@ struct f2fs_dentry_block { __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; } __packed; +/* for inline dir */ +#define NR_INLINE_DENTRY (MAX_INLINE_DATA * BITS_PER_BYTE / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + BITS_PER_BYTE + 1)) +#define INLINE_DENTRY_BITMAP_SIZE ((NR_INLINE_DENTRY + \ + BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_RESERVED_SIZE (MAX_INLINE_DATA - \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + NR_INLINE_DENTRY + INLINE_DENTRY_BITMAP_SIZE)) + +/* inline directory entry structure */ +struct f2fs_inline_dentry { + __u8 
dentry_bitmap[INLINE_DENTRY_BITMAP_SIZE]; + __u8 reserved[INLINE_RESERVED_SIZE]; + struct f2fs_dir_entry dentry[NR_INLINE_DENTRY]; + __u8 filename[NR_INLINE_DENTRY][F2FS_SLOT_LEN]; +} __packed; + /* file types used in inode_info->flags */ enum { F2FS_FT_UNKNOWN, -- cgit v0.10.2 From 5efd3c6f1be9cfbc621c9445cedd159998ed3ea6 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:16:13 +0800 Subject: f2fs: add a new mount option for inline dir Adds a new mount option 'inline_dentry' for inline dir. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 76b14c8..73993a9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -51,6 +51,7 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_inline_data, + Opt_inline_dentry, Opt_flush_merge, Opt_nobarrier, Opt_err, @@ -69,6 +70,7 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_inline_data, "inline_data"}, + {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, {Opt_err, NULL}, @@ -340,6 +342,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_inline_data: set_opt(sbi, INLINE_DATA); break; + case Opt_inline_dentry: + set_opt(sbi, INLINE_DENTRY); + break; case Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; @@ -563,6 +568,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); + if (test_opt(sbi, INLINE_DENTRY)) + seq_puts(seq, ",inline_dentry"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) -- cgit v0.10.2 From dbeacf02ebfed8161ac0b9379892262593c9a734 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:17:04 +0800 Subject: f2fs: export dir operations for inline dir This patch exports some dir 
operations for inline dir, additionally introduces f2fs_drop_nlink from f2fs_delete_entry for reusing by inline dir function. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b54f871..99e944e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -37,7 +37,7 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } -static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { +unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { [F2FS_FT_UNKNOWN] = DT_UNKNOWN, [F2FS_FT_REG_FILE] = DT_REG, [F2FS_FT_DIR] = DT_DIR, @@ -59,7 +59,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) { umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; @@ -77,7 +77,7 @@ static unsigned long dir_block_index(unsigned int level, return bidx; } -static bool early_match_name(size_t namelen, f2fs_hash_t namehash, +bool early_match_name(size_t namelen, f2fs_hash_t namehash, struct f2fs_dir_entry *de) { if (le16_to_cpu(de->name_len) != namelen) @@ -307,7 +307,6 @@ static int make_empty_dir(struct inode *inode, if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); - dentry_blk = kmap_atomic(dentry_page); de = &dentry_blk->dentry[0]; @@ -333,7 +332,7 @@ static int make_empty_dir(struct inode *inode, return 0; } -static struct page *init_inode_metadata(struct inode *inode, +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, const struct qstr *name) { struct page *page; @@ -395,7 +394,7 @@ error: return ERR_PTR(err); } -static void update_parent_metadata(struct inode *dir, struct inode *inode, +void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { @@ -560,16 +559,44 @@ fail: return err; } +void 
f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + down_write(&F2FS_I(inode)->i_sem); + + if (S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + if (page) + update_inode(dir, page); + else + update_inode_page(dir); + } + inode->i_ctime = CURRENT_TIME; + + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + up_write(&F2FS_I(inode)->i_sem); + update_inode_page(inode); + + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); +} + /* * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. */ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *inode) + struct inode *dir, struct inode *inode) { struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; - struct inode *dir = page->mapping->host; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; @@ -590,29 +617,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dir->i_ctime = dir->i_mtime = CURRENT_TIME; - if (inode) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - - down_write(&F2FS_I(inode)->i_sem); - - if (S_ISDIR(inode->i_mode)) { - drop_nlink(dir); - update_inode_page(dir); - } - inode->i_ctime = CURRENT_TIME; - drop_nlink(inode); - if (S_ISDIR(inode->i_mode)) { - drop_nlink(inode); - i_size_write(inode, 0); - } - up_write(&F2FS_I(inode)->i_sem); - update_inode_page(inode); - - if (inode->i_nlink == 0) - add_orphan_inode(sbi, inode->i_ino); - else - release_orphan_inode(sbi); - } + if (inode) + f2fs_drop_nlink(dir, inode, NULL); if (bit_pos == NR_DENTRY_IN_BLOCK) { truncate_hole(dir, page->index, page->index + 1); @@ -628,7 +634,7 @@ bool f2fs_empty_dir(struct inode *dir) unsigned long bidx; struct page *dentry_page; unsigned int bit_pos; - struct f2fs_dentry_block *dentry_blk; + struct 
f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); for (bidx = 0; bidx < nblock; bidx++) { @@ -640,7 +646,6 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - dentry_blk = kmap_atomic(dentry_page); if (bidx == 0) bit_pos = 2; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4fa0df5..97d90ed 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1242,6 +1242,13 @@ struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ +extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; +bool early_match_name(size_t, f2fs_hash_t, struct f2fs_dir_entry *); +void set_de_type(struct f2fs_dir_entry *, struct inode *); +struct page *init_inode_metadata(struct inode *, struct inode *, + const struct qstr *); +void update_parent_metadata(struct inode *, struct inode *, unsigned int); +void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); @@ -1250,7 +1257,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); -void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, + struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); bool f2fs_empty_dir(struct inode *); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0d2526e..6e67f2b 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -223,7 +223,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) f2fs_put_page(page, 0); goto fail; } - f2fs_delete_entry(de, page, inode); + f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); /* In order to evict this inode, we set 
it dirty */ @@ -435,7 +435,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_ctime = CURRENT_TIME; mark_inode_dirty(old_inode); - f2fs_delete_entry(old_entry, old_page, NULL); + f2fs_delete_entry(old_entry, old_page, old_dir, NULL); if (old_dir_entry) { if (old_dir != new_dir) { diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ebd0132..843da53 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -111,7 +111,7 @@ retry: iput(einode); goto out_unmap_put; } - f2fs_delete_entry(de, page, einode); + f2fs_delete_entry(de, page, dir, einode); iput(einode); goto retry; } -- cgit v0.10.2 From 201a05be9628ae58efe7638e0c7ae3937ec85273 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:17:53 +0800 Subject: f2fs: add key function to handle inline dir Adds Functions to implement inline dir init/lookup/insert/delete/convert ops. Signed-off-by: Chao Yu [Jaegeuk Kim: remove needless reserved area copy, pointed by Dan Carpenter] Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 97d90ed..c537699 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1551,4 +1551,13 @@ int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); void truncate_inline_data(struct inode *, u64); bool recover_inline_data(struct inode *, struct page *); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); +int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, + struct inode *, struct inode *); +bool f2fs_empty_inline_dir(struct inode *); +int f2fs_read_inline_dir(struct file *, struct dir_context *); #endif diff --git a/fs/f2fs/inline.c 
b/fs/f2fs/inline.c index e3abcfb..b3be96b 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -258,3 +258,349 @@ process_inline: } return false; } + +struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, + struct qstr *name, struct page **res_page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct page *ipage; + struct f2fs_dir_entry *de; + f2fs_hash_t namehash; + unsigned long bit_pos = 0; + struct f2fs_inline_dentry *dentry_blk; + const void *dentry_bits; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + namehash = f2fs_dentry_hash(name); + + dentry_blk = inline_data_addr(ipage); + dentry_bits = &dentry_blk->dentry_bitmap; + + while (bit_pos < NR_INLINE_DENTRY) { + if (!test_bit_le(bit_pos, dentry_bits)) { + bit_pos++; + continue; + } + de = &dentry_blk->dentry[bit_pos]; + if (early_match_name(name->len, namehash, de)) { + if (!memcmp(dentry_blk->filename[bit_pos], + name->name, + name->len)) { + *res_page = ipage; + goto found; + } + } + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs are occurred. 
+ */ + f2fs_bug_on(F2FS_P_SB(ipage), !de->name_len); + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + } + + de = NULL; +found: + unlock_page(ipage); + return de; +} + +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir, + struct page **p) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + struct f2fs_dir_entry *de; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + dentry_blk = inline_data_addr(ipage); + de = &dentry_blk->dentry[1]; + *p = ipage; + unlock_page(ipage); + return de; +} + +int make_empty_inline_dir(struct inode *inode, struct inode *parent, + struct page *ipage) +{ + struct f2fs_inline_dentry *dentry_blk; + struct f2fs_dir_entry *de; + + dentry_blk = inline_data_addr(ipage); + + de = &dentry_blk->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(dentry_blk->filename[0], ".", 1); + set_de_type(de, inode); + + de = &dentry_blk->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(dentry_blk->filename[1], "..", 2); + set_de_type(de, inode); + + test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + + set_page_dirty(ipage); + + /* update i_size to MAX_INLINE_DATA */ + if (i_size_read(inode) < MAX_INLINE_DATA) { + i_size_write(inode, MAX_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + } + return 0; +} + +int room_in_inline_dir(struct f2fs_inline_dentry *dentry_blk, int slots) +{ + int bit_start = 0; + int zero_start, zero_end; +next: + zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + bit_start); + if (zero_start >= NR_INLINE_DENTRY) + return NR_INLINE_DENTRY; + + zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + zero_start); + if (zero_end - zero_start >= slots) + return zero_start; + 
+ bit_start = zero_end + 1; + + if (zero_end + 1 >= NR_INLINE_DENTRY) + return NR_INLINE_DENTRY; + goto next; +} + +int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, + struct f2fs_inline_dentry *inline_dentry) +{ + struct page *page; + struct dnode_of_data dn; + struct f2fs_dentry_block *dentry_blk; + int err; + + page = grab_cache_page(dir->i_mapping, 0); + if (!page) + return -ENOMEM; + + set_new_dnode(&dn, dir, ipage, NULL, 0); + err = f2fs_reserve_block(&dn, 0); + if (err) + goto out; + + f2fs_wait_on_page_writeback(page, DATA); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + + dentry_blk = kmap(page); + + /* copy data from inline dentry block to new dentry block */ + memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, + INLINE_DENTRY_BITMAP_SIZE); + memcpy(dentry_blk->dentry, inline_dentry->dentry, + sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); + memcpy(dentry_blk->filename, inline_dentry->filename, + NR_INLINE_DENTRY * F2FS_SLOT_LEN); + + kunmap(page); + SetPageUptodate(page); + set_page_dirty(page); + + /* clear inline dir and flag after data writeback */ + zero_user_segment(ipage, INLINE_DATA_OFFSET, + INLINE_DATA_OFFSET + MAX_INLINE_DATA); + + stat_dec_inline_inode(dir); + + if (i_size_read(dir) < PAGE_CACHE_SIZE) { + i_size_write(dir, PAGE_CACHE_SIZE); + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + + sync_inode_page(&dn); +out: + f2fs_put_page(page, 1); + return err; +} + +int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, + struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos; + f2fs_hash_t name_hash; + struct f2fs_dir_entry *de; + size_t namelen = name->len; + struct f2fs_inline_dentry *dentry_blk = NULL; + int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; + int err = 0; + int i; + + name_hash = f2fs_dentry_hash(name); + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + dentry_blk = 
inline_data_addr(ipage); + bit_pos = room_in_inline_dir(dentry_blk, slots); + if (bit_pos >= NR_INLINE_DENTRY) { + err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); + if (!err) + err = -EAGAIN; + goto out; + } + + f2fs_wait_on_page_writeback(ipage, DATA); + + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + de = &dentry_blk->dentry[bit_pos]; + de->hash_code = name_hash; + de->name_len = cpu_to_le16(namelen); + memcpy(dentry_blk->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; i++) + test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + set_page_dirty(ipage); + + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, 0); +fail: + up_write(&F2FS_I(inode)->i_sem); + + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { + update_inode(dir, ipage); + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } +out: + f2fs_put_page(ipage, 1); + return err; +} + +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode) +{ + struct f2fs_inline_dentry *inline_dentry; + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + unsigned int bit_pos; + int i; + + lock_page(page); + f2fs_wait_on_page_writeback(page, DATA); + + inline_dentry = inline_data_addr(page); + bit_pos = dentry - inline_dentry->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, + &inline_dentry->dentry_bitmap); + + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode) + f2fs_drop_nlink(dir, inode, page); + + f2fs_put_page(page, 1); +} + +bool f2fs_empty_inline_dir(struct inode *dir) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos = 
2; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return false; + + dentry_blk = inline_data_addr(ipage); + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + bit_pos); + + f2fs_put_page(ipage, 1); + + if (bit_pos < NR_INLINE_DENTRY) + return false; + + return true; +} + +int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) +{ + struct inode *inode = file_inode(file); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int bit_pos = 0; + struct f2fs_inline_dentry *inline_dentry = NULL; + struct f2fs_dir_entry *de = NULL; + struct page *ipage = NULL; + unsigned char d_type = DT_UNKNOWN; + + if (ctx->pos == NR_INLINE_DENTRY) + return 0; + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + bit_pos = ((unsigned long)ctx->pos % NR_INLINE_DENTRY); + + inline_dentry = inline_data_addr(ipage); + while (bit_pos < NR_INLINE_DENTRY) { + bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap, + NR_INLINE_DENTRY, + bit_pos); + if (bit_pos >= NR_INLINE_DENTRY) + break; + + de = &inline_dentry->dentry[bit_pos]; + if (de->file_type < F2FS_FT_MAX) + d_type = f2fs_filetype_table[de->file_type]; + else + d_type = DT_UNKNOWN; + + if (!dir_emit(ctx, + inline_dentry->filename[bit_pos], + le16_to_cpu(de->name_len), + le32_to_cpu(de->ino), d_type)) + goto out; + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + ctx->pos = bit_pos; + } + + ctx->pos = NR_INLINE_DENTRY; +out: + f2fs_put_page(ipage, 1); + + return 0; +} -- cgit v0.10.2 From 622f28ae9ba4fa89b4ff0f4a6cf75d153ea838ce Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:19:10 +0800 Subject: f2fs: enable inline dir handling Add inline dir functions into normal dir ops' function to handle inline ops. Besides, we enable inline dir mode when a new dir inode is created if inline_data option is on. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 99e944e..cc6474a 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -201,6 +201,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + if (f2fs_has_inline_dentry(dir)) + return find_in_inline_dir(dir, child, res_page); + if (npages == 0) return NULL; @@ -227,6 +230,9 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) struct f2fs_dir_entry *de; struct f2fs_dentry_block *dentry_blk; + if (f2fs_has_inline_dentry(dir)) + return f2fs_parent_inline_dir(dir, p); + page = get_lock_data_page(dir, 0); if (IS_ERR(page)) return NULL; @@ -247,7 +253,8 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) de = f2fs_find_entry(dir, qstr, &page); if (de) { res = le32_to_cpu(de->ino); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); f2fs_put_page(page, 0); } @@ -261,7 +268,8 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, f2fs_wait_on_page_writeback(page, DATA); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -303,6 +311,9 @@ static int make_empty_dir(struct inode *inode, struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; + if (f2fs_has_inline_dentry(inode)) + return make_empty_inline_dir(inode, parent, page); + dentry_page = get_new_data_page(inode, page, 0, true); if (IS_ERR(dentry_page)) return PTR_ERR(dentry_page); @@ -462,6 +473,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, int err = 0; int i; + if (f2fs_has_inline_dentry(dir)) { + err = f2fs_add_inline_entry(dir, name, inode); + if (!err || err != -EAGAIN) + return err; + else + err = 0; + } + dentry_hash = f2fs_dentry_hash(name); level = 0; current_depth = F2FS_I(dir)->i_current_depth; @@ 
-600,6 +619,9 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; + if (f2fs_has_inline_dentry(dir)) + return f2fs_delete_inline_entry(dentry, page, dir, inode); + lock_page(page); f2fs_wait_on_page_writeback(page, DATA); @@ -637,6 +659,9 @@ bool f2fs_empty_dir(struct inode *dir) struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); + if (f2fs_has_inline_dentry(dir)) + return f2fs_empty_inline_dir(dir); + for (bidx = 0; bidx < nblock; bidx++) { dentry_page = get_lock_data_page(dir, bidx); if (IS_ERR(dentry_page)) { @@ -676,6 +701,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); unsigned char d_type = DT_UNKNOWN; + if (f2fs_has_inline_dentry(inode)) + return f2fs_read_inline_dir(file, ctx); + bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); /* readahead for multi pages of dir */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 80d9a04..d054e0e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -314,7 +314,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; /* handle inline data case */ - if (f2fs_has_inline_data(inode)) { + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { if (whence == SEEK_HOLE) data_ofs = isize; goto found; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index b3be96b..1fafc85 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -424,7 +424,7 @@ int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, /* clear inline dir and flag after data writeback */ zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); - + clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); stat_dec_inline_inode(dir); if (i_size_read(dir) < PAGE_CACHE_SIZE) { diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6e67f2b..c8290da 100644 --- a/fs/f2fs/namei.c +++ 
b/fs/f2fs/namei.c @@ -54,6 +54,10 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; goto out; } + + if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + trace_f2fs_new_inode(inode, 0); mark_inode_dirty(inode); return inode; @@ -187,7 +191,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, de = f2fs_find_entry(dir, &dentry->d_name, &page); if (de) { nid_t ino = le32_to_cpu(de->ino); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); @@ -219,7 +224,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); f2fs_put_page(page, 0); goto fail; } @@ -443,7 +449,8 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_page, new_dir); update_inode_page(old_inode); } else { - kunmap(old_dir_page); + if (!f2fs_has_inline_dentry(old_inode)) + kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); @@ -456,15 +463,18 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - kunmap(new_page); + if (!f2fs_has_inline_dentry(new_dir)) + kunmap(new_page); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) { - kunmap(old_dir_page); + if (!f2fs_has_inline_dentry(old_inode)) + kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } out_old: - kunmap(old_page); + if (!f2fs_has_inline_dentry(old_dir)) + kunmap(old_page); f2fs_put_page(old_page, 0); out: return err; @@ -596,19 +606,23 @@ out_unlock: f2fs_unlock_op(sbi); out_new_dir: if (new_dir_entry) { - kunmap(new_dir_page); + if (!f2fs_has_inline_dentry(new_inode)) + kunmap(new_dir_page); f2fs_put_page(new_dir_page, 0); } out_old_dir: if (old_dir_entry) { - kunmap(old_dir_page); + if 
(!f2fs_has_inline_dentry(old_inode)) + kunmap(old_dir_page); f2fs_put_page(old_dir_page, 0); } out_new: - kunmap(new_page); + if (!f2fs_has_inline_dentry(new_dir)) + kunmap(new_page); f2fs_put_page(new_page, 0); out_old: - kunmap(old_page); + if (!f2fs_has_inline_dentry(old_dir)) + kunmap(old_page); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 843da53..4b180bb 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -129,7 +129,8 @@ retry: goto out; out_unmap_put: - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); f2fs_put_page(page, 0); out_err: iput(dir); -- cgit v0.10.2 From d37a868ffca325509af87f59c142d9dc717772bd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 24 Sep 2014 18:20:23 +0800 Subject: f2fs: update f2fs documentation for inline dir support This patch adds descriptions for the inline dir support in f2fs document. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 2cca5a2..4bb9f27 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -122,6 +122,10 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs inline_xattr Enable the inline xattrs feature. inline_data Enable the inline data feature: New created small(<~3.4k) files can be written into inode block. +inline_dentry Enable the inline dir feature: data in new created + directory entries can be written into inode block. The + space of inode block which is used to store inline + dentries is limited to ~3.4k. flush_merge Merge concurrent cache_flush commands as much as possible to eliminate redundant command issues. 
If the underlying device handles the cache_flush command relatively slowly, -- cgit v0.10.2 From a82afa20197a2ed289dd8fd18208a9e8b9af0130 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 16:28:13 -0700 Subject: f2fs: reuse room_for_filename for inline dentry operation This patch introduces to reuse the existing room_for_filename for inline dentry operation. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index cc6474a..164c6c9 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -427,27 +427,23 @@ void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } -static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +int room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; next: - zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_start); - if (zero_start >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); + if (zero_start >= max_slots) + return max_slots; - zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - zero_start); + zero_end = find_next_bit_le(bitmap, max_slots, zero_start); if (zero_end - zero_start >= slots) return zero_start; bit_start = zero_end + 1; - if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + if (zero_end + 1 >= max_slots) + return max_slots; goto next; } @@ -509,7 +505,8 @@ start: return PTR_ERR(dentry_page); dentry_blk = kmap(dentry_page); - bit_pos = room_for_filename(dentry_blk, slots); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c537699..aa055e3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1248,6 +1248,7 @@ void set_de_type(struct f2fs_dir_entry *, struct inode *); 
struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); +int room_for_filename(const void *, int, int); void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 1fafc85..29090b3 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -363,30 +363,6 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, return 0; } -int room_in_inline_dir(struct f2fs_inline_dentry *dentry_blk, int slots) -{ - int bit_start = 0; - int zero_start, zero_end; -next: - zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, - NR_INLINE_DENTRY, - bit_start); - if (zero_start >= NR_INLINE_DENTRY) - return NR_INLINE_DENTRY; - - zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_INLINE_DENTRY, - zero_start); - if (zero_end - zero_start >= slots) - return zero_start; - - bit_start = zero_end + 1; - - if (zero_end + 1 >= NR_INLINE_DENTRY) - return NR_INLINE_DENTRY; - goto next; -} - int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, struct f2fs_inline_dentry *inline_dentry) { @@ -460,7 +436,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, return PTR_ERR(ipage); dentry_blk = inline_data_addr(ipage); - bit_pos = room_in_inline_dir(dentry_blk, slots); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_INLINE_DENTRY); if (bit_pos >= NR_INLINE_DENTRY) { err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); if (!err) -- cgit v0.10.2 From 4e6ebf6d493591403237400e94e6fc17b7cb1c62 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 17:26:14 -0700 Subject: f2fs: reuse find_in_block code for find_in_inline_dir This patch removes redundant copied code in find_in_inline_dir. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 164c6c9..0b97b19 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -77,7 +77,7 @@ static unsigned long dir_block_index(unsigned int level, return bidx; } -bool early_match_name(size_t namelen, f2fs_hash_t namehash, +static bool early_match_name(size_t namelen, f2fs_hash_t namehash, struct f2fs_dir_entry *de) { if (le16_to_cpu(de->name_len) != namelen) @@ -90,49 +90,69 @@ bool early_match_name(size_t namelen, f2fs_hash_t namehash, } static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - struct qstr *name, int *max_slots, - f2fs_hash_t namehash, struct page **res_page) + struct qstr *name, int *max_slots, + struct page **res_page) +{ + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + + *max_slots = NR_DENTRY_IN_BLOCK; + + dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); + de = find_target_dentry(name, max_slots, &dentry_blk->dentry_bitmap, + dentry_blk->dentry, + dentry_blk->filename); + if (de) + *res_page = dentry_page; + else + kunmap(dentry_page); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. 
+ */ + f2fs_bug_on(F2FS_P_SB(dentry_page), *max_slots < 0); + return de; +} + +struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, + const void *bitmap, struct f2fs_dir_entry *dentry, + __u8 (*filenames)[F2FS_SLOT_LEN]) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; - struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); - const void *dentry_bits = &dentry_blk->dentry_bitmap; + f2fs_hash_t namehash = f2fs_dentry_hash(name); + int max_bits = *max_slots; int max_len = 0; - while (bit_pos < NR_DENTRY_IN_BLOCK) { - if (!test_bit_le(bit_pos, dentry_bits)) { + *max_slots = 0; + while (bit_pos < max_bits) { + if (!test_bit_le(bit_pos, bitmap)) { if (bit_pos == 0) max_len = 1; - else if (!test_bit_le(bit_pos - 1, dentry_bits)) + else if (!test_bit_le(bit_pos - 1, bitmap)) max_len++; bit_pos++; continue; } - de = &dentry_blk->dentry[bit_pos]; - if (early_match_name(name->len, namehash, de)) { - if (!memcmp(dentry_blk->filename[bit_pos], - name->name, - name->len)) { - *res_page = dentry_page; - goto found; - } - } - if (max_len > *max_slots) { + de = &dentry[bit_pos]; + if (early_match_name(name->len, namehash, de) && + !memcmp(filenames[bit_pos], name->name, name->len)) + goto found; + + if (*max_slots >= 0 && max_len > *max_slots) { *max_slots = max_len; max_len = 0; } - /* - * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs has occurred. 
- */ - f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len); + /* remain bug on condition */ + if (unlikely(!de->name_len)) + *max_slots = -1; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; - kunmap(dentry_page); found: if (max_len > *max_slots) *max_slots = max_len; @@ -149,7 +169,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; - int max_slots = 0; + int max_slots; f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); @@ -168,8 +188,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, continue; } - de = find_in_block(dentry_page, name, &max_slots, - namehash, res_page); + de = find_in_block(dentry_page, name, &max_slots, res_page); if (de) break; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index aa055e3..6aad6e0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1243,8 +1243,9 @@ struct dentry *f2fs_get_parent(struct dentry *child); * dir.c */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; -bool early_match_name(size_t, f2fs_hash_t, struct f2fs_dir_entry *); void set_de_type(struct f2fs_dir_entry *, struct inode *); +struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, const void *, + struct f2fs_dir_entry *, __u8 (*)[F2FS_SLOT_LEN]); struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 29090b3..4cdce00 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -263,49 +263,31 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, struct qstr *name, struct page **res_page) { struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); - struct page *ipage; + struct f2fs_inline_dentry *inline_dentry; struct f2fs_dir_entry *de; - f2fs_hash_t namehash; - unsigned long bit_pos = 0; - struct f2fs_inline_dentry *dentry_blk; - const void *dentry_bits; + struct 
page *ipage; + int max_slots = NR_INLINE_DENTRY; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) return NULL; - namehash = f2fs_dentry_hash(name); - - dentry_blk = inline_data_addr(ipage); - dentry_bits = &dentry_blk->dentry_bitmap; - - while (bit_pos < NR_INLINE_DENTRY) { - if (!test_bit_le(bit_pos, dentry_bits)) { - bit_pos++; - continue; - } - de = &dentry_blk->dentry[bit_pos]; - if (early_match_name(name->len, namehash, de)) { - if (!memcmp(dentry_blk->filename[bit_pos], - name->name, - name->len)) { - *res_page = ipage; - goto found; - } - } - - /* - * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs are occurred. - */ - f2fs_bug_on(F2FS_P_SB(ipage), !de->name_len); - - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - } + inline_dentry = inline_data_addr(ipage); - de = NULL; -found: + de = find_target_dentry(name, &max_slots, &inline_dentry->dentry_bitmap, + inline_dentry->dentry, + inline_dentry->filename); unlock_page(ipage); + if (de) + *res_page = ipage; + else + f2fs_put_page(ipage, 0); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. + */ + f2fs_bug_on(sbi, max_slots < 0); return de; } -- cgit v0.10.2 From 59a0615540812297e4ff9673de5c1269413d0c40 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 19:34:26 -0700 Subject: f2fs: fix to wait correct block type The inode page needs to wait NODE block io. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 0b97b19..35cb1b3 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -283,8 +283,9 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode) { + enum page_type type = f2fs_has_inline_dentry(dir) ? 
NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, DATA); + f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); if (!f2fs_has_inline_dentry(dir)) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4cdce00..175f38a 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -427,7 +427,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, goto out; } - f2fs_wait_on_page_writeback(ipage, DATA); + f2fs_wait_on_page_writeback(ipage, NODE); down_write(&F2FS_I(inode)->i_sem); page = init_inode_metadata(inode, dir, name); @@ -472,7 +472,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, int i; lock_page(page); - f2fs_wait_on_page_writeback(page, DATA); + f2fs_wait_on_page_writeback(page, NODE); inline_dentry = inline_data_addr(page); bit_pos = dentry - inline_dentry->dentry; -- cgit v0.10.2 From bce8d1120707c06088928b2ee52a58703d74ac29 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 19:42:53 -0700 Subject: f2fs: avoid deadlock on init_inode_metadata Previously, init_inode_metadata does not hold any parent directory's inode page. So, f2fs_init_acl can grab its parent inode page without any problem. But, when we use inline_dentry, that page is grabbed during f2fs_add_link, so that we can fall into deadlock condition like below. INFO: task mknod:11006 blocked for more than 120 seconds. Tainted: G OE 3.17.0-rc1+ #13 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. mknod D ffff88003fc94580 0 11006 11004 0x00000000 ffff880007717b10 0000000000000002 ffff88003c323220 ffff880007717fd8 0000000000014580 0000000000014580 ffff88003daecb30 ffff88003c323220 ffff88003fc94e80 ffff88003ffbb4e8 ffff880007717ba0 0000000000000002 Call Trace: [] ? bit_wait+0x50/0x50 [] io_schedule+0x9d/0x130 [] bit_wait_io+0x2c/0x50 [] __wait_on_bit_lock+0x4b/0xb0 [] __lock_page+0x67/0x70 [] ? 
autoremove_wake_function+0x40/0x40 [] pagecache_get_page+0x14c/0x1e0 [] get_node_page+0x59/0x130 [f2fs] [] read_all_xattrs+0x24d/0x430 [f2fs] [] f2fs_getxattr+0x52/0xe0 [f2fs] [] f2fs_get_acl+0x41/0x2d0 [f2fs] [] get_acl+0x47/0x70 [] posix_acl_create+0x5a/0x150 [] f2fs_init_acl+0x29/0xcb [f2fs] [] init_inode_metadata+0x5d/0x340 [f2fs] [] f2fs_add_inline_entry+0x12a/0x2e0 [f2fs] [] __f2fs_add_link+0x45/0x4a0 [f2fs] [] ? f2fs_new_inode+0x146/0x220 [f2fs] [] f2fs_mknod+0x86/0xf0 [f2fs] [] vfs_mknod+0xe1/0x160 [] SyS_mknod+0x1f6/0x200 [] tracesys+0xe1/0xe6 Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 83b9b5a..6207455 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -162,7 +162,8 @@ fail: return ERR_PTR(-EINVAL); } -struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, + struct page *dpage) { int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; void *value = NULL; @@ -172,12 +173,13 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) if (type == ACL_TYPE_ACCESS) name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); if (retval > 0) { value = kmalloc(retval, GFP_F2FS_ZERO); if (!value) return ERR_PTR(-ENOMEM); - retval = f2fs_getxattr(inode, name_index, "", value, retval); + retval = f2fs_getxattr(inode, name_index, "", value, + retval, dpage); } if (retval > 0) @@ -194,6 +196,11 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) return acl; } +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + return __f2fs_get_acl(inode, type, NULL); +} + static int __f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { @@ -249,12 +256,137 @@ int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return __f2fs_set_acl(inode, type, acl, NULL); } -int 
f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage) +/* + * Most part of f2fs_acl_clone, f2fs_acl_create_masq, f2fs_acl_create + * are copied from posix_acl.c + */ +static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, + gfp_t flags) +{ + struct posix_acl *clone = NULL; + + if (acl) { + int size = sizeof(struct posix_acl) + acl->a_count * + sizeof(struct posix_acl_entry); + clone = kmemdup(acl, size, flags); + if (clone) + atomic_set(&clone->a_refcount, 1); + } + return clone; +} + +static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) +{ + struct posix_acl_entry *pa, *pe; + struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; + umode_t mode = *mode_p; + int not_equiv = 0; + + /* assert(atomic_read(acl->a_refcount) == 1); */ + + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_perm &= (mode >> 6) | ~S_IRWXO; + mode &= (pa->e_perm << 6) | ~S_IRWXU; + break; + + case ACL_USER: + case ACL_GROUP: + not_equiv = 1; + break; + + case ACL_GROUP_OBJ: + group_obj = pa; + break; + + case ACL_OTHER: + pa->e_perm &= mode | ~S_IRWXO; + mode &= pa->e_perm | ~S_IRWXO; + break; + + case ACL_MASK: + mask_obj = pa; + not_equiv = 1; + break; + + default: + return -EIO; + } + } + + if (mask_obj) { + mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO; + mode &= (mask_obj->e_perm << 3) | ~S_IRWXG; + } else { + if (!group_obj) + return -EIO; + group_obj->e_perm &= (mode >> 3) | ~S_IRWXO; + mode &= (group_obj->e_perm << 3) | ~S_IRWXG; + } + + *mode_p = (*mode_p & ~S_IRWXUGO) | mode; + return not_equiv; +} + +static int f2fs_acl_create(struct inode *dir, umode_t *mode, + struct posix_acl **default_acl, struct posix_acl **acl, + struct page *dpage) +{ + struct posix_acl *p; + int ret; + + if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) + goto no_acl; + + p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); + if (IS_ERR(p)) { + if (p == ERR_PTR(-EOPNOTSUPP)) + goto apply_umask; + return PTR_ERR(p); + } + + if (!p) 
+ goto apply_umask; + + *acl = f2fs_acl_clone(p, GFP_NOFS); + if (!*acl) + return -ENOMEM; + + ret = f2fs_acl_create_masq(*acl, mode); + if (ret < 0) { + posix_acl_release(*acl); + return -ENOMEM; + } + + if (ret == 0) { + posix_acl_release(*acl); + *acl = NULL; + } + + if (!S_ISDIR(*mode)) { + posix_acl_release(p); + *default_acl = NULL; + } else { + *default_acl = p; + } + return 0; + +apply_umask: + *mode &= ~current_umask(); +no_acl: + *default_acl = NULL; + *acl = NULL; + return 0; +} + +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, + struct page *dpage) { - struct posix_acl *default_acl, *acl; + struct posix_acl *default_acl = NULL, *acl = NULL; int error = 0; - error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); + error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage); if (error) return error; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index e086465..997ca8e 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -38,14 +38,15 @@ struct f2fs_acl_header { extern struct posix_acl *f2fs_get_acl(struct inode *, int); extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); +extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, + struct page *); #else #define f2fs_check_acl NULL #define f2fs_get_acl NULL #define f2fs_set_acl NULL static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, - struct page *page) + struct page *ipage, struct page *dpage) { return 0; } diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 35cb1b3..e5439dd 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -363,8 +363,8 @@ static int make_empty_dir(struct inode *inode, return 0; } -struct page *init_inode_metadata(struct inode *inode, - struct inode *dir, const struct qstr *name) +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, + const struct qstr *name, struct page *dpage) { struct 
page *page; int err; @@ -380,7 +380,7 @@ struct page *init_inode_metadata(struct inode *inode, goto error; } - err = f2fs_init_acl(inode, dir, page); + err = f2fs_init_acl(inode, dir, page, dpage); if (err) goto put_error; @@ -541,7 +541,7 @@ add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name); + page = init_inode_metadata(inode, dir, name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -580,7 +580,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, NULL); + page = init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6aad6e0..d4dcd93 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1247,7 +1247,7 @@ void set_de_type(struct f2fs_dir_entry *, struct inode *); struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, const void *, struct f2fs_dir_entry *, __u8 (*)[F2FS_SLOT_LEN]); struct page *init_inode_metadata(struct inode *, struct inode *, - const struct qstr *); + const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); int room_for_filename(const void *, int, int); void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 175f38a..46c5542 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -427,14 +427,14 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, goto out; } - f2fs_wait_on_page_writeback(ipage, NODE); - down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name); + page = init_inode_metadata(inode, dir, name, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; } + + f2fs_wait_on_page_writeback(ipage, NODE); de = &dentry_blk->dentry[bit_pos]; de->hash_code = name_hash; 
de->name_len = cpu_to_le16(namelen); diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index deca872..5072bf9 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -83,7 +83,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL); } static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, @@ -398,7 +398,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } int f2fs_getxattr(struct inode *inode, int index, const char *name, - void *buffer, size_t buffer_size) + void *buffer, size_t buffer_size, struct page *ipage) { struct f2fs_xattr_entry *entry; void *base_addr; @@ -412,7 +412,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - base_addr = read_all_xattrs(inode, NULL); + base_addr = read_all_xattrs(inode, ipage); if (!base_addr) return -ENOMEM; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 34ab7db..969d792 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -115,7 +115,8 @@ extern const struct xattr_handler *f2fs_xattr_handlers[]; extern int f2fs_setxattr(struct inode *, int, const char *, const void *, size_t, struct page *, int); -extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, + size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else @@ -126,7 +127,8 @@ static inline int f2fs_setxattr(struct inode *inode, int index, return -EOPNOTSUPP; } static inline int f2fs_getxattr(struct inode *inode, int index, - const char *name, void *buffer, size_t buffer_size) + const char *name, void *buffer, + size_t buffer_size, struct page *dpage) { return -EOPNOTSUPP; } -- cgit v0.10.2 From 3289c061c5aaf914c6eb7bdfadb58a7fdd611d30 Mon 
Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 13 Oct 2014 20:00:16 -0700 Subject: f2fs: add stat info for inline_dentry inodes This patch adds status information for inline_dentry inodes. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0a91ab8..86e6e92 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -46,6 +46,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->valid_node_count = valid_node_count(sbi); si->valid_inode_count = valid_inode_count(sbi); si->inline_inode = sbi->inline_inode; + si->inline_dir = sbi->inline_dir; si->utilization = utilization(sbi); si->free_segs = free_segments(sbi); @@ -200,6 +201,8 @@ static int stat_show(struct seq_file *s, void *v) si->valid_count - si->valid_node_count); seq_printf(s, " - Inline_data Inode: %u\n", si->inline_inode); + seq_printf(s, " - Inline_dentry Inode: %u\n", + si->inline_dir); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d4dcd93..9c4c8d1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -560,6 +560,7 @@ struct f2fs_sb_info { unsigned int block_count[2]; /* # of allocated blocks */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ int inline_inode; /* # of inline_data inodes */ + int inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ unsigned int n_dirty_dirs; /* # of dir inodes */ #endif @@ -1434,7 +1435,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, sits, fnids; int total_count, utilization; - int bg_gc, inline_inode; + int bg_gc, inline_inode, inline_dir; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -1474,7 +1475,16 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (f2fs_has_inline_data(inode)) \ 
((F2FS_I_SB(inode))->inline_inode--); \ } while (0) - +#define stat_inc_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + ((F2FS_I_SB(inode))->inline_dir++); \ + } while (0) +#define stat_dec_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + ((F2FS_I_SB(inode))->inline_dir--); \ + } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -1521,6 +1531,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_read_hit(sb) #define stat_inc_inline_inode(inode) #define stat_dec_inline_inode(inode) +#define stat_inc_inline_dir(inode) +#define stat_dec_inline_dir(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_seg_count(si, type) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 46c5542..4219bf4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -382,8 +382,8 @@ int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, /* clear inline dir and flag after data writeback */ zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); + stat_dec_inline_dir(dir); clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); - stat_dec_inline_inode(dir); if (i_size_read(dir) < PAGE_CACHE_SIZE) { i_size_write(dir, PAGE_CACHE_SIZE); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 52d6f54..b9b1d6b 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -169,6 +169,7 @@ make_now: goto bad_inode; } unlock_new_inode(inode); + stat_inc_inline_dir(inode); trace_f2fs_iget(inode); return inode; @@ -300,6 +301,7 @@ void f2fs_evict_inode(struct inode *inode) sb_end_intwrite(inode->i_sb); no_delete: + stat_dec_inline_dir(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); @@ -327,6 +329,7 @@ void handle_failed_inode(struct inode *inode) remove_inode_page(inode); 
stat_dec_inline_inode(inode); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index c8290da..94ba291 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -297,6 +297,7 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; f2fs_unlock_op(sbi); + stat_inc_inline_dir(inode); alloc_nid_done(sbi, inode->i_ino); d_instantiate(dentry, inode); -- cgit v0.10.2 From e7a2bf2283d368ada40ae52152b7ab2304a76d95 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 14 Oct 2014 10:29:50 -0700 Subject: f2fs: fix counting inline_data inode numbers This patch fixes wrongly counting inline_data inode numbers. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index b9b1d6b..4131e3c 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -169,6 +169,7 @@ make_now: goto bad_inode; } unlock_new_inode(inode); + stat_inc_inline_inode(inode); stat_inc_inline_dir(inode); trace_f2fs_iget(inode); return inode; @@ -296,12 +297,12 @@ void f2fs_evict_inode(struct inode *inode) f2fs_lock_op(sbi); remove_inode_page(inode); - stat_dec_inline_inode(inode); f2fs_unlock_op(sbi); sb_end_intwrite(inode->i_sb); no_delete: stat_dec_inline_dir(inode); + stat_dec_inline_inode(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); @@ -327,7 +328,6 @@ void handle_failed_inode(struct inode *inode) f2fs_truncate(inode); remove_inode_page(inode); - stat_dec_inline_inode(inode); clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 94ba291..a004a97 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -198,8 +198,6 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) return 
ERR_CAST(inode); - - stat_inc_inline_inode(inode); } return d_splice_alias(inode, dentry); -- cgit v0.10.2 From 38594de767b32db62b7213631772d050690d3803 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 15 Oct 2014 21:29:51 -0700 Subject: f2fs: reuse core function in f2fs_readdir for inline_dentry This patch introduces a core function, f2fs_fill_dentries, to remove redundant code in f2fs_readdir and f2fs_read_inline_dir. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index e5439dd..23a5da8 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -706,23 +706,50 @@ bool f2fs_empty_dir(struct inode *dir) return true; } +bool f2fs_fill_dentries(struct dir_context *ctx, + const void *bitmap, struct f2fs_dir_entry *dentry, + __u8 (*filenames)[F2FS_SLOT_LEN], int max, + unsigned int start_pos) +{ + unsigned char d_type = DT_UNKNOWN; + unsigned int bit_pos; + struct f2fs_dir_entry *de = NULL; + + bit_pos = ((unsigned long)ctx->pos % max); + + while (bit_pos < max) { + bit_pos = find_next_bit_le(bitmap, max, bit_pos); + if (bit_pos >= max) + break; + + de = &dentry[bit_pos]; + if (de->file_type < F2FS_FT_MAX) + d_type = f2fs_filetype_table[de->file_type]; + else + d_type = DT_UNKNOWN; + if (!dir_emit(ctx, filenames[bit_pos], + le16_to_cpu(de->name_len), + le32_to_cpu(de->ino), d_type)) + return true; + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + ctx->pos = start_pos + bit_pos; + } + return false; +} + static int f2fs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); - unsigned int bit_pos = 0; struct f2fs_dentry_block *dentry_blk = NULL; - struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); - unsigned char d_type = DT_UNKNOWN; if (f2fs_has_inline_dentry(inode)) return f2fs_read_inline_dir(file, ctx); - bit_pos = ((unsigned 
long)ctx->pos % NR_DENTRY_IN_BLOCK); - /* readahead for multi pages of dir */ if (npages - n > 1 && !ra_has_index(ra, n)) page_cache_sync_readahead(inode->i_mapping, ra, file, n, @@ -734,28 +761,13 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) continue; dentry_blk = kmap(dentry_page); - while (bit_pos < NR_DENTRY_IN_BLOCK) { - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_pos); - if (bit_pos >= NR_DENTRY_IN_BLOCK) - break; - - de = &dentry_blk->dentry[bit_pos]; - if (de->file_type < F2FS_FT_MAX) - d_type = f2fs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; - if (!dir_emit(ctx, - dentry_blk->filename[bit_pos], - le16_to_cpu(de->name_len), - le32_to_cpu(de->ino), d_type)) - goto stop; - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos; - } - bit_pos = 0; + if (f2fs_fill_dentries(ctx, + &dentry_blk->dentry_bitmap, dentry_blk->dentry, + dentry_blk->filename, + NR_DENTRY_IN_BLOCK, n * NR_DENTRY_IN_BLOCK)) + goto stop; + ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9c4c8d1..3b0f490 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1247,6 +1247,9 @@ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; void set_de_type(struct f2fs_dir_entry *, struct inode *); struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, const void *, struct f2fs_dir_entry *, __u8 (*)[F2FS_SLOT_LEN]); +bool f2fs_fill_dentries(struct dir_context *, + const void *, struct f2fs_dir_entry *, + __u8 (*)[F2FS_SLOT_LEN], int, unsigned int); struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4219bf4..38a6aa7 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -517,49 +517,24 @@ bool 
f2fs_empty_inline_dir(struct inode *dir) int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - unsigned int bit_pos = 0; struct f2fs_inline_dentry *inline_dentry = NULL; - struct f2fs_dir_entry *de = NULL; struct page *ipage = NULL; - unsigned char d_type = DT_UNKNOWN; if (ctx->pos == NR_INLINE_DENTRY) return 0; - ipage = get_node_page(sbi, inode->i_ino); + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) return PTR_ERR(ipage); - bit_pos = ((unsigned long)ctx->pos % NR_INLINE_DENTRY); - inline_dentry = inline_data_addr(ipage); - while (bit_pos < NR_INLINE_DENTRY) { - bit_pos = find_next_bit_le(&inline_dentry->dentry_bitmap, - NR_INLINE_DENTRY, - bit_pos); - if (bit_pos >= NR_INLINE_DENTRY) - break; - - de = &inline_dentry->dentry[bit_pos]; - if (de->file_type < F2FS_FT_MAX) - d_type = f2fs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; - - if (!dir_emit(ctx, - inline_dentry->filename[bit_pos], - le16_to_cpu(de->name_len), - le32_to_cpu(de->ino), d_type)) - goto out; - - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - ctx->pos = bit_pos; - } - ctx->pos = NR_INLINE_DENTRY; -out: - f2fs_put_page(ipage, 1); + if (!f2fs_fill_dentries(ctx, &inline_dentry->dentry_bitmap, + inline_dentry->dentry, + inline_dentry->filename, + NR_INLINE_DENTRY, 0)) + ctx->pos = NR_INLINE_DENTRY; + f2fs_put_page(ipage, 1); return 0; } -- cgit v0.10.2 From 5ab18570b85cf3071875a36b88bc5ed27d0b6ef7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 16 Oct 2014 12:24:14 -0700 Subject: f2fs: should not truncate any inline_dentry If the inode has inline_dentry, we should not truncate any block indices. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index d054e0e..402e381 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -471,7 +471,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) trace_f2fs_truncate_blocks_enter(inode, from); - if (f2fs_has_inline_data(inode)) + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) goto done; free_from = (pgoff_t) -- cgit v0.10.2 From 7b3cd7d6f026784b1a2a74b6e207b26253d9d780 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 18 Oct 2014 22:52:52 -0700 Subject: f2fs: introduce f2fs_dentry_ptr structure for code clean-up This patch introduces f2fs_dentry_ptr structure for the use of a function parameter in inline_dentry operations. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 23a5da8..4f029a1 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -95,13 +95,13 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, { struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; - - *max_slots = NR_DENTRY_IN_BLOCK; + struct f2fs_dentry_ptr d; dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); - de = find_target_dentry(name, max_slots, &dentry_blk->dentry_bitmap, - dentry_blk->dentry, - dentry_blk->filename); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + de = find_target_dentry(name, max_slots, &d); + if (de) *res_page = dentry_page; else @@ -111,50 +111,49 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, * For the most part, it should be a bug when name_len is zero. * We stop here for figuring out where the bugs has occurred. 
*/ - f2fs_bug_on(F2FS_P_SB(dentry_page), *max_slots < 0); + f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0); return de; } struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, - const void *bitmap, struct f2fs_dir_entry *dentry, - __u8 (*filenames)[F2FS_SLOT_LEN]) + struct f2fs_dentry_ptr *d) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; f2fs_hash_t namehash = f2fs_dentry_hash(name); - int max_bits = *max_slots; int max_len = 0; - *max_slots = 0; - while (bit_pos < max_bits) { - if (!test_bit_le(bit_pos, bitmap)) { + if (max_slots) + *max_slots = 0; + while (bit_pos < d->max) { + if (!test_bit_le(bit_pos, d->bitmap)) { if (bit_pos == 0) max_len = 1; - else if (!test_bit_le(bit_pos - 1, bitmap)) + else if (!test_bit_le(bit_pos - 1, d->bitmap)) max_len++; bit_pos++; continue; } - de = &dentry[bit_pos]; + de = &d->dentry[bit_pos]; if (early_match_name(name->len, namehash, de) && - !memcmp(filenames[bit_pos], name->name, name->len)) + !memcmp(d->filename[bit_pos], name->name, name->len)) goto found; - if (*max_slots >= 0 && max_len > *max_slots) { + if (max_slots && *max_slots >= 0 && max_len > *max_slots) { *max_slots = max_len; max_len = 0; } /* remain bug on condition */ if (unlikely(!de->name_len)) - *max_slots = -1; + d->max = -1; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; found: - if (max_len > *max_slots) + if (max_slots && max_len > *max_slots) *max_slots = max_len; return de; } @@ -706,28 +705,26 @@ bool f2fs_empty_dir(struct inode *dir) return true; } -bool f2fs_fill_dentries(struct dir_context *ctx, - const void *bitmap, struct f2fs_dir_entry *dentry, - __u8 (*filenames)[F2FS_SLOT_LEN], int max, - unsigned int start_pos) +bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, + unsigned int start_pos) { unsigned char d_type = DT_UNKNOWN; unsigned int bit_pos; struct f2fs_dir_entry *de = NULL; - bit_pos = ((unsigned long)ctx->pos % max); + bit_pos = ((unsigned long)ctx->pos % 
d->max); - while (bit_pos < max) { - bit_pos = find_next_bit_le(bitmap, max, bit_pos); - if (bit_pos >= max) + while (bit_pos < d->max) { + bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); + if (bit_pos >= d->max) break; - de = &dentry[bit_pos]; + de = &d->dentry[bit_pos]; if (de->file_type < F2FS_FT_MAX) d_type = f2fs_filetype_table[de->file_type]; else d_type = DT_UNKNOWN; - if (!dir_emit(ctx, filenames[bit_pos], + if (!dir_emit(ctx, d->filename[bit_pos], le16_to_cpu(de->name_len), le32_to_cpu(de->ino), d_type)) return true; @@ -746,6 +743,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); + struct f2fs_dentry_ptr d; if (f2fs_has_inline_dentry(inode)) return f2fs_read_inline_dir(file, ctx); @@ -762,10 +760,9 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) dentry_blk = kmap(dentry_page); - if (f2fs_fill_dentries(ctx, - &dentry_blk->dentry_bitmap, dentry_blk->dentry, - dentry_blk->filename, - NR_DENTRY_IN_BLOCK, n * NR_DENTRY_IN_BLOCK)) + make_dentry_ptr(&d, (void *)dentry_blk, 1); + + if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK)) goto stop; ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3b0f490..f7bbfc7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -212,6 +212,32 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, /* * For INODE and NODE manager */ +/* for directory operations */ +struct f2fs_dentry_ptr { + const void *bitmap; + struct f2fs_dir_entry *dentry; + __u8 (*filename)[F2FS_SLOT_LEN]; + int max; +}; + +static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d, + void *src, int type) +{ + if (type == 1) { + struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; + d->max = NR_DENTRY_IN_BLOCK; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = 
t->filename; + } else { + struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; + d->max = NR_INLINE_DENTRY; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } +} + /* * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 * as its node offset to distinguish from index node blocks. @@ -1245,11 +1271,10 @@ struct dentry *f2fs_get_parent(struct dentry *child); */ extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; void set_de_type(struct f2fs_dir_entry *, struct inode *); -struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, const void *, - struct f2fs_dir_entry *, __u8 (*)[F2FS_SLOT_LEN]); -bool f2fs_fill_dentries(struct dir_context *, - const void *, struct f2fs_dir_entry *, - __u8 (*)[F2FS_SLOT_LEN], int, unsigned int); +struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, + struct f2fs_dentry_ptr *); +bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, + unsigned int); struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 38a6aa7..445e99d 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -265,8 +265,8 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); struct f2fs_inline_dentry *inline_dentry; struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; struct page *ipage; - int max_slots = NR_INLINE_DENTRY; ipage = get_node_page(sbi, dir->i_ino); if (IS_ERR(ipage)) @@ -274,9 +274,9 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(ipage); - de = find_target_dentry(name, &max_slots, &inline_dentry->dentry_bitmap, - inline_dentry->dentry, - inline_dentry->filename); + make_dentry_ptr(&d, (void *)inline_dentry, 2); + de = find_target_dentry(name, NULL, &d); + 
unlock_page(ipage); if (de) *res_page = ipage; @@ -287,7 +287,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, * For the most part, it should be a bug when name_len is zero. * We stop here for figuring out where the bugs has occurred. */ - f2fs_bug_on(sbi, max_slots < 0); + f2fs_bug_on(sbi, d.max < 0); return de; } @@ -519,6 +519,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) struct inode *inode = file_inode(file); struct f2fs_inline_dentry *inline_dentry = NULL; struct page *ipage = NULL; + struct f2fs_dentry_ptr d; if (ctx->pos == NR_INLINE_DENTRY) return 0; @@ -529,10 +530,9 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) inline_dentry = inline_data_addr(ipage); - if (!f2fs_fill_dentries(ctx, &inline_dentry->dentry_bitmap, - inline_dentry->dentry, - inline_dentry->filename, - NR_INLINE_DENTRY, 0)) + make_dentry_ptr(&d, (void *)inline_dentry, 2); + + if (!f2fs_fill_dentries(ctx, &d, 0)) ctx->pos = NR_INLINE_DENTRY; f2fs_put_page(ipage, 1); -- cgit v0.10.2 From 062a3e7ba7eebcd7d44e49e9510135f901f93f48 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 18 Oct 2014 23:06:41 -0700 Subject: f2fs: reuse make_empty_dir code for inline_dentry This patch introduces do_make_empty_dir to mitigate code redundancy for inline_dentry. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4f029a1..4e62bde 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -323,12 +323,35 @@ int update_dent_inode(struct inode *inode, const struct qstr *name) return 0; } +void do_make_empty_dir(struct inode *inode, struct inode *parent, + struct f2fs_dentry_ptr *d) +{ + struct f2fs_dir_entry *de; + + de = &d->dentry[0]; + de->name_len = cpu_to_le16(1); + de->hash_code = 0; + de->ino = cpu_to_le32(inode->i_ino); + memcpy(d->filename[0], ".", 1); + set_de_type(de, inode); + + de = &d->dentry[1]; + de->hash_code = 0; + de->name_len = cpu_to_le16(2); + de->ino = cpu_to_le32(parent->i_ino); + memcpy(d->filename[1], "..", 2); + set_de_type(de, inode); + + test_and_set_bit_le(0, (void *)d->bitmap); + test_and_set_bit_le(1, (void *)d->bitmap); +} + static int make_empty_dir(struct inode *inode, struct inode *parent, struct page *page) { struct page *dentry_page; struct f2fs_dentry_block *dentry_blk; - struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; if (f2fs_has_inline_dentry(inode)) return make_empty_inline_dir(inode, parent, page); @@ -339,22 +362,9 @@ static int make_empty_dir(struct inode *inode, dentry_blk = kmap_atomic(dentry_page); - de = &dentry_blk->dentry[0]; - de->name_len = cpu_to_le16(1); - de->hash_code = 0; - de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); - set_de_type(de, inode); - - de = &dentry_blk->dentry[1]; - de->hash_code = 0; - de->name_len = cpu_to_le16(2); - de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); - set_de_type(de, inode); + make_dentry_ptr(&d, (void *)dentry_blk, 1); + do_make_empty_dir(inode, parent, &d); - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); kunmap_atomic(dentry_blk); set_page_dirty(dentry_page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f7bbfc7..cb59ebb 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1275,6 
+1275,8 @@ struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, struct f2fs_dentry_ptr *); bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, unsigned int); +void do_make_empty_dir(struct inode *, struct inode *, + struct f2fs_dentry_ptr *); struct page *init_inode_metadata(struct inode *, struct inode *, const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 445e99d..a309845 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -314,26 +314,12 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, struct page *ipage) { struct f2fs_inline_dentry *dentry_blk; - struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; dentry_blk = inline_data_addr(ipage); - de = &dentry_blk->dentry[0]; - de->name_len = cpu_to_le16(1); - de->hash_code = 0; - de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); - set_de_type(de, inode); - - de = &dentry_blk->dentry[1]; - de->hash_code = 0; - de->name_len = cpu_to_le16(2); - de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); - set_de_type(de, inode); - - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + make_dentry_ptr(&d, (void *)dentry_blk, 2); + do_make_empty_dir(inode, parent, &d); set_page_dirty(ipage); -- cgit v0.10.2 From f1e33a041e39535571cd19f4799c3673868f2118 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 18 Oct 2014 23:41:38 -0700 Subject: f2fs: use kmap_atomic instead of kmap For better performance, we need to use kmap_atomic instead of kmap. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a309845..024546b 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -55,11 +55,10 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) /* Copy the whole inline data block */ src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); + dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); + kunmap_atomic(dst_addr); f2fs_put_page(ipage, 1); - out: SetPageUptodate(page); unlock_page(page); @@ -105,9 +104,9 @@ static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) /* Copy the whole inline data block */ src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); + dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); + kunmap_atomic(dst_addr); SetPageUptodate(page); /* write data page to try to make data consistent */ @@ -177,10 +176,10 @@ int f2fs_write_inline_data(struct inode *inode, f2fs_wait_on_page_writeback(ipage, NODE); zero_user_segment(ipage, INLINE_DATA_OFFSET, INLINE_DATA_OFFSET + MAX_INLINE_DATA); - src_addr = kmap(page); + src_addr = kmap_atomic(page); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, size); - kunmap(page); + kunmap_atomic(src_addr); set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); sync_inode_page(&dn); @@ -351,7 +350,7 @@ int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, f2fs_wait_on_page_writeback(page, DATA); zero_user_segment(page, 0, PAGE_CACHE_SIZE); - dentry_blk = kmap(page); + dentry_blk = kmap_atomic(page); /* copy data from inline dentry block to new dentry block */ memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, @@ -361,7 +360,7 @@ int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, memcpy(dentry_blk->filename, inline_dentry->filename, NR_INLINE_DENTRY * F2FS_SLOT_LEN); - kunmap(page); + kunmap_atomic(dentry_blk); SetPageUptodate(page); set_page_dirty(page); -- cgit 
v0.10.2 From d64948a4df9f5571b0efa81f71a59bc86d7056a7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 20 Oct 2014 20:28:49 -0700 Subject: f2fs: declare f2fs_convert_inline_dir as a static function This patch declares f2fs_convert_inline_dir as a static function, which was reported by kbuild test robot. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 024546b..d9daf76 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -330,7 +330,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, return 0; } -int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, +static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, struct f2fs_inline_dentry *inline_dentry) { struct page *page; -- cgit v0.10.2 From 9bd27ae4aafc9bfee6c8791f7d801ea16cc5622b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 21 Oct 2014 14:07:33 +0200 Subject: f2fs: avoid returning uninitialized value to userspace from f2fs_trim_fs() If the user specifies too low an end sector for trimming, f2fs_trim_fs() uses an uninitialized value as the number of trimmed blocks and returns it to userspace. Initialize the number of trimmed blocks early to avoid the problem.
Coverity-id: 1248809 CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2c1e608..6b08504 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1032,6 +1032,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) range->len < sbi->blocksize) return -EINVAL; + cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -1043,7 +1044,6 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_start = start_segno; cpc.trim_end = end_segno; cpc.trim_minlen = range->minlen >> sbi->log_blocksize; - cpc.trimmed = 0; /* do checkpoint to issue discard commands safely */ write_checkpoint(sbi, &cpc); -- cgit v0.10.2 From 8a2d0ace3af74825e4f1e6fb962f7ee8ef4d9281 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:48 +0800 Subject: f2fs: remove the seems unneeded argument 'type' from __get_victim Remove the unneeded argument 'type' from __get_victim, use NO_CHECK_TYPE directly when calling v_ops->get_victim(). 
Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2a8f4ac..7151d7d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -646,12 +646,14 @@ next_iput: } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type, int type) + int gc_type) { struct sit_info *sit_i = SIT_I(sbi); int ret; + mutex_lock(&sit_i->sentry_lock); - ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, + NO_CHECK_TYPE, LFS); mutex_unlock(&sit_i->sentry_lock); return ret; } @@ -709,7 +711,7 @@ gc_more: write_checkpoint(sbi, &cpc); } - if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + if (!__get_victim(sbi, &segno, gc_type)) goto stop; ret = 0; -- cgit v0.10.2 From fa528722d06ecbee9d918b9eec58c5d4c2978839 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:52 +0800 Subject: f2fs: remove the redundant function cond_clear_inode_flag Use clear_inode_flag to replace the redundant cond_clear_inode_flag. 
Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 6207455..1ccb26b 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -236,7 +236,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (acl) { value = f2fs_acl_to_disk(acl, &size); if (IS_ERR(value)) { - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return (int)PTR_ERR(value); } } @@ -247,7 +247,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (!error) set_cached_acl(inode, type, acl); - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return error; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cb59ebb..3608c13 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1117,15 +1117,6 @@ static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) set_inode_flag(fi, FI_ACL_MODE); } -static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) -{ - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - clear_inode_flag(fi, FI_ACL_MODE); - return 1; - } - return 0; -} - static inline void get_inline_info(struct f2fs_inode_info *fi, struct f2fs_inode *ri) { -- cgit v0.10.2 From c6ac4c0ec416e77cab09cac6cee2d100fbd7fc82 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:50 +0800 Subject: f2fs: introduce f2fs_change_bit to simplify the change bit logic Introduce f2fs_change_bit to simplify the change bit logic in function set_to_next_nat{sit}. 
Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3608c13..60045a2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1073,6 +1073,15 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) return ret; } +static inline void f2fs_change_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr ^= mask; +} + /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8d5e6e0d..acb71e5 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -192,10 +192,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) { unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); - if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - f2fs_clear_bit(block_off, nm_i->nat_bitmap); - else - f2fs_set_bit(block_off, nm_i->nat_bitmap); + f2fs_change_bit(block_off, nm_i->nat_bitmap); } static inline void fill_node_footer(struct page *page, nid_t nid, diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2495bec..6723ccc 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -657,10 +657,7 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) { unsigned int block_off = SIT_BLOCK_OFFSET(start); - if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) - f2fs_clear_bit(block_off, sit_i->sit_bitmap); - else - f2fs_set_bit(block_off, sit_i->sit_bitmap); + f2fs_change_bit(block_off, sit_i->sit_bitmap); } static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) -- cgit v0.10.2 From 1730663cb7f368ecc2aea36fd8c533b5b0a91980 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:53 +0800 Subject: f2fs: set raw_super default to NULL to avoid compile warning Set raw_super default to NULL to avoid the possibly used uninitialized warning, though we may never hit it in fact. 
Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 73993a9..6c5fc76 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -930,7 +930,7 @@ retry: static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; - struct f2fs_super_block *raw_super; + struct f2fs_super_block *raw_super = NULL; struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; -- cgit v0.10.2 From 52aca07425088954384eb503d8e3442d81b9d18c Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:51 +0800 Subject: f2fs: rename f2fs_set/clear_bit to f2fs_test_and_set/clear_bit Rename f2fs_set/clear_bit to f2fs_test_and_set/clear_bit, which means set/clear the bit and return the old value, for better readability. Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 60045a2..2e9d2e3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1049,7 +1049,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) return mask & *addr; } -static inline int f2fs_set_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) { int mask; int ret; @@ -1061,7 +1061,7 @@ static inline int f2fs_set_bit(unsigned int nr, char *addr) return ret; } -static inline int f2fs_clear_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr) { int mask; int ret; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6b08504..19de23b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -648,10 +648,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_set_bit(offset, se->cur_valid_map)) + if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); } else { - if (!f2fs_clear_bit(offset, se->cur_valid_map)) + if (!f2fs_test_and_clear_bit(offset,
se->cur_valid_map)) f2fs_bug_on(sbi, 1); } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) -- cgit v0.10.2 From 2cc221861132e0ca54e3f52d506520ded8520e80 Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Mon, 20 Oct 2014 17:45:49 +0800 Subject: f2fs: use current_sit_addr to replace the open code Signed-off-by: Gu Zheng Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 19de23b..21ccc48 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1552,17 +1552,7 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(segno); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, segno); - - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) - blk_addr += sit_i->sit_blocks; - - return get_meta_page(sbi, blk_addr); + return get_meta_page(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, -- cgit v0.10.2 From 9234f3190bf8b25b11b105191d408ac50a107948 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 22 Oct 2014 15:21:47 +0200 Subject: f2fs: fix possible data corruption in f2fs_write_begin() f2fs_write_begin() doesn't initialize the 'dn' variable if the inode has inline data. However it uses its contents to decide whether it should just zero out the page or load data to it. Thus if we are unlucky we can zero out page contents instead of loading inline data into a page. 
CC: stable@vger.kernel.org CC: Changman Lee Signed-off-by: Jan Kara Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 973fd77..e3788bd 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1017,21 +1017,19 @@ inline_data: goto out; } - if (dn.data_blkaddr == NEW_ADDR) { + if (f2fs_has_inline_data(inode)) { + err = f2fs_read_inline_data(inode, page); + if (err) { + page_cache_release(page); + goto fail; + } + } else if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - if (f2fs_has_inline_data(inode)) { - err = f2fs_read_inline_data(inode, page); - if (err) { - page_cache_release(page); - goto fail; - } - } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); - if (err) - goto fail; - } + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + READ_SYNC); + if (err) + goto fail; lock_page(page); if (unlikely(!PageUptodate(page))) { -- cgit v0.10.2 From ca4b02eeed752c02da0df9da6f9d3c978f9fa9e1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Oct 2014 11:04:35 -0700 Subject: f2fs: call write_checkpoint under disabled gc During write_checkpoint, we should prevent f2fs_gc from being triggered, to avoid any filesystem inconsistency. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 21ccc48..d391a59 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1046,7 +1046,9 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_minlen = range->minlen >> sbi->log_blocksize; /* do checkpoint to issue discard commands safely */ + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); out: range->len = cpc.trimmed << sbi->log_blocksize; return 0; -- cgit v0.10.2 From 427a45c8e2395e0d24cb7fecc2ebf6e5e84e59fd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 26 Oct 2014 22:59:27 -0700 Subject: f2fs: flush_dcache_page for inline data When reading inline data, we should call flush_dcache_page.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index d9daf76..d6677d6 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -57,6 +57,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) src_addr = inline_data_addr(ipage); dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + flush_dcache_page(page); kunmap_atomic(dst_addr); f2fs_put_page(ipage, 1); out: -- cgit v0.10.2 From e3fb1b794b5f0c9f9b65707ce4e6be264a9c60f8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 27 Oct 2014 13:54:27 -0700 Subject: f2fs: do not discard data protected by the previous checkpoint We should never discard any data protected by the previous checkpoint. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d391a59..06dda73 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -517,7 +517,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; + dmap[i] = ~(cur_map[i] | ckpt_map[i]); while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); -- cgit v0.10.2 From 1f7732fe6cc0c37befc74cef1d289cd2272b7a5c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Oct 2014 14:40:20 +0200 Subject: f2fs: remove pointless bit testing in f2fs_delete_entry() There's no point in using test_and_clear_bit_le() when we don't use the return value of the function. Just use clear_bit_le() instead.
Coverity-id: 1016434 Signed-off-by: Jan Kara Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 4e62bde..5a49995 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -654,7 +654,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) - test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, -- cgit v0.10.2 From b3d208f96d6bb21247108a956dead6a028d5cdb2 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 23 Oct 2014 19:48:09 -0700 Subject: f2fs: revisit inline_data to avoid data races and potential bugs This patch simplifies the inline_data usage with the following rules. 1. inline_data is set during the file creation. 2. If the data requested to be written ranges beyond inline_data, f2fs converts that inode permanently. 3. There are no cases which convert a non-inline_data inode to inline_data. 4. The inline_data flag should be changed under inode page lock.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e3788bd..ceee1a6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -737,14 +737,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; - int ret; + int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); - else + if (ret == -EAGAIN) ret = mpage_readpage(page, get_data_block); return ret; @@ -856,10 +856,11 @@ write: else if (has_not_enough_free_secs(sbi, 0)) goto redirty_out; + err = -EAGAIN; f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) - err = f2fs_write_inline_data(inode, page, offset); - else + if (f2fs_has_inline_data(inode)) + err = f2fs_write_inline_data(inode, page); + if (err == -EAGAIN) err = do_write_data_page(page, &fio); f2fs_unlock_op(sbi); done: @@ -957,24 +958,14 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, f2fs_balance_fs(sbi); repeat: - err = f2fs_convert_inline_data(inode, pos + len, NULL); - if (err) - goto fail; - page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { err = -ENOMEM; goto fail; } - /* to avoid latency during memory pressure */ - unlock_page(page); - *pagep = page; - if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) - goto inline_data; - f2fs_lock_op(sbi); /* check inline_data */ @@ -982,32 +973,42 @@ repeat: if (IS_ERR(ipage)) goto unlock_fail; + set_new_dnode(&dn, inode, ipage, ipage, 0); + if (f2fs_has_inline_data(inode)) { - f2fs_put_page(ipage, 1); - f2fs_unlock_op(sbi); - f2fs_put_page(page, 0); - goto repeat; + if (pos + len <= MAX_INLINE_DATA) { + read_inline_data(page, ipage); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); + goto put_next; + } else if 
(page->index == 0) { + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto unlock_fail; + } else { + struct page *p = grab_cache_page(inode->i_mapping, 0); + if (!p) { + err = -ENOMEM; + goto unlock_fail; + } + err = f2fs_convert_inline_page(&dn, p); + f2fs_put_page(p, 1); + if (err) + goto unlock_fail; + } } - - set_new_dnode(&dn, inode, ipage, NULL, 0); err = f2fs_reserve_block(&dn, index); if (err) goto unlock_fail; +put_next: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); -inline_data: - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); - goto repeat; - } - - f2fs_wait_on_page_writeback(page, DATA); - if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; + f2fs_wait_on_page_writeback(page, DATA); + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { unsigned start = pos & (PAGE_CACHE_SIZE - 1); unsigned end = start + len; @@ -1017,13 +1018,7 @@ inline_data: goto out; } - if (f2fs_has_inline_data(inode)) { - err = f2fs_read_inline_data(inode, page); - if (err) { - page_cache_release(page); - goto fail; - } - } else if (dn.data_blkaddr == NEW_ADDR) { + if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, @@ -1049,7 +1044,7 @@ out: unlock_fail: f2fs_unlock_op(sbi); - f2fs_put_page(page, 0); + f2fs_put_page(page, 1); fail: f2fs_write_failed(mapping, pos + len); return err; @@ -1102,9 +1097,12 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int err; - /* Let buffer I/O handle the inline data case. 
*/ - if (f2fs_has_inline_data(inode)) - return 0; + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } if (check_direct_IO(inode, rw, iter, offset)) return 0; @@ -1170,9 +1168,12 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; - if (f2fs_has_inline_data(inode)) - return 0; - + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } return generic_block_bmap(mapping, block, get_data_block); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2e9d2e3..afe3022 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1101,6 +1101,7 @@ enum { FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_DATA_EXIST, /* indicate data exists */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1135,6 +1136,8 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_DATA); if (ri->i_inline & F2FS_INLINE_DENTRY) set_inode_flag(fi, FI_INLINE_DENTRY); + if (ri->i_inline & F2FS_DATA_EXIST) + set_inode_flag(fi, FI_DATA_EXIST); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1148,6 +1151,8 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_DATA; if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) ri->i_inline |= F2FS_INLINE_DENTRY; + if (is_inode_flag_set(fi, FI_DATA_EXIST)) + ri->i_inline |= F2FS_DATA_EXIST; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1182,6 +1187,17 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); } +static inline void f2fs_clear_inline_inode(struct inode *inode) +{ + clear_inode_flag(F2FS_I(inode), 
FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST); +} + +static inline int f2fs_exist_data(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1590,10 +1606,12 @@ extern const struct inode_operations f2fs_special_inode_operations; * inline.c */ bool f2fs_may_inline(struct inode *); +void read_inline_data(struct page *, struct page *); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); -int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); -void truncate_inline_data(struct inode *, u64); +int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); +int f2fs_convert_inline_inode(struct inode *); +int f2fs_write_inline_data(struct inode *, struct page *); +void truncate_inline_data(struct page *, u64); bool recover_inline_data(struct inode *, struct page *); struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, struct page **); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 402e381..832bd91 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -35,35 +35,17 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, struct inode *inode = file_inode(vma->vm_file); struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct dnode_of_data dn; - struct page *ipage; int err; f2fs_balance_fs(sbi); sb_start_pagefault(inode->i_sb); -retry: - /* force to convert with normal data indices */ - err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); - if (err) - goto out; + + f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); /* block allocation */ f2fs_lock_op(sbi); - - /* check inline_data */ - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - f2fs_unlock_op(sbi); - goto out; - } - - if (f2fs_has_inline_data(inode)) { - f2fs_put_page(ipage, 1); - 
f2fs_unlock_op(sbi); - goto retry; - } - - set_new_dnode(&dn, inode, ipage, NULL, 0); + set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_reserve_block(&dn, page->index); if (err) { f2fs_unlock_op(sbi); @@ -392,6 +374,15 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct inode *inode = file_inode(file); + + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } + file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; return 0; @@ -433,20 +424,17 @@ void truncate_data_blocks(struct dnode_of_data *dn) truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); } -static void truncate_partial_data_page(struct inode *inode, u64 from) +static int truncate_partial_data_page(struct inode *inode, u64 from) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; - if (f2fs_has_inline_data(inode)) - return truncate_inline_data(inode, from); - if (!offset) - return; + return 0; page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); if (IS_ERR(page)) - return; + return 0; lock_page(page); if (unlikely(!PageUptodate(page) || @@ -456,9 +444,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); set_page_dirty(page); - out: f2fs_put_page(page, 1); + return 0; } int truncate_blocks(struct inode *inode, u64 from, bool lock) @@ -468,33 +456,35 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; + struct page *ipage; trace_f2fs_truncate_blocks_enter(inode, from); - if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) - goto done; - free_from = (pgoff_t) - ((from + blocksize - 1) >> (sbi->log_blocksize)); + ((from + blocksize - 1) >> (sbi->log_blocksize)); if 
(lock) f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } + + if (f2fs_has_inline_data(inode)) { + truncate_inline_data(ipage, from); + update_inode(inode, ipage); + f2fs_put_page(ipage, 1); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - if (lock) - f2fs_unlock_op(sbi); - trace_f2fs_truncate_blocks_exit(inode, err); - return err; - } - - /* writepage can convert inline_data under get_donde_of_data */ - if (f2fs_has_inline_data(inode)) { - f2fs_put_dnode(&dn); - goto unlock_done; + goto out; } count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); @@ -510,12 +500,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); -unlock_done: + + /* lastly zero out the first data page */ + if (!err) + err = truncate_partial_data_page(inode, from); +out: if (lock) f2fs_unlock_op(sbi); -done: - /* lastly zero out the first data page */ - truncate_partial_data_page(inode, from); trace_f2fs_truncate_blocks_exit(inode, err); return err; @@ -586,10 +577,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (attr->ia_valid & ATTR_SIZE) { - err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); - if (err) - return err; - if (attr->ia_size != i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); @@ -690,9 +677,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (offset >= inode->i_size) return ret; - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned 
long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -746,9 +735,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - ret = f2fs_convert_inline_data(inode, offset + len, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -899,7 +890,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); - return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); + return f2fs_convert_inline_inode(inode); } static int f2fs_ioc_commit_atomic_write(struct file *filp) @@ -933,7 +924,8 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) return -EACCES; set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - return 0; + + return f2fs_convert_inline_inode(inode); } static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index d6677d6..8b66109 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -15,41 +15,26 @@ bool f2fs_may_inline(struct inode *inode) { - block_t nr_blocks; - loff_t i_size; - if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) return false; if (f2fs_is_atomic_file(inode)) return false; - nr_blocks = F2FS_I(inode)->i_xattr_nid ? 
3 : 2; - if (inode->i_blocks > nr_blocks) - return false; - - i_size = i_size_read(inode); - if (i_size > MAX_INLINE_DATA) + if (!S_ISREG(inode->i_mode)) return false; return true; } -int f2fs_read_inline_data(struct inode *inode, struct page *page) +void read_inline_data(struct page *page, struct page *ipage) { - struct page *ipage; void *src_addr, *dst_addr; - if (page->index) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - goto out; - } + if (PageUptodate(page)) + return; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) { - unlock_page(page); - return PTR_ERR(ipage); - } + f2fs_bug_on(F2FS_P_SB(page), page->index); zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); @@ -59,104 +44,120 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) memcpy(dst_addr, src_addr, MAX_INLINE_DATA); flush_dcache_page(page); kunmap_atomic(dst_addr); - f2fs_put_page(ipage, 1); -out: SetPageUptodate(page); - unlock_page(page); +} +int f2fs_read_inline_data(struct inode *inode, struct page *page) +{ + struct page *ipage; + + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ipage)) { + unlock_page(page); + return PTR_ERR(ipage); + } + + if (!f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + return -EAGAIN; + } + + if (page->index) + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + else + read_inline_data(page, ipage); + + SetPageUptodate(page); + f2fs_put_page(ipage, 1); + unlock_page(page); return 0; } -static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) +int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { - int err = 0; - struct page *ipage; - struct dnode_of_data dn; void *src_addr, *dst_addr; block_t new_blk_addr; - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_io_info fio = { .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, }; + int err; - f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = 
PTR_ERR(ipage); - goto out; - } + f2fs_bug_on(F2FS_I_SB(dn->inode), page->index); - /* someone else converted inline_data already */ - if (!f2fs_has_inline_data(inode)) - goto out; + if (!f2fs_exist_data(dn->inode)) + goto clear_out; - /* - * i_addr[0] is not used for inline data, - * so reserving new block will not destroy inline data - */ - set_new_dnode(&dn, inode, ipage, NULL, 0); - err = f2fs_reserve_block(&dn, 0); + err = f2fs_reserve_block(dn, 0); if (err) - goto out; + return err; f2fs_wait_on_page_writeback(page, DATA); + + if (PageUptodate(page)) + goto no_update; + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); + src_addr = inline_data_addr(dn->inode_page); dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(dst_addr); SetPageUptodate(page); - +no_update: /* write data page to try to make data consistent */ set_page_writeback(page); - write_data_page(page, &dn, &new_blk_addr, &fio); - update_extent_cache(new_blk_addr, &dn); + + write_data_page(page, dn, &new_blk_addr, &fio); + update_extent_cache(new_blk_addr, dn); f2fs_wait_on_page_writeback(page, DATA); /* clear inline data and flag after data writeback */ - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_dec_inline_inode(inode); - - sync_inode_page(&dn); - f2fs_put_dnode(&dn); -out: - f2fs_unlock_op(sbi); - return err; + truncate_inline_data(dn->inode_page, 0); +clear_out: + f2fs_clear_inline_inode(dn->inode); + stat_dec_inline_inode(dn->inode); + sync_inode_page(dn); + f2fs_put_dnode(dn); + return 0; } -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, - struct page *page) +int f2fs_convert_inline_inode(struct inode *inode) { - struct page *new_page = page; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + struct page *ipage, 
*page; + int err = 0; - if (!f2fs_has_inline_data(inode)) - return 0; - else if (to_size <= MAX_INLINE_DATA) - return 0; + page = grab_cache_page(inode->i_mapping, 0); + if (!page) + return -ENOMEM; - if (!page || page->index != 0) { - new_page = grab_cache_page(inode->i_mapping, 0); - if (!new_page) - return -ENOMEM; + f2fs_lock_op(sbi); + + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + f2fs_unlock_op(sbi); + return PTR_ERR(ipage); } - err = __f2fs_convert_inline_data(inode, new_page); - if (!page || page->index != 0) - f2fs_put_page(new_page, 1); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) + err = f2fs_convert_inline_page(&dn, page); + + f2fs_put_dnode(&dn); + + f2fs_unlock_op(sbi); + + f2fs_put_page(page, 1); return err; } -int f2fs_write_inline_data(struct inode *inode, - struct page *page, unsigned size) +int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; - struct page *ipage; struct dnode_of_data dn; int err; @@ -164,48 +165,39 @@ int f2fs_write_inline_data(struct inode *inode, err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; - ipage = dn.inode_page; - /* Release any data block if it is allocated */ if (!f2fs_has_inline_data(inode)) { - int count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); - truncate_data_blocks_range(&dn, count); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_inc_inline_inode(inode); + f2fs_put_dnode(&dn); + return -EAGAIN; } - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); + f2fs_bug_on(F2FS_I_SB(inode), page->index); + + f2fs_wait_on_page_writeback(dn.inode_page, NODE); src_addr = kmap_atomic(page); - dst_addr = inline_data_addr(ipage); - memcpy(dst_addr, src_addr, size); + dst_addr = inline_data_addr(dn.inode_page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); kunmap_atomic(src_addr); set_inode_flag(F2FS_I(inode), 
FI_APPEND_WRITE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); f2fs_put_dnode(&dn); - return 0; } -void truncate_inline_data(struct inode *inode, u64 from) +void truncate_inline_data(struct page *ipage, u64 from) { - struct page *ipage; + void *addr; if (from >= MAX_INLINE_DATA) return; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return; - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET + from, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + addr = inline_data_addr(ipage); + memset(addr + from, 0, MAX_INLINE_DATA - from); } bool recover_inline_data(struct inode *inode, struct page *npage) @@ -237,6 +229,10 @@ process_inline: src_addr = inline_data_addr(npage); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + update_inode(inode, ipage); f2fs_put_page(ipage, 1); return true; @@ -245,15 +241,12 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + truncate_inline_data(ipage, 0); + f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { truncate_blocks(inode, 0, false); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } return false; @@ -366,8 +359,8 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); + truncate_inline_data(ipage, 0); + 
stat_dec_inline_dir(dir); clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 4131e3c..9fe110e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -67,12 +67,38 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } +static int __recover_inline_status(struct inode *inode, struct page *ipage) +{ + void *inline_data = inline_data_addr(ipage); + struct f2fs_inode *ri; + void *zbuf; + + zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); + if (!zbuf) + return -ENOMEM; + + if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { + kfree(zbuf); + return 0; + } + kfree(zbuf); + + f2fs_wait_on_page_writeback(ipage, NODE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + + ri = F2FS_INODE(ipage); + set_raw_inline(F2FS_I(inode), ri); + set_page_dirty(ipage); + return 0; +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; + int err = 0; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -114,11 +140,15 @@ static int do_read_inode(struct inode *inode) get_extent_info(&fi->ext, ri->i_ext); get_inline_info(fi, ri); + /* check data exist */ + if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) + err = __recover_inline_status(inode, node_page); + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); f2fs_put_page(node_page, 1); - return 0; + return err; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) @@ -329,6 +359,7 @@ void handle_failed_inode(struct inode *inode) remove_inode_page(inode); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index a004a97..6312dd2 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -55,6 +55,8 @@ static struct inode 
*f2fs_new_inode(struct inode *dir, umode_t mode) goto out; } + if (f2fs_may_inline(inode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode)) set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); @@ -133,6 +135,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); + stat_inc_inline_inode(inode); d_instantiate(dentry, inode); unlock_new_inode(inode); return 0; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 63f8303..cc1064f 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -172,6 +172,7 @@ struct f2fs_extent { #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ #define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ +#define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) -- cgit v0.10.2 From adf4983bde9c07c074be12eadb040a88479aa421 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 Oct 2014 22:27:59 -0700 Subject: f2fs: send discard commands in larger extent If there is a chance to make a huge sized discard command, we don't need to split it out, since each blkdev_issue_discard should wait one at a time. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 06dda73..2fb3d7f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -469,10 +469,33 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) } } -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static void __add_discard_entry(struct f2fs_sb_info *sbi, + struct cp_control *cpc, unsigned int start, unsigned int end) { struct list_head *head = &SM_I(sbi)->discard_list; - struct discard_entry *new; + struct discard_entry *new, *last; + + if (!list_empty(head)) { + last = list_last_entry(head, struct discard_entry, list); + if (START_BLOCK(sbi, cpc->trim_start) + start == + last->blkaddr + last->len) { + last->len += end - start; + goto done; + } + } + + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); + INIT_LIST_HEAD(&new->list); + new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; + new->len = end - start; + list_add_tail(&new->list, head); +done: + SM_I(sbi)->nr_discards += end - start; + cpc->trimmed += end - start; +} + +static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); @@ -501,13 +524,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) } mutex_unlock(&dirty_i->seglist_lock); - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start); - new->len = sbi->blocks_per_seg; - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += sbi->blocks_per_seg; - cpc->trimmed += sbi->blocks_per_seg; + __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg); return; } @@ -529,14 +546,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (end - start < cpc->trim_minlen) continue; - new = 
f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - cpc->trimmed += end - start; - - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += end - start; + __add_discard_entry(sbi, cpc, start, end); } } -- cgit v0.10.2 From 6a8f8ca582a1bafe6b620e000316206c8719f1d0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 29 Oct 2014 14:37:22 -0700 Subject: f2fs: avoid race condition in handling wait_io __submit_merged_bio f2fs_write_end_io f2fs_write_end_io wait_io = X wait_io = x complete(X) complete(X) wait_io = NULL wait_for_completion() free(X) spin_lock(X) kernel panic In order to avoid this, this patch removes the wait_io facility. Instead, we can use wait_on_all_pages_writeback(sbi) to wait for end_ios. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index ca514d5..dd6a357 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -978,6 +978,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + /* wait for previous submitted meta pages writeback */ + wait_on_all_pages_writeback(sbi); + release_dirty_inode(sbi); if (unlikely(f2fs_cp_error(sbi))) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ceee1a6..8f16443 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -61,11 +61,6 @@ static void f2fs_write_end_io(struct bio *bio, int err) dec_page_count(sbi, F2FS_WRITEBACK); } - if (sbi->wait_io) { - complete(sbi->wait_io); - sbi->wait_io = NULL; - } - if (!get_pages(sbi, F2FS_WRITEBACK) && !list_empty(&sbi->cp_wait.task_list)) wake_up(&sbi->cp_wait); @@ -95,34 +90,18 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; - int rw; if (!io->bio) return; - rw = fio->rw; 
- - if (is_read_io(rw)) { - trace_f2fs_submit_read_bio(io->sbi->sb, rw, - fio->type, io->bio); - submit_bio(rw, io->bio); - } else { - trace_f2fs_submit_write_bio(io->sbi->sb, rw, - fio->type, io->bio); - /* - * META_FLUSH is only from the checkpoint procedure, and we - * should wait this metadata bio for FS consistency. - */ - if (fio->type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - io->sbi->wait_io = &wait; - submit_bio(rw, io->bio); - wait_for_completion(&wait); - } else { - submit_bio(rw, io->bio); - } - } + if (is_read_io(fio->rw)) + trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + else + trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + submit_bio(fio->rw, io->bio); io->bio = NULL; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index afe3022..5a97058 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -516,7 +516,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ - struct completion *wait_io; /* for completion bios */ /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ -- cgit v0.10.2 From be138b7b0d4cbfb8a927d9bc333ceffee9908c23 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Oct 2014 19:01:10 -0700 Subject: f2fs: remove unnecessary macro Let's remove unused macro. 
Signed-off-by: Jaegeuk Kim diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index cc1064f..87f14e9 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -177,10 +177,6 @@ struct f2fs_extent { #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) -#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) -\ - sizeof(__le32) * (DEF_ADDRS_PER_INODE + \ - DEF_NIDS_PER_INODE - 1)) - struct f2fs_inode { __le16 i_mode; /* file mode */ __u8 i_advise; /* file hints */ -- cgit v0.10.2 From d5053a34a9cc797b9d5d77574354b5555848c43c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Oct 2014 22:47:03 -0700 Subject: f2fs: introduce -o fastboot for reducing booting time only If a system wants to reduce the booting time as a top priority, now we can use a mount option, -o fastboot. With this option, f2fs conducts a little bit slow write_checkpoint, but it can avoid the node page reads during the next mount time. Signed-off-by: Jaegeuk Kim diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 4bb9f27..e0950c4 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -135,6 +135,9 @@ nobarrier This option can be used if underlying storage guarantees If this option is set, no cache_flush commands are issued but f2fs still guarantees the write ordering of all the data writes. +fastboot This option is used when a system wants to reduce mount + time as much as possible, even though normal performance + can be sacrificed. 
================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5a97058..d45f3f4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -49,6 +49,7 @@ #define F2FS_MOUNT_INLINE_DENTRY 0x00000200 #define F2FS_MOUNT_FLUSH_MERGE 0x00000400 #define F2FS_MOUNT_NOBARRIER 0x00000800 +#define F2FS_MOUNT_FASTBOOT 0x00001000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 832bd91..46311e7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -130,6 +130,8 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) need_cp = true; + else if (test_opt(sbi, FASTBOOT)) + need_cp = true; return need_cp; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 7151d7d..b197a2f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -695,9 +695,9 @@ int f2fs_gc(struct f2fs_sb_info *sbi) int gc_type = BG_GC; int nfree = 0; int ret = -1; - struct cp_control cpc = { - .reason = CP_SYNC, - }; + struct cp_control cpc; + + cpc.reason = test_opt(sbi, FASTBOOT) ? 
CP_UMOUNT : CP_SYNC; INIT_LIST_HEAD(&ilist); gc_more: diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 6c5fc76..512ffd8 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -54,6 +54,7 @@ enum { Opt_inline_dentry, Opt_flush_merge, Opt_nobarrier, + Opt_fastboot, Opt_err, }; @@ -73,6 +74,7 @@ static match_table_t f2fs_tokens = { {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, + {Opt_fastboot, "fastboot"}, {Opt_err, NULL}, }; @@ -351,6 +353,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; + case Opt_fastboot: + set_opt(sbi, FASTBOOT); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -479,9 +484,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); if (sync) { - struct cp_control cpc = { - .reason = CP_SYNC, - }; + struct cp_control cpc; + + cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); @@ -574,6 +579,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) seq_puts(seq, ",nobarrier"); + if (test_opt(sbi, FASTBOOT)) + seq_puts(seq, ",fastboot"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; -- cgit v0.10.2 From a344b9fda0cc2eda54433227837029d410dfb12f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 5 Nov 2014 20:05:53 -0800 Subject: f2fs: disable roll-forward when active_logs = 2 The roll-forward mechanism should be activated when the number of active logs is not 2. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 46311e7..54722a0 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -132,6 +132,8 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (test_opt(sbi, FASTBOOT)) need_cp = true; + else if (sbi->active_logs == 2) + need_cp = true; return need_cp; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2fb3d7f..16721b5d 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1090,8 +1090,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && !is_cold_node(page)) - return CURSEG_HOT_NODE; + if (IS_DNODE(page) && is_cold_node(page)) + return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } -- cgit v0.10.2 From 8c402946f0743af91d22eca31d98b058b3371054 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Nov 2014 15:16:04 -0800 Subject: f2fs: introduce the number of inode entries This patch adds to monitor the number of ino entries. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd6a357..bcd686e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -318,6 +318,8 @@ retry: e->ino = ino; list_add_tail(&e->list, &sbi->ino_list[type]); + if (type != ORPHAN_INO) + sbi->ino_num[type]++; } spin_unlock(&sbi->ino_lock[type]); } @@ -331,8 +333,7 @@ static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) if (e) { list_del(&e->list); radix_tree_delete(&sbi->ino_root[type], ino); - if (type == ORPHAN_INO) - sbi->n_orphans--; + sbi->ino_num[type]--; spin_unlock(&sbi->ino_lock[type]); kmem_cache_free(ino_entry_slab, e); return; @@ -373,6 +374,7 @@ void release_dirty_inode(struct f2fs_sb_info *sbi) list_del(&e->list); radix_tree_delete(&sbi->ino_root[i], e->ino); kmem_cache_free(ino_entry_slab, e); + sbi->ino_num[i]--; } spin_unlock(&sbi->ino_lock[i]); } @@ -383,10 +385,10 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) int err = 0; spin_lock(&sbi->ino_lock[ORPHAN_INO]); - if (unlikely(sbi->n_orphans >= sbi->max_orphans)) + if (unlikely(sbi->ino_num[ORPHAN_INO] >= sbi->max_orphans)) err = -ENOSPC; else - sbi->n_orphans++; + sbi->ino_num[ORPHAN_INO]++; spin_unlock(&sbi->ino_lock[ORPHAN_INO]); return err; @@ -395,8 +397,8 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi) void release_orphan_inode(struct f2fs_sb_info *sbi) { spin_lock(&sbi->ino_lock[ORPHAN_INO]); - f2fs_bug_on(sbi, sbi->n_orphans == 0); - sbi->n_orphans--; + f2fs_bug_on(sbi, sbi->ino_num[ORPHAN_INO] == 0); + sbi->ino_num[ORPHAN_INO]--; spin_unlock(&sbi->ino_lock[ORPHAN_INO]); } @@ -460,11 +462,12 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = - (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); + unsigned short orphan_blocks; struct page *page = NULL; struct ino_entry *orphan = NULL; + orphan_blocks = 
GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]); + for (index = 0; index < orphan_blocks; index++) grab_meta_page(sbi, start_blk + index); @@ -892,7 +895,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); + orphan_blocks = GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); @@ -908,7 +911,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) orphan_blocks); } - if (sbi->n_orphans) + if (sbi->ino_num[ORPHAN_INO]) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); @@ -943,7 +946,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_put_page(cp_page, 1); } - if (sbi->n_orphans) { + if (sbi->ino_num[ORPHAN_INO]) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; } @@ -1045,6 +1048,7 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); spin_lock_init(&sbi->ino_lock[i]); INIT_LIST_HEAD(&sbi->ino_list[i]); + sbi->ino_num[i] = 0; } /* @@ -1053,7 +1057,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) * orphan entries with the limitation one reserved segment * for cp pack we can have max 1020*504 orphan entries */ - sbi->n_orphans = 0; sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 86e6e92..74a0d78 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -119,6 +119,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned npages; + int i; if (si->base_mem) goto get_cache; @@ -168,8 +169,9 @@ get_cache: si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += 
sbi->n_orphans * sizeof(struct ino_entry); si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); + for (i = 0; i <= UPDATE_INO; i++) + si->cache_mem += sbi->ino_num[i] * sizeof(struct ino_entry); } static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d45f3f4..994b87e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -532,9 +532,9 @@ struct f2fs_sb_info { struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ + unsigned long ino_num[MAX_INO_ENTRY]; /* number of entries */ /* for orphan inode, use 0'th array */ - unsigned int n_orphans; /* # of orphan inodes */ unsigned int max_orphans; /* max orphan inodes */ /* for directory inode management */ -- cgit v0.10.2 From e5e7ea3c86e56b725e4076e8dc583378abad7697 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Nov 2014 15:24:46 -0800 Subject: f2fs: control the memory footprint used by ino entries This patch adds to control the memory footprint used by ino entries. This will conduct best effort, not strictly. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 44b8afe..4ea2c47 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; + unsigned long avail_ram; unsigned long mem_size = 0; bool res = false; si_meminfo(&val); - /* give 25%, 25%, 50% memory for each components respectively */ + + /* only uses low memory */ + avail_ram = val.totalram - val.totalhigh; + + /* give 25%, 25%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->dirty_exceeded) return false; mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INO_ENTRIES) { + int i; + + if (sbi->sb->s_bdi->dirty_exceeded) + return false; + for (i = 0; i <= UPDATE_INO; i++) + mem_size += (sbi->ino_num[i] * sizeof(struct ino_entry)) + >> PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } return res; } diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index acb71e5..d10b644 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, enum mem_type { FREE_NIDS, /* 
indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ - DIRTY_DENTS /* indicates dirty dentry pages */ + DIRTY_DENTS, /* indicates dirty dentry pages */ + INO_ENTRIES, /* indicates inode entries */ }; struct nat_entry_set { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 16721b5d..e094675 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -276,7 +276,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || - excess_prefree_segs(sbi)) + excess_prefree_segs(sbi) || + available_free_memory(sbi, INO_ENTRIES)) f2fs_sync_fs(sbi->sb, true); } -- cgit v0.10.2 From 2f97c326bf05ca2feeb95688da50c68671638b69 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Nov 2014 17:21:24 -0800 Subject: f2fs: write node pages if checkpoint is not doing It needs to write node pages if checkpoint is not doing in order to avoid memory pressure. Reviewed-by: Changman Lee Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4ea2c47..6f514fb 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1314,10 +1314,12 @@ static int f2fs_write_node_page(struct page *page, return 0; } - if (wbc->for_reclaim) - goto redirty_out; - - down_read(&sbi->node_write); + if (wbc->for_reclaim) { + if (!down_read_trylock(&sbi->node_write)) + goto redirty_out; + } else { + down_read(&sbi->node_write); + } set_page_writeback(page); write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); -- cgit v0.10.2 From 510184c89f21f40634e1585790714be060cd49c5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 6 Nov 2014 17:23:08 -0800 Subject: f2fs: do not skip any writes under memory pressure Under memory pressure, let's avoid skipping data writes. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 6723ccc..7f327c0 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -711,6 +711,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) */ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) { + if (sbi->sb->s_bdi->dirty_exceeded) + return 0; + if (type == DATA) return sbi->blocks_per_seg; else if (type == NODE) -- cgit v0.10.2 From b7e1d800031c827a80c9a9d03cf15e4dd5405a44 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sun, 9 Nov 2014 22:15:31 -0800 Subject: f2fs: implement -o dirsync If a mount option has dirsync, we should call checkpoint for all the directory operations. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6312dd2..2bee559 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -138,6 +138,9 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, stat_inc_inline_inode(inode); d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -164,6 +167,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_unlock_op(sbi); d_instantiate(dentry, inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); @@ -235,6 +241,9 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); fail: trace_f2fs_unlink_exit(inode, err); return err; @@ -268,6 +277,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return err; out: handle_failed_inode(inode); @@ -304,6 +316,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 
d_instantiate(dentry, inode); unlock_new_inode(inode); + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_fail: @@ -346,8 +360,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -461,6 +479,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; put_out_dir: @@ -600,6 +621,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(new_dir); f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_undo: /* Still we may fail to recover name info of f2fs_inode here */ -- cgit v0.10.2 From 57e2a2c0a698406e6206ab707bd7c3dcf248c738 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 10 Nov 2014 16:29:14 -0800 Subject: f2fs: reduce the number of inline_data inode before clearing it The # of inline_data inode is decreased only when it has inline_data. After clearing the flag, we can't decrease the number. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8b66109..2310670 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -116,8 +116,8 @@ no_update: /* clear inline data and flag after data writeback */ truncate_inline_data(dn->inode_page, 0); clear_out: - f2fs_clear_inline_inode(dn->inode); stat_dec_inline_inode(dn->inode); + f2fs_clear_inline_inode(dn->inode); sync_inode_page(dn); f2fs_put_dnode(dn); return 0; -- cgit v0.10.2 From 764d2c80401fcc7ee15933d604c6a783d5addc40 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 11 Nov 2014 11:01:01 -0800 Subject: f2fs: fix deadlock to grab 0'th data page The scenario is like this.
One thread triggers: f2fs_write_data_pages lock_page f2fs_write_data_page f2fs_lock_op <- wait The other thread triggers: f2fs_truncate truncate_blocks f2fs_lock_op truncate_partial_data_page lock_page <- wait for locking the page This patch resolves this bug by relocating truncate_partial_data_page. This function is just to truncate user data page and not related to FS consistency as well. And, we don't need to call truncate_inline_data. Rather than that, f2fs_write_data_page will finally update inline_data later. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 54722a0..edc3ce8 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -477,8 +477,6 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) } if (f2fs_has_inline_data(inode)) { - truncate_inline_data(ipage, from); - update_inode(inode, ipage); f2fs_put_page(ipage, 1); goto out; } @@ -504,13 +502,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); +out: + if (lock) + f2fs_unlock_op(sbi); /* lastly zero out the first data page */ if (!err) err = truncate_partial_data_page(inode, from); -out: - if (lock) - f2fs_unlock_op(sbi); trace_f2fs_truncate_blocks_exit(inode, err); return err; -- cgit v0.10.2 From 92dffd01790a5219d234fc83c3ba854f4490b7f4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 11 Nov 2014 14:10:01 -0800 Subject: f2fs: convert inline_data when i_size becomes large If i_size becomes large outside of MAX_INLINE_DATA, we should convert the inode. Otherwise, we can make some dirty pages during the truncation, and those pages will be written through f2fs_write_data_page. At that moment, the inode still has inline_data, so that it tries to write non-zero pages into inline_data area.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index edc3ce8..7c2ec3e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -522,6 +522,12 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); + /* we should check inline_data size */ + if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) { + if (f2fs_convert_inline_inode(inode)) + return; + } + if (!truncate_blocks(inode, i_size_read(inode), true)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 2310670..053d114 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -24,6 +24,9 @@ bool f2fs_may_inline(struct inode *inode) if (!S_ISREG(inode->i_mode)) return false; + if (i_size_read(inode) > MAX_INLINE_DATA) + return false; + return true; } -- cgit v0.10.2 From 6d20aff83c24e9a7f15fffe4be36ed33d24465da Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Nov 2014 16:06:55 -0800 Subject: f2fs: fix to call put_page at the error handling routine The locked page should be released before returning the function. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 053d114..f26fb87 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -141,8 +141,8 @@ int f2fs_convert_inline_inode(struct inode *inode) ipage = get_node_page(sbi, inode->i_ino); if (IS_ERR(ipage)) { - f2fs_unlock_op(sbi); - return PTR_ERR(ipage); + err = PTR_ERR(ipage); + goto out; } set_new_dnode(&dn, inode, ipage, ipage, 0); @@ -151,7 +151,7 @@ int f2fs_convert_inline_inode(struct inode *inode) err = f2fs_convert_inline_page(&dn, page); f2fs_put_dnode(&dn); - +out: f2fs_unlock_op(sbi); f2fs_put_page(page, 1); -- cgit v0.10.2 From 8cdcb71322ec21aaee90117b0c01d576851a8faa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 17 Nov 2014 16:14:11 -0800 Subject: f2fs: put the inode page when error was occurred We should put the inode page when error was occurred. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8f16443..12dd58a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -963,22 +963,22 @@ repeat: } else if (page->index == 0) { err = f2fs_convert_inline_page(&dn, page); if (err) - goto unlock_fail; + goto put_fail; } else { struct page *p = grab_cache_page(inode->i_mapping, 0); if (!p) { err = -ENOMEM; - goto unlock_fail; + goto put_fail; } err = f2fs_convert_inline_page(&dn, p); f2fs_put_page(p, 1); if (err) - goto unlock_fail; + goto put_fail; } } err = f2fs_reserve_block(&dn, index); if (err) - goto unlock_fail; + goto put_fail; put_next: f2fs_put_dnode(&dn); f2fs_unlock_op(sbi); @@ -1021,6 +1021,8 @@ out: clear_cold_data(page); return 0; +put_fail: + f2fs_put_dnode(&dn); unlock_fail: f2fs_unlock_op(sbi); f2fs_put_page(page, 1); -- cgit v0.10.2 From 6c0299320318c8154a20a3d9e73cbd1fc58d96e1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Nov 2014 11:16:01 +0800 Subject: f2fs: avoid unable to restart gc thread in remount In f2fs_remount, we will stop gc thread and set need_restart_gc as true when new option is set without BG_GC, then if any error occurred in the following procedure, we can restore to start the gc thread. But after that, We will fail to restore gc thread in start_gc_thread as BG_GC is not set in new option, so we'd better move this condition judgment out of start_gc_thread to fix this issue. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b197a2f..657683c9 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -96,8 +96,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) dev_t dev = sbi->sb->s_bdev->bd_dev; int err = 0; - if (!test_opt(sbi, BG_GC)) - goto out; gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); if (!gc_th) { err = -ENOMEM; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 512ffd8..536d414 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1138,7 +1138,7 @@ try_onemore: * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (!f2fs_readonly(sb)) { + if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) -- cgit v0.10.2 From aba291b3d8d83941c7ea39487e279ae793b711b3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Nov 2014 11:17:20 +0800 Subject: f2fs: remove unneeded check code with option in f2fs_remount Because we have checked the contrary condition in case of "if" judgment, we do not need to check the condition again in case of "else" judgment. Let's remove it. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 536d414..f71421d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -669,7 +669,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) f2fs_sync_fs(sb, 1); need_restart_gc = true; } - } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + } else if (!sbi->gc_thread) { err = start_gc_thread(sbi); if (err) goto restore_opts; @@ -682,7 +682,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ if ((*flags & MS_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) { destroy_flush_cmd_control(sbi); - } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) { + } else if (!SM_I(sbi)->cmd_control_info) { err = create_flush_cmd_control(sbi); if (err) goto restore_gc; -- cgit v0.10.2 From 67298804f34452a53a9ec9e609d95aa35084132b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Nov 2014 11:18:36 +0800 Subject: f2fs: introduce struct inode_management to wrap inner fields Now in f2fs, we have three inode caches: ORPHAN_INO, APPEND_INO, UPDATE_INO, and we manage fields related to inode cache separately in struct f2fs_sb_info for each inode cache type. This makes the code a bit messy, so this patch introduces a new struct inode_management to wrap inner fields as follows, which makes the code neater. /* for inner inode cache management */ struct inode_management { struct radix_tree_root ino_root; /* ino entry array */ spinlock_t ino_lock; /* for ino entry lock */ struct list_head ino_list; /* inode list head */ unsigned long ino_num; /* number of entries */ }; struct f2fs_sb_info { ... struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ ... 
} Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index bcd686e..838e8ed 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -298,47 +298,49 @@ const struct address_space_operations f2fs_meta_aops = { static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; retry: - spin_lock(&sbi->ino_lock[type]); + spin_lock(&im->ino_lock); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + e = radix_tree_lookup(&im->ino_root, ino); if (!e) { e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); if (!e) { - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); goto retry; } - if (radix_tree_insert(&sbi->ino_root[type], ino, e)) { - spin_unlock(&sbi->ino_lock[type]); + if (radix_tree_insert(&im->ino_root, ino, e)) { + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); goto retry; } memset(e, 0, sizeof(struct ino_entry)); e->ino = ino; - list_add_tail(&e->list, &sbi->ino_list[type]); + list_add_tail(&e->list, &im->ino_list); if (type != ORPHAN_INO) - sbi->ino_num[type]++; + im->ino_num++; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); } static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[type]); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); if (e) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[type], ino); - sbi->ino_num[type]--; - spin_unlock(&sbi->ino_lock[type]); + radix_tree_delete(&im->ino_root, ino); + im->ino_num--; + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); return; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); } void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) @@ -356,10 +358,12 @@ void 
remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) /* mode should be APPEND_INO or UPDATE_INO */ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { + struct inode_management *im = &sbi->im[mode]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[mode]); - e = radix_tree_lookup(&sbi->ino_root[mode], ino); - spin_unlock(&sbi->ino_lock[mode]); + + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); + spin_unlock(&im->ino_lock); return e ? true : false; } @@ -369,37 +373,42 @@ void release_dirty_inode(struct f2fs_sb_info *sbi) int i; for (i = APPEND_INO; i <= UPDATE_INO; i++) { - spin_lock(&sbi->ino_lock[i]); - list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) { + struct inode_management *im = &sbi->im[i]; + + spin_lock(&im->ino_lock); + list_for_each_entry_safe(e, tmp, &im->ino_list, list) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[i], e->ino); + radix_tree_delete(&im->ino_root, e->ino); kmem_cache_free(ino_entry_slab, e); - sbi->ino_num[i]--; + im->ino_num--; } - spin_unlock(&sbi->ino_lock[i]); + spin_unlock(&im->ino_lock); } } int acquire_orphan_inode(struct f2fs_sb_info *sbi) { + struct inode_management *im = &sbi->im[ORPHAN_INO]; int err = 0; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - if (unlikely(sbi->ino_num[ORPHAN_INO] >= sbi->max_orphans)) + spin_lock(&im->ino_lock); + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else - sbi->ino_num[ORPHAN_INO]++; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + im->ino_num++; + spin_unlock(&im->ino_lock); return err; } void release_orphan_inode(struct f2fs_sb_info *sbi) { - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - f2fs_bug_on(sbi, sbi->ino_num[ORPHAN_INO] == 0); - sbi->ino_num[ORPHAN_INO]--; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + spin_lock(&im->ino_lock); + f2fs_bug_on(sbi, im->ino_num == 0); + im->ino_num--; + spin_unlock(&im->ino_lock); } void add_orphan_inode(struct 
f2fs_sb_info *sbi, nid_t ino) @@ -465,15 +474,16 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) unsigned short orphan_blocks; struct page *page = NULL; struct ino_entry *orphan = NULL; + struct inode_management *im = &sbi->im[ORPHAN_INO]; - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]); + orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num); for (index = 0; index < orphan_blocks; index++) grab_meta_page(sbi, start_blk + index); index = 1; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - head = &sbi->ino_list[ORPHAN_INO]; + spin_lock(&im->ino_lock); + head = &im->ino_list; /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { @@ -513,7 +523,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) f2fs_put_page(page, 1); } - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + spin_unlock(&im->ino_lock); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -836,6 +846,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; struct page *cp_page; @@ -895,7 +906,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->ino_num[ORPHAN_INO]); + orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); @@ -911,7 +922,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) orphan_blocks); } - if (sbi->ino_num[ORPHAN_INO]) + if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); @@ -946,7 +957,7 @@ static void 
do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_put_page(cp_page, 1); } - if (sbi->ino_num[ORPHAN_INO]) { + if (orphan_num) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; } @@ -1045,10 +1056,12 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) int i; for (i = 0; i < MAX_INO_ENTRY; i++) { - INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); - spin_lock_init(&sbi->ino_lock[i]); - INIT_LIST_HEAD(&sbi->ino_list[i]); - sbi->ino_num[i] = 0; + struct inode_management *im = &sbi->im[i]; + + INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC); + spin_lock_init(&im->ino_lock); + INIT_LIST_HEAD(&im->ino_list); + im->ino_num = 0; } /* diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 74a0d78..40b679c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -171,7 +171,7 @@ get_cache: si->cache_mem += npages << PAGE_CACHE_SHIFT; si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); for (i = 0; i <= UPDATE_INO; i++) - si->cache_mem += sbi->ino_num[i] * sizeof(struct ino_entry); + si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); } static int stat_show(struct seq_file *s, void *v) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 994b87e..418c852 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -499,6 +499,14 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ }; +/* for inner inode cache management */ +struct inode_management { + struct radix_tree_root ino_root; /* ino entry array */ + spinlock_t ino_lock; /* for ino entry lock */ + struct list_head ino_list; /* inode list head */ + unsigned long ino_num; /* number of entries */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -528,11 +536,7 @@ struct f2fs_sb_info { bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; - /* for inode management */ - struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ - 
spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ - struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ - unsigned long ino_num[MAX_INO_ENTRY]; /* number of entries */ + struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ /* for orphan inode, use 0'th array */ unsigned int max_orphans; /* max orphan inodes */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6f514fb..478ce1e 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -60,8 +60,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) if (sbi->sb->s_bdi->dirty_exceeded) return false; for (i = 0; i <= UPDATE_INO; i++) - mem_size += (sbi->ino_num[i] * sizeof(struct ino_entry)) - >> PAGE_CACHE_SHIFT; + mem_size += (sbi->im[i].ino_num * + sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } return res; -- cgit v0.10.2 From 27c6bd60ac9bf8114f6bb41eda6addc16a6b76a9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Nov 2014 10:54:48 -0800 Subject: f2fs: submit bio for node blocks in the reclaim path If a node page is requested to be written in the reclaim path, we should submit the bio right away so that reclaiming the page is not left pending. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 478ce1e..dbf49cc 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1326,6 +1326,10 @@ static int f2fs_write_node_page(struct page *page, dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, NODE, WRITE); + return 0; redirty_out: -- cgit v0.10.2 From 857dc4e0598e2a95b1cbb8d17c8538b34cc9a2f3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Nov 2014 11:03:34 -0800 Subject: f2fs: write SSA pages under memory pressure Under memory pressure, we don't need to skip SSA page writes.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 838e8ed..20a917b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -178,7 +178,7 @@ static int f2fs_write_meta_page(struct page *page, if (unlikely(sbi->por_doing)) goto redirty_out; - if (wbc->for_reclaim) + if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; @@ -187,6 +187,9 @@ static int f2fs_write_meta_page(struct page *page, write_meta_page(sbi, page); dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, META, WRITE); return 0; redirty_out: -- cgit v0.10.2 From 09b8b3c83971c82fd1a0ae32625b3be2aa891688 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 18 Nov 2014 10:50:21 -0800 Subject: f2fs: call flush_dcache_page when the page was updated Whenever f2fs updates mapped pages, it needs to call flush_dcache_page. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index f26fb87..914b6d3 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -106,6 +106,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) src_addr = inline_data_addr(dn->inode_page); dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + flush_dcache_page(page); kunmap_atomic(dst_addr); SetPageUptodate(page); no_update: -- cgit v0.10.2 From c9ee00857c9630fba14ebe368d06dc3ac2489b84 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Fri, 21 Nov 2014 15:42:07 +0900 Subject: f2fs: fix wrong data structure when create slab It used nat_entry_set when create slab for sit_entry_set. 
Signed-off-by: Changman Lee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e094675..9de857f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2231,7 +2231,7 @@ int __init create_segment_manager_caches(void) goto fail; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", - sizeof(struct nat_entry_set)); + sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destory_discard_entry; -- cgit v0.10.2 From 9486ba442b00a6b227bfe0d66b0f4dbcd1a2ee91 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 Nov 2014 16:36:28 -0800 Subject: f2fs: introduce f2fs_dentry_kunmap to clean up This patch introduces f2fs_dentry_kunmap to clean up dirty codes. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 5a49995..b1a7d57 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -271,8 +271,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) de = f2fs_find_entry(dir, qstr, &page); if (de) { res = le32_to_cpu(de->ino); - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 418c852..d042813 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1229,6 +1229,12 @@ static inline void *inline_dentry_addr(struct page *page) return (void *)&(ri->i_addr[1]); } +static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) +{ + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 2bee559..547a2de 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -200,8 +200,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, de = f2fs_find_entry(dir, &dentry->d_name, &page); if (de) { nid_t ino = le32_to_cpu(de->ino); - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); + f2fs_dentry_kunmap(dir, page); 
f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); @@ -231,8 +230,7 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); goto fail; } @@ -469,8 +467,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_page, new_dir); update_inode_page(old_inode); } else { - if (!f2fs_has_inline_dentry(old_inode)) - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); @@ -486,18 +483,15 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, put_out_dir: f2fs_unlock_op(sbi); - if (!f2fs_has_inline_dentry(new_dir)) - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) { - if (!f2fs_has_inline_dentry(old_inode)) - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_old: - if (!f2fs_has_inline_dentry(old_dir)) - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; @@ -632,23 +626,19 @@ out_unlock: f2fs_unlock_op(sbi); out_new_dir: if (new_dir_entry) { - if (!f2fs_has_inline_dentry(new_inode)) - kunmap(new_dir_page); + f2fs_dentry_kunmap(new_inode, new_dir_page); f2fs_put_page(new_dir_page, 0); } out_old_dir: if (old_dir_entry) { - if (!f2fs_has_inline_dentry(old_inode)) - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_new: - if (!f2fs_has_inline_dentry(new_dir)) - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_old: - if (!f2fs_has_inline_dentry(old_dir)) - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/recovery.c 
b/fs/f2fs/recovery.c index 4b180bb..9a93a6e 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -129,8 +129,7 @@ retry: goto out; out_unmap_put: - if (!f2fs_has_inline_dentry(dir)) - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); out_err: iput(dir); -- cgit v0.10.2 From 0341845efcb4a656707b6d551c3057d6dd27009f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 21 Nov 2014 16:37:40 -0800 Subject: f2fs: fix livelock calling f2fs_iget during f2fs_evict_inode In f2fs_evict_inode, commit_inmemory_pages f2fs_gc f2fs_iget iget_locked -> wait for inode free Here, if the inode is same as the one to be evicted, f2fs should wait forever. Actually, we should not call f2fs_balance_fs during f2fs_evict_inode to avoid this. But, the commit_inmem_pages calls f2fs_balance_fs by default, even if f2fs_evict_inode wants to free inmemory pages only. Hence, this patch adds to trigger f2fs_balance_fs only when there is something to write. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9de857f..9a33e34 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -230,7 +230,16 @@ void commit_inmem_pages(struct inode *inode, bool abort) .rw = WRITE_SYNC, }; - f2fs_balance_fs(sbi); + /* + * The abort is true only when f2fs_evict_inode is called. + * Basically, the f2fs_evict_inode doesn't produce any data writes, so + * that we don't need to call f2fs_balance_fs. + * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this + * inode becomes free by iget_locked in f2fs_iget. + */ + if (!abort) + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); mutex_lock(&fi->inmem_lock); -- cgit v0.10.2 From ce3e6d25f3fbaf48a3e1914d5ac0ddfdc1b38349 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 24 Nov 2014 15:52:00 +0100 Subject: f2fs: fix typos for the word "destroy" in jump labels Two jump labels were adjusted in the implementation of the create_node_manager_caches() function because these identifiers contained typos. 
Signed-off-by: Markus Elfring Acked-by: Randy Dunlap Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index dbf49cc..36656ad 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2083,17 +2083,17 @@ int __init create_node_manager_caches(void) free_nid_slab = f2fs_kmem_cache_create("free_nid", sizeof(struct free_nid)); if (!free_nid_slab) - goto destory_nat_entry; + goto destroy_nat_entry; nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) - goto destory_free_nid; + goto destroy_free_nid; return 0; -destory_free_nid: +destroy_free_nid: kmem_cache_destroy(free_nid_slab); -destory_nat_entry: +destroy_nat_entry: kmem_cache_destroy(nat_entry_slab); fail: return -ENOMEM; -- cgit v0.10.2 From 5f72739583a29bfaa57448ec2c9b122995d0ae4f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Nov 2014 10:59:45 -0800 Subject: f2fs: fix deadlock during inline_data conversion A deadlock can be occurred: Thread 1] Thread 2] - f2fs_write_data_pages - f2fs_write_begin - lock_page(page #0) - grab_cache_page(page #X) - get_node_page(inode_page) - grab_cache_page(page #0) : to convert inline_data - f2fs_write_data_page - f2fs_write_inline_data - get_node_page(inode_page) In this case, trying to lock inode_page and page #0 causes deadlock. In order to avoid this, this patch adds a rule for this locking policy, which is that page #0 should be locked followed by inode_page lock. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 12dd58a..c7bc626 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -936,6 +936,17 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); f2fs_balance_fs(sbi); + + /* + * We should check this at this moment to avoid deadlock on inode page + * and #0 page. 
The locking rule for inline_data conversion should be: + * lock_page(page #0) -> lock_page(inode_page) + */ + if (index != 0) { + err = f2fs_convert_inline_inode(inode); + if (err) + goto fail; + } repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { @@ -960,21 +971,10 @@ repeat: set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); sync_inode_page(&dn); goto put_next; - } else if (page->index == 0) { - err = f2fs_convert_inline_page(&dn, page); - if (err) - goto put_fail; - } else { - struct page *p = grab_cache_page(inode->i_mapping, 0); - if (!p) { - err = -ENOMEM; - goto put_fail; - } - err = f2fs_convert_inline_page(&dn, p); - f2fs_put_page(p, 1); - if (err) - goto put_fail; } + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto put_fail; } err = f2fs_reserve_block(&dn, index); if (err) -- cgit v0.10.2 From 20d047c87621090152c89f9581f78a69b02e0e35 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Tue, 25 Nov 2014 12:44:23 +0900 Subject: f2fs: check dirty_nat_cnt before flushing nat entries in journal It's meaningless to check dirty_nat_cnt after re-dirtying nat entries in journal. And although there are rooms for dirty nat entires if dirty_nat_cnt is zero, it's also meaningless to check __has_cursum_space. 
Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 36656ad..4af3fee 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -171,7 +171,7 @@ retry: static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, struct nat_entry *ne) { - nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; + nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; head = radix_tree_lookup(&nm_i->nat_set_root, set); @@ -1945,6 +1945,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t set_idx = 0; LIST_HEAD(sets); + if (!nm_i->dirty_nat_cnt) + return; /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them @@ -1953,9 +1955,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); - if (!nm_i->dirty_nat_cnt) - return; - while ((found = __gang_lookup_nat_set(nm_i, set_idx, NATVEC_SIZE, setvec))) { unsigned idx; -- cgit v0.10.2 From 80ec2e914d7e1a1edb93d89cce96dd145c85636a Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Tue, 25 Nov 2014 12:44:24 +0900 Subject: f2fs: no more dirty_nat_entires when flushing After flushing dirty nat entries, it has to be no more dirty nat entries. 
Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 4af3fee..b1466cf 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1925,10 +1925,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, else f2fs_put_page(page, 1); - if (!set->entry_cnt) { - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - kmem_cache_free(nat_entry_set_slab, set); - } + f2fs_bug_on(sbi, set->entry_cnt); + + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + kmem_cache_free(nat_entry_set_slab, set); } /* -- cgit v0.10.2 From 158c194c375be1b82149f9de80fd90e522979dc5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Nov 2014 11:34:02 -0800 Subject: f2fs: make clean the page before writing If a page is set to be written to the disk, we can make clean the page. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 914b6d3..e27f290 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -84,7 +84,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, }; - int err; + int dirty, err; f2fs_bug_on(F2FS_I_SB(dn->inode), page->index); @@ -110,12 +110,17 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) kunmap_atomic(dst_addr); SetPageUptodate(page); no_update: + /* clear dirty state */ + dirty = clear_page_dirty_for_io(page); + /* write data page to try to make data consistent */ set_page_writeback(page); write_data_page(page, dn, &new_blk_addr, &fio); update_extent_cache(new_blk_addr, dn); f2fs_wait_on_page_writeback(page, DATA); + if (dirty) + inode_dec_dirty_pages(dn->inode); /* clear inline data and flag after data writeback */ truncate_inline_data(dn->inode_page, 0); -- cgit v0.10.2 From 95f5b0fc5e25d94ace84dbf5bb4acb8b80b4f062 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 25 Nov 2014 17:27:38 -0800 Subject: f2fs: fix to recover converted inline_data If an inode has converted inline_data 
which was written to the disk, we should set its inode flag for further fsync so that this inline_data can be recovered from sudden power off. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e27f290..f2d3c58 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -122,6 +122,9 @@ no_update: if (dirty) inode_dec_dirty_pages(dn->inode); + /* this converted inline_data should be recovered. */ + set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); + /* clear inline data and flag after data writeback */ truncate_inline_data(dn->inode_page, 0); clear_out: -- cgit v0.10.2 From 31a3268839c1aa808a5109111ec847b95e1bb114 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Thu, 27 Nov 2014 16:03:08 +0900 Subject: f2fs: cleanup if-statement of phase in gc_data_segment Little cleanup to distinguish each phase easily Signed-off-by: Changman Lee [Jaegeuk Kim: modify indentation for code readability] Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 657683c9..6acd5f2 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -603,27 +603,27 @@ next_step: data_page = find_data_page(inode, start_bidx + ofs_in_node, false); - if (IS_ERR(data_page)) - goto next_iput; + if (IS_ERR(data_page)) { + iput(inode); + continue; + } f2fs_put_page(data_page, 0); add_gc_inode(inode, ilist); - } else { - inode = find_gc_inode(dni.ino, ilist); - if (inode) { - start_bidx = start_bidx_of_node(nofs, - F2FS_I(inode)); - data_page = get_lock_data_page(inode, + continue; + } + + /* phase 3 */ + inode = find_gc_inode(dni.ino, ilist); + if (inode) { + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + data_page = get_lock_data_page(inode, start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; - move_data_page(inode, data_page, gc_type); - stat_inc_data_blk_count(sbi, 1); - } + if (IS_ERR(data_page)) + continue; + move_data_page(inode, data_page, gc_type); + stat_inc_data_blk_count(sbi, 1); } - continue; -next_iput: - iput(inode); } if (++phase < 4) 
-- cgit v0.10.2 From cd34e2969b28de7685bb51d4b0fafeced8ef7f66 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 1 Dec 2014 11:30:20 +0800 Subject: f2fs: fix to return correct error number in f2fs_write_begin Fix the wrong error number in error path of f2fs_write_begin. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c7bc626..7ec697b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -960,8 +960,10 @@ repeat: /* check inline_data */ ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); goto unlock_fail; + } set_new_dnode(&dn, inode, ipage, ipage, 0); -- cgit v0.10.2 From 9c01503f4da3ff9c327d37249fe148ed7c188b20 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 1 Dec 2014 16:29:58 +0900 Subject: f2fs: cleanup redundant macro We've already made fi and sbi for inode. Let's avoid duplicated work. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7c2ec3e..146e58a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -206,10 +206,10 @@ go_write: ret = f2fs_sync_fs(inode->i_sb, 1); down_write(&fi->i_sem); - F2FS_I(inode)->xattr_ver = 0; + fi->xattr_ver = 0; if (file_wrong_pino(inode) && inode->i_nlink == 1 && get_parent_ino(inode, &pino)) { - F2FS_I(inode)->i_pino = pino; + fi->i_pino = pino; file_got_pino(inode); up_write(&fi->i_sem); mark_inode_dirty_sync(inode); @@ -241,7 +241,7 @@ sync_nodes: flush_out: remove_dirty_inode(sbi, ino, UPDATE_INO); clear_inode_flag(fi, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(F2FS_I_SB(inode)); + ret = f2fs_issue_flush(sbi); } out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); -- cgit v0.10.2 From 7dda2af83b2b7593458828d4f15443167b3da8c4 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Fri, 28 Nov 2014 15:49:40 +0000 Subject: f2fs: more fast lookup for gc_inode list If there are many inodes that have data blocks in victim segment, it takes long time to find a 
inode in gc_inode list. Let's use radix_tree to reduce lookup time. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 6acd5f2..a1af74f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -338,34 +338,42 @@ static const struct victim_selection default_v_ops = { .get_victim = get_victim_by_default, }; -static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) { struct inode_entry *ie; - list_for_each_entry(ie, ilist, list) - if (ie->inode->i_ino == ino) - return ie->inode; + ie = radix_tree_lookup(&gc_list->iroot, ino); + if (ie) + return ie->inode; return NULL; } -static void add_gc_inode(struct inode *inode, struct list_head *ilist) +static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) { struct inode_entry *new_ie; + int ret; - if (inode == find_gc_inode(inode->i_ino, ilist)) { + if (inode == find_gc_inode(gc_list, inode->i_ino)) { iput(inode); return; } - +retry: new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; - list_add_tail(&new_ie->list, ilist); + + ret = radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); + if (ret) { + kmem_cache_free(winode_slab, new_ie); + goto retry; + } + list_add_tail(&new_ie->list, &gc_list->ilist); } -static void put_gc_inode(struct list_head *ilist) +static void put_gc_inode(struct gc_inode_list *gc_list) { struct inode_entry *ie, *next_ie; - list_for_each_entry_safe(ie, next_ie, ilist, list) { + list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) { + radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); kmem_cache_free(winode_slab, ie); @@ -551,7 +559,7 @@ out: * the victim data block is ignored. 
*/ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, - struct list_head *ilist, unsigned int segno, int gc_type) + struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; struct f2fs_summary *entry; @@ -609,12 +617,12 @@ next_step: } f2fs_put_page(data_page, 0); - add_gc_inode(inode, ilist); + add_gc_inode(gc_list, inode); continue; } /* phase 3 */ - inode = find_gc_inode(dni.ino, ilist); + inode = find_gc_inode(gc_list, dni.ino); if (inode) { start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); data_page = get_lock_data_page(inode, @@ -657,7 +665,7 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, } static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, - struct list_head *ilist, int gc_type) + struct gc_inode_list *gc_list, int gc_type) { struct page *sum_page; struct f2fs_summary_block *sum; @@ -675,7 +683,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, gc_node_segment(sbi, sum->entries, segno, gc_type); break; case SUM_TYPE_DATA: - gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); break; } blk_finish_plug(&plug); @@ -688,16 +696,18 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, int f2fs_gc(struct f2fs_sb_info *sbi) { - struct list_head ilist; unsigned int segno, i; int gc_type = BG_GC; int nfree = 0; int ret = -1; struct cp_control cpc; + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(GFP_ATOMIC), + }; cpc.reason = test_opt(sbi, FASTBOOT) ? 
CP_UMOUNT : CP_SYNC; - INIT_LIST_HEAD(&ilist); gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; @@ -719,7 +729,7 @@ gc_more: META_SSA); for (i = 0; i < sbi->segs_per_sec; i++) - do_garbage_collect(sbi, segno + i, &ilist, gc_type); + do_garbage_collect(sbi, segno + i, &gc_list, gc_type); if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -735,7 +745,7 @@ gc_more: stop: mutex_unlock(&sbi->gc_mutex); - put_gc_inode(&ilist); + put_gc_inode(&gc_list); return ret; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 16f0b2b..6ff7ad3 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -40,6 +40,11 @@ struct inode_entry { struct inode *inode; }; +struct gc_inode_list { + struct list_head ilist; + struct radix_tree_root iroot; +}; + /* * inline functions */ -- cgit v0.10.2 From 4634d71ed190c99e42ebee450f9a6897d20ee22c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Dec 2014 16:40:28 -0800 Subject: f2fs: fix missing kmem_cache_free This patch fixes missing kmem_cache_free when handling errors. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b1466cf..c59341d 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -158,7 +158,7 @@ retry: head->entry_cnt = 0; if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { - cond_resched(); + kmem_cache_free(nat_entry_set_slab, head); goto retry; } } -- cgit v0.10.2 From 8b26ef98da3387eb57a8a5c1747c6e628948ee0c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Dec 2014 21:15:10 -0800 Subject: f2fs: use rw_semaphore for nat entry lock Previously, we used an rwlock for the nat_entry lock. But now we have a lot of complex operations in set_node_addr (e.g., allocating kernel memory, handling radix trees, and so on). So, this patch changes the rwlock to an rw_semaphore so that the CPU can be given to other threads.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d042813..c873140 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -332,7 +332,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index c59341d..b47555f 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -196,11 +196,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -210,11 +210,11 @@ bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool fsynced = false; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) fsynced = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return fsynced; } @@ -224,13 +224,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need_update; } @@ 
-258,17 +258,17 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, { struct nat_entry *e; retry: - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); if (!e) { - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); goto retry; } node_info_from_raw_nat(&e->ni, ne); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -277,12 +277,12 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; retry: - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); if (!e) { - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); goto retry; } e->ni = *ni; @@ -326,7 +326,7 @@ retry: set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -336,7 +336,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) if (available_free_memory(sbi, NAT_ENTRIES)) return 0; - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; ne = list_first_entry(&nm_i->nat_entries, @@ -344,7 +344,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); return nr_shrink; } @@ -367,14 +367,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = 
__lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); } - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (e) return; @@ -1432,13 +1432,13 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) if (build) { /* do not add allocated nids */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (allocated) return 0; } @@ -1827,20 +1827,20 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) raw_ne = nat_in_journal(sum, i); retry: - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (ne) goto found; ne = grab_nat_entry(nm_i, nid); if (!ne) { - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); goto retry; } node_info_from_raw_nat(&ne->ni, &raw_ne); found: __set_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); @@ -1911,10 +1911,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } raw_nat_from_node_info(raw_ne, &ne->ni); - write_lock(&NM_I(sbi)->nat_tree_lock); + down_write(&NM_I(sbi)->nat_tree_lock); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - write_unlock(&NM_I(sbi)->nat_tree_lock); + up_write(&NM_I(sbi)->nat_tree_lock); if (nat_get_blkaddr(ne) == NULL_ADDR) add_free_nid(sbi, nid, false); @@ -2000,7 +2000,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - rwlock_init(&nm_i->nat_tree_lock); + init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size 
= __bitmap_size(sbi, NAT_BITMAP); @@ -2056,7 +2056,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2065,7 +2065,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, natvec[idx]); } f2fs_bug_on(sbi, nm_i->nat_cnt); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; -- cgit v0.10.2 From 769ec6e5b7d4a8115447736871be8bffaaba3a7d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 3 Dec 2014 20:47:26 -0800 Subject: f2fs: call radix_tree_preload before radix_tree_insert This patch tries to fix: BUG: using smp_processor_id() in preemptible [00000000] code: f2fs_gc-254:0/384 (radix_tree_node_alloc+0x14/0x74) from [] (radix_tree_insert+0x110/0x200) (radix_tree_insert+0x110/0x200) from [] (gc_data_segment+0x340/0x52c) (gc_data_segment+0x340/0x52c) from [] (f2fs_gc+0x208/0x400) (f2fs_gc+0x208/0x400) from [] (gc_thread_func+0x248/0x28c) (gc_thread_func+0x248/0x28c) from [] (kthread+0xa0/0xac) (kthread+0xa0/0xac) from [] (ret_from_fork+0x14/0x3c) The reason is that f2fs calls radix_tree_insert under enabled preemption. So, before calling it, we need to call radix_tree_preload. Otherwise, we should use _GFP_WAIT for the radix tree, and use mutex or semaphore to cover the radix tree operations. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 20a917b..6a81b73 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -304,6 +304,11 @@ static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) struct inode_management *im = &sbi->im[type]; struct ino_entry *e; retry: + if (radix_tree_preload(GFP_NOFS)) { + cond_resched(); + goto retry; + } + spin_lock(&im->ino_lock); e = radix_tree_lookup(&im->ino_root, ino); @@ -311,11 +316,13 @@ retry: e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); if (!e) { spin_unlock(&im->ino_lock); + radix_tree_preload_end(); goto retry; } if (radix_tree_insert(&im->ino_root, ino, e)) { spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); + radix_tree_preload_end(); goto retry; } memset(e, 0, sizeof(struct ino_entry)); @@ -326,6 +333,7 @@ retry: im->ino_num++; } spin_unlock(&im->ino_lock); + radix_tree_preload_end(); } static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a1af74f..2c58c58 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -351,7 +351,6 @@ static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) { struct inode_entry *new_ie; - int ret; if (inode == find_gc_inode(gc_list, inode->i_ino)) { iput(inode); @@ -361,8 +360,7 @@ retry: new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; - ret = radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie); - if (ret) { + if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { kmem_cache_free(winode_slab, new_ie); goto retry; } @@ -703,7 +701,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi) struct cp_control cpc; struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), - .iroot = RADIX_TREE_INIT(GFP_ATOMIC), + .iroot = RADIX_TREE_INIT(GFP_NOFS), }; cpc.reason = test_opt(sbi, FASTBOOT) ? 
CP_UMOUNT : CP_SYNC; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b47555f..8de4f55 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1447,15 +1447,22 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i->nid = nid; i->state = NID_NEW; + if (radix_tree_preload(GFP_NOFS)) { + kmem_cache_free(free_nid_slab, i); + return 0; + } + spin_lock(&nm_i->free_nid_list_lock); if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); kmem_cache_free(free_nid_slab, i); return 0; } list_add_tail(&i->list, &nm_i->free_nid_list); nm_i->fcnt++; spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); return 1; } @@ -1994,8 +2001,8 @@ static int init_node_manager(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); - INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); - INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); + INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); mutex_init(&nm_i->build_lock); -- cgit v0.10.2 From 9be32d72becca41d7d9b010d7d9be1d39489414f Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 10:39:49 -0800 Subject: f2fs: do retry operations with cond_resched This patch revisits the retry paths in f2fs. The basic idea is to use cond_resched instead of retrying from the very early stage.
Suggested-by: Gu Zheng Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c873140..c787fe3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1021,6 +1021,13 @@ retry: return entry; } +static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + while (radix_tree_insert(root, index, item)) + cond_resched(); +} + #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) static inline bool IS_INODE(struct page *page) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2c58c58..eec0933 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -356,12 +356,11 @@ static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) iput(inode); return; } -retry: new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; - +retry: if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { - kmem_cache_free(winode_slab, new_ie); + cond_resched(); goto retry; } list_add_tail(&new_ie->list, &gc_list->ilist); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8de4f55..f83326c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -147,7 +147,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) return; -retry: + head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); @@ -156,11 +156,7 @@ retry: INIT_LIST_HEAD(&head->set_list); head->set = set; head->entry_cnt = 0; - - if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { - kmem_cache_free(nat_entry_set_slab, head); - goto retry; - } + f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } list_move_tail(&ne->list, &head->entry_list); nm_i->dirty_nat_cnt++; @@ -238,13 +234,8 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; - new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); - if (!new) - return NULL; - if 
(radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); memset(new, 0, sizeof(struct nat_entry)); nat_set_nid(new, nid); nat_reset_flag(new); @@ -257,15 +248,11 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, struct f2fs_nat_entry *ne) { struct nat_entry *e; -retry: + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); - if (!e) { - up_write(&nm_i->nat_tree_lock); - goto retry; - } node_info_from_raw_nat(&e->ni, ne); } up_write(&nm_i->nat_tree_lock); @@ -276,15 +263,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; -retry: + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); - if (!e) { - up_write(&nm_i->nat_tree_lock); - goto retry; - } e->ni = *ni; f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -1833,19 +1816,13 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); raw_ne = nat_in_journal(sum, i); -retry: + down_write(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); - if (ne) - goto found; - - ne = grab_nat_entry(nm_i, nid); if (!ne) { - up_write(&nm_i->nat_tree_lock); - goto retry; + ne = grab_nat_entry(nm_i, nid); + node_info_from_raw_nat(&ne->ni, &raw_ne); } - node_info_from_raw_nat(&ne->ni, &raw_ne); -found: __set_nat_cache_dirty(nm_i, ne); up_write(&nm_i->nat_tree_lock); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 9a33e34..c79d67e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -179,13 +179,13 @@ void register_inmem_page(struct inode *inode, struct page *page) struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; int 
err; -retry: + new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ new->page = page; INIT_LIST_HEAD(&new->list); - +retry: /* increase reference count with clean state */ mutex_lock(&fi->inmem_lock); err = radix_tree_insert(&fi->inmem_root, page->index, new); @@ -195,7 +195,6 @@ retry: return; } else if (err) { mutex_unlock(&fi->inmem_lock); - kmem_cache_free(inmem_entry_slab, new); goto retry; } get_page(page); -- cgit v0.10.2 From 9d1015dd4c6ba602272f80487bb9c922a9a71cc1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 10:51:50 -0800 Subject: f2fs: count inline_xx in do_read_inode In do_read_inode, if we failed __recover_inline_status, the inode has inline flag without increasing its count. Later, f2fs_evict_inode will decrease the count, which causes -1. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9fe110e..196cc78 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -148,6 +148,10 @@ static int do_read_inode(struct inode *inode) __get_inode_rdev(inode, ri); f2fs_put_page(node_page, 1); + + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + return err; } @@ -199,8 +203,6 @@ make_now: goto bad_inode; } unlock_new_inode(inode); - stat_inc_inline_inode(inode); - stat_inc_inline_dir(inode); trace_f2fs_iget(inode); return inode; -- cgit v0.10.2 From 0722b1011a5f6b03b73334ecd80e121a95e8d4e0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 11:58:02 -0800 Subject: f2fs: set page private for inmemory pages for truncation The inmemory pages should be handled by invalidate_page since they need to be released in the truncation path.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c79d67e..3ebcf96 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -180,6 +180,8 @@ void register_inmem_page(struct inode *inode, struct page *page) struct inmem_pages *new; int err; + SetPagePrivate(page); + new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ -- cgit v0.10.2 From 126622343a8488917c5e0891eae1bc936a63f49a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 14:37:37 -0800 Subject: f2fs: release inmemory pages when the file was closed If file is closed, let's drop inmemory pages. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 146e58a..b6f3fbf 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -901,6 +901,14 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) return f2fs_convert_inline_inode(inode); } +static int f2fs_release_file(struct inode *inode, struct file *filp) +{ + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + commit_inmem_pages(inode, true); + return 0; +} + static int f2fs_ioc_commit_atomic_write(struct file *filp) { struct inode *inode = file_inode(filp); @@ -1010,6 +1018,7 @@ const struct file_operations f2fs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .open = generic_file_open, + .release = f2fs_release_file, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, -- cgit v0.10.2 From 8dcf2ff72120707f960d9b3b15ce6e50705b13a6 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 5 Dec 2014 17:18:15 -0800 Subject: f2fs: count the number of inmemory pages This patch adds counting # of inmemory pages in the page cache. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 40b679c..4e2e39c 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -39,6 +39,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); @@ -249,6 +250,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_puts(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - inmem: %4d\n", + si->inmem_pages); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c787fe3..f60b817 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -462,6 +462,7 @@ enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_NODES, F2FS_DIRTY_META, + F2FS_INMEM_PAGES, NR_COUNT_TYPE, }; @@ -1498,7 +1499,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, sits, fnids; int total_count, utilization; - int bg_gc, inline_inode, inline_dir; + int bg_gc, inline_inode, inline_dir, inmem_pages; unsigned int valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3ebcf96..42607a6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -201,6 +201,7 @@ retry: } get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); mutex_unlock(&fi->inmem_lock); } @@ -216,6 +217,7 @@ void invalidate_inmem_page(struct inode *inode, struct page *page) f2fs_put_page(cur->page, 0); 
list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } mutex_unlock(&fi->inmem_lock); } @@ -257,6 +259,7 @@ void commit_inmem_pages(struct inode *inode, bool abort) f2fs_put_page(cur->page, 1); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } if (submit_bio) f2fs_submit_merged_bio(sbi, DATA, WRITE); -- cgit v0.10.2 From 9c7bb702122fdf7c391f7d02c7d27a61a2c0c4b7 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 8 Dec 2014 15:29:40 +0900 Subject: f2fs: check if inode state is dirty at fsync If inode state is dirty, go straight to write. Suggested-by: Jaegeuk Kim Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b6f3fbf..0b97002 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -138,6 +138,17 @@ static inline bool need_do_checkpoint(struct inode *inode) return need_cp; } +static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct page *i = find_get_page(NODE_MAPPING(sbi), ino); + bool ret = false; + /* But we need to avoid that there are some inode updates */ + if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) + ret = true; + f2fs_put_page(i, 0); + return ret; +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -168,19 +179,21 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return ret; } + /* if the inode is dirty, let's recover all the time */ + if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { + update_inode_page(inode); + goto go_write; + } + /* * if there is no written data, don't waste time to write recovery info. 
*/ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && !exist_written_data(sbi, ino, APPEND_INO)) { - struct page *i = find_get_page(NODE_MAPPING(sbi), ino); - /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { - f2fs_put_page(i, 0); + /* it may call write_inode just prior to fsync */ + if (need_inode_page_update(sbi, ino)) goto go_write; - } - f2fs_put_page(i, 0); if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || exist_written_data(sbi, ino, UPDATE_INO)) -- cgit v0.10.2 From 51455b19384d26afac4f214fab2ad0a4f1964e30 Mon Sep 17 00:00:00 2001 From: Changman Lee Date: Mon, 8 Dec 2014 15:29:41 +0900 Subject: f2fs: cleanup path to need cp at fsync Added some commentaries for code readability and cleaned up if-statement clearly. Signed-off-by: Changman Lee Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0b97002..3c27e0e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -149,6 +149,26 @@ static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) return ret; } +static void try_to_fix_pino(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + nid_t pino; + + down_write(&fi->i_sem); + fi->xattr_ver = 0; + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + fi->i_pino = pino; + file_got_pino(inode); + up_write(&fi->i_sem); + + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + } else { + up_write(&fi->i_sem); + } +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -213,49 +233,36 @@ go_write: up_read(&fi->i_sem); if (need_cp) { - nid_t pino; - /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); - down_write(&fi->i_sem); - fi->xattr_ver = 0; - if (file_wrong_pino(inode) && inode->i_nlink == 1 && - get_parent_ino(inode, &pino)) { - fi->i_pino = pino; - file_got_pino(inode); - 
up_write(&fi->i_sem); - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - } else { - up_write(&fi->i_sem); - } - } else { + /* + * We've secured consistency through sync_fs. Following pino + * will be used only for fsynced inodes after checkpoint. + */ + try_to_fix_pino(inode); + goto out; + } sync_nodes: - sync_node_pages(sbi, ino, &wbc); - - if (need_inode_block_update(sbi, ino)) { - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - goto sync_nodes; - } + sync_node_pages(sbi, ino, &wbc); - ret = wait_on_node_pages_writeback(sbi, ino); - if (ret) - goto out; + if (need_inode_block_update(sbi, ino)) { + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + goto sync_nodes; + } - /* once recovery info is written, don't need to tack this */ - remove_dirty_inode(sbi, ino, APPEND_INO); - clear_inode_flag(fi, FI_APPEND_WRITE); + ret = wait_on_node_pages_writeback(sbi, ino); + if (ret) + goto out; + + /* once recovery info is written, don't need to tack this */ + remove_dirty_inode(sbi, ino, APPEND_INO); + clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: - remove_dirty_inode(sbi, ino, UPDATE_INO); - clear_inode_flag(fi, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(sbi); - } + remove_dirty_inode(sbi, ino, UPDATE_INO); + clear_inode_flag(fi, FI_UPDATE_WRITE); + ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; -- cgit v0.10.2 From 03e14d522eb1fdf9c0ce37085cb56749342a842c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Dec 2014 19:08:20 +0800 Subject: f2fs: use atomic for counting inode with inline_{dir,inode} flag As inline_{dir,inode} stat is increased/decreased concurrently by multi threads, so the value is not so accurate, let's use atomic type for counting accurately. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 4e2e39c..91e8f69 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -46,8 +46,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->valid_count = valid_user_blocks(sbi); si->valid_node_count = valid_node_count(sbi); si->valid_inode_count = valid_inode_count(sbi); - si->inline_inode = sbi->inline_inode; - si->inline_dir = sbi->inline_dir; + si->inline_inode = atomic_read(&sbi->inline_inode); + si->inline_dir = atomic_read(&sbi->inline_dir); si->utilization = utilization(sbi); si->free_segs = free_segments(sbi); @@ -329,6 +329,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->sbi = sbi; sbi->stat_info = si; + atomic_set(&sbi->inline_inode, 0); + atomic_set(&sbi->inline_dir, 0); + mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); mutex_unlock(&f2fs_stat_mutex); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f60b817..2695d78 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -590,8 +590,8 @@ struct f2fs_sb_info { unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ - int inline_inode; /* # of inline_data inodes */ - int inline_dir; /* # of inline_dentry inodes */ + atomic_t inline_inode; /* # of inline_data inodes */ + atomic_t inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ unsigned int n_dirty_dirs; /* # of dir inodes */ #endif @@ -1532,22 +1532,22 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode++); \ + (atomic_inc(&F2FS_I_SB(inode)->inline_inode)); \ } while (0) #define stat_dec_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode--); \ + 
(atomic_dec(&F2FS_I_SB(inode)->inline_inode)); \ } while (0) #define stat_inc_inline_dir(inode) \ do { \ if (f2fs_has_inline_dentry(inode)) \ - ((F2FS_I_SB(inode))->inline_dir++); \ + (atomic_inc(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) #define stat_dec_inline_dir(inode) \ do { \ if (f2fs_has_inline_dentry(inode)) \ - ((F2FS_I_SB(inode))->inline_dir--); \ + (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) -- cgit v0.10.2 From 13da549460d549aec78a943e589f4ffc3fdc716c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Dec 2014 14:56:19 +0800 Subject: f2fs: fix to enable readahead for SSA/CP blocks 1.We use zero as upper boundary value for ra SSA/CP blocks, we will skip readahead as verification failure with max number, it causes low performance. 2.Low boundary value is not accurate for SSA/CP/POR region verification, so these values need to be redefined. This patch fixes above issues. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 6a81b73..f3ebfb5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -95,8 +95,9 @@ static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) case META_SIT: return SIT_BLK_CNT(sbi); case META_SSA: + return MAIN_BLKADDR(sbi); case META_CP: - return 0; + return SM_I(sbi)->sit_info->sit_base_addr; case META_POR: return MAX_BLKADDR(sbi); default: @@ -141,11 +142,23 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type prev_blk_addr = blk_addr; break; case META_SSA: + if (unlikely(blkno >= max_blks)) + goto out; + if (unlikely(blkno < SM_I(sbi)->ssa_blkaddr)) + goto out; + blk_addr = blkno; + break; case META_CP: + if (unlikely(blkno >= max_blks)) + goto out; + if (unlikely(blkno < __start_cp_addr(sbi))) + goto out; + blk_addr = blkno; + break; case META_POR: if (unlikely(blkno >= max_blks)) goto out; - if 
(unlikely(blkno < SEG0_BLKADDR(sbi))) + if (unlikely(blkno < MAIN_BLKADDR(sbi))) goto out; blk_addr = blkno; break; -- cgit v0.10.2 From 66b00c186764e29765e8962a03556c329dee48e5 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Dec 2014 14:59:17 +0800 Subject: f2fs: introduce is_valid_blkaddr to cleanup codes in ra_meta_pages This patch does cleanup work, it introduces is_valid_blkaddr() to include verification code for blkaddr with upper and down boundary value which were in ra_meta_pages previous. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f3ebfb5..b2d5431 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -87,22 +87,36 @@ struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) return get_meta_page(sbi, index); } -static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) { switch (type) { case META_NAT: - return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK; + break; case META_SIT: - return SIT_BLK_CNT(sbi); + if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) + return false; + break; case META_SSA: - return MAIN_BLKADDR(sbi); + if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || + blkaddr < SM_I(sbi)->ssa_blkaddr)) + return false; + break; case META_CP: - return SM_I(sbi)->sit_info->sit_base_addr; + if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || + blkaddr < __start_cp_addr(sbi))) + return false; + break; case META_POR: - return MAX_BLKADDR(sbi); + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } + + return true; } /* @@ -113,7 +127,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type block_t prev_blk_addr = 0; struct page *page; block_t blkno = start; - block_t max_blks = get_max_meta_blks(sbi, type); struct f2fs_io_info fio = { .type = META, @@ -123,18 +136,20 @@ int 
ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type for (; nrpages-- > 0; blkno++) { block_t blk_addr; + if (!is_valid_blkaddr(sbi, blkno, type)) + goto out; + switch (type) { case META_NAT: - /* get nat block addr */ - if (unlikely(blkno >= max_blks)) + if (unlikely(blkno >= + NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) blkno = 0; + /* get nat block addr */ blk_addr = current_nat_addr(sbi, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: /* get sit block addr */ - if (unlikely(blkno >= max_blks)) - goto out; blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); if (blkno != start && prev_blk_addr + 1 != blk_addr) @@ -142,24 +157,8 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type prev_blk_addr = blk_addr; break; case META_SSA: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < SM_I(sbi)->ssa_blkaddr)) - goto out; - blk_addr = blkno; - break; case META_CP: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < __start_cp_addr(sbi))) - goto out; - blk_addr = blkno; - break; case META_POR: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < MAIN_BLKADDR(sbi))) - goto out; blk_addr = blkno; break; default: -- cgit v0.10.2 From 635aee1fefef921ae4124b127fced62ea6008839 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 8 Dec 2014 15:02:52 +0800 Subject: f2fs: avoid to ra unneeded blocks in recover flow To improve recovery speed, f2fs tries to read ahead many contiguous blocks in the warm node segment. However, abnormal power-off is infrequent, so when mounting an f2fs image after a normal power-off, reading ahead that many blocks and then invalidating them hurts mount performance instead. It is better to read ahead only the first next-block in the normal case.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b2d5431..e6c271f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -72,21 +72,6 @@ out: return page; } -struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) -{ - bool readahead = false; - struct page *page; - - page = find_get_page(META_MAPPING(sbi), index); - if (!page || (page && !PageUptodate(page))) - readahead = true; - f2fs_put_page(page, 0); - - if (readahead) - ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); - return get_meta_page(sbi, index); -} - static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type) { @@ -181,6 +166,20 @@ out: return blkno - start; } +void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + bool readahead = false; + + page = find_get_page(META_MAPPING(sbi), index); + if (!page || (page && !PageUptodate(page))) + readahead = true; + f2fs_put_page(page, 0); + + if (readahead) + ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); +} + static int f2fs_write_meta_page(struct page *page, struct writeback_control *wbc) { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2695d78..ec58bb2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1431,8 +1431,8 @@ void destroy_segment_manager_caches(void); */ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); +void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9a93a6e..9160a37 100644 --- a/fs/f2fs/recovery.c +++ 
b/fs/f2fs/recovery.c @@ -170,13 +170,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + ra_meta_pages(sbi, blkaddr, 1, META_POR); + while (1) { struct fsync_inode_entry *entry; if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) return 0; - page = get_meta_page_ra(sbi, blkaddr); + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) break; @@ -227,6 +229,8 @@ next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); + + ra_meta_pages_cond(sbi, blkaddr); } f2fs_put_page(page, 1); return err; @@ -436,7 +440,9 @@ static int recover_data(struct f2fs_sb_info *sbi, if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) break; - page = get_meta_page_ra(sbi, blkaddr); + ra_meta_pages_cond(sbi, blkaddr); + + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) { f2fs_put_page(page, 1); -- cgit v0.10.2