diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-10 23:41:28 (GMT) |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-12-10 23:41:28 (GMT) |
commit | 4b0a268eeccae14d42ff5fb9f19b612913c0007c (patch) | |
tree | f8a9e9345b50b437b205421d8c7898c7ccaafdf4 | |
parent | a6b849578ef3e0b131b1ea4063473a4f935a65e9 (diff) | |
parent | 635aee1fefef921ae4124b127fced62ea6008839 (diff) | |
download | linux-4b0a268eeccae14d42ff5fb9f19b612913c0007c.tar.xz |
Merge tag 'for-f2fs-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"This patch-set includes lots of bug fixes based on clean-ups and
refactored code. In addition, inline_dir was introduced and two minor mount
options were added. Details from signed tag:
This series includes the following enhancements with refactored flows.
- fix inmemory page operations
- fix wrong inline_data & inline_dir logics
- enhance memory and IO control under memory pressure
- consider preemption on radix_tree operation
- fix memory leaks and deadlocks
But also, there are a couple of new features:
- support inline_dir to store dentries inside inode page
- add -o fastboot to reduce booting time
- implement -o dirsync
And a lot of clean-ups and minor bug fixes as well"
* tag 'for-f2fs-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (88 commits)
f2fs: avoid to ra unneeded blocks in recover flow
f2fs: introduce is_valid_blkaddr to cleanup codes in ra_meta_pages
f2fs: fix to enable readahead for SSA/CP blocks
f2fs: use atomic for counting inode with inline_{dir,inode} flag
f2fs: cleanup path to need cp at fsync
f2fs: check if inode state is dirty at fsync
f2fs: count the number of inmemory pages
f2fs: release inmemory pages when the file was closed
f2fs: set page private for inmemory pages for truncation
f2fs: count inline_xx in do_read_inode
f2fs: do retry operations with cond_resched
f2fs: call radix_tree_preload before radix_tree_insert
f2fs: use rw_semaphore for nat entry lock
f2fs: fix missing kmem_cache_free
f2fs: more fast lookup for gc_inode list
f2fs: cleanup redundant macro
f2fs: fix to return correct error number in f2fs_write_begin
f2fs: cleanup if-statement of phase in gc_data_segment
f2fs: fix to recover converted inline_data
f2fs: make clean the page before writing
...
-rw-r--r-- | Documentation/filesystems/f2fs.txt | 7 | ||||
-rw-r--r-- | fs/f2fs/acl.c | 148 | ||||
-rw-r--r-- | fs/f2fs/acl.h | 5 | ||||
-rw-r--r-- | fs/f2fs/checkpoint.c | 186 | ||||
-rw-r--r-- | fs/f2fs/data.c | 166 | ||||
-rw-r--r-- | fs/f2fs/debug.c | 15 | ||||
-rw-r--r-- | fs/f2fs/dir.c | 308 | ||||
-rw-r--r-- | fs/f2fs/f2fs.h | 176 | ||||
-rw-r--r-- | fs/f2fs/file.c | 212 | ||||
-rw-r--r-- | fs/f2fs/gc.c | 89 | ||||
-rw-r--r-- | fs/f2fs/gc.h | 5 | ||||
-rw-r--r-- | fs/f2fs/inline.c | 482 | ||||
-rw-r--r-- | fs/f2fs/inode.c | 44 | ||||
-rw-r--r-- | fs/f2fs/namei.c | 58 | ||||
-rw-r--r-- | fs/f2fs/node.c | 163 | ||||
-rw-r--r-- | fs/f2fs/node.h | 8 | ||||
-rw-r--r-- | fs/f2fs/recovery.c | 14 | ||||
-rw-r--r-- | fs/f2fs/segment.c | 122 | ||||
-rw-r--r-- | fs/f2fs/segment.h | 8 | ||||
-rw-r--r-- | fs/f2fs/super.c | 29 | ||||
-rw-r--r-- | fs/f2fs/xattr.c | 6 | ||||
-rw-r--r-- | fs/f2fs/xattr.h | 6 | ||||
-rw-r--r-- | include/linux/f2fs_fs.h | 27 |
23 files changed, 1578 insertions, 706 deletions
diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index 2cca5a2..e0950c4 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -122,6 +122,10 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs inline_xattr Enable the inline xattrs feature. inline_data Enable the inline data feature: New created small(<~3.4k) files can be written into inode block. +inline_dentry Enable the inline dir feature: data in new created + directory entries can be written into inode block. The + space of inode block which is used to store inline + dentries is limited to ~3.4k. flush_merge Merge concurrent cache_flush commands as much as possible to eliminate redundant command issues. If the underlying device handles the cache_flush command relatively slowly, @@ -131,6 +135,9 @@ nobarrier This option can be used if underlying storage guarantees If this option is set, no cache_flush commands are issued but f2fs still guarantees the write ordering of all the data writes. +fastboot This option is used when a system wants to reduce mount + time as much as possible, even though normal performance + can be sacrificed. 
================================================================================ DEBUGFS ENTRIES diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 83b9b5a..1ccb26b 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -162,7 +162,8 @@ fail: return ERR_PTR(-EINVAL); } -struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, + struct page *dpage) { int name_index = F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT; void *value = NULL; @@ -172,12 +173,13 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) if (type == ACL_TYPE_ACCESS) name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; - retval = f2fs_getxattr(inode, name_index, "", NULL, 0); + retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); if (retval > 0) { value = kmalloc(retval, GFP_F2FS_ZERO); if (!value) return ERR_PTR(-ENOMEM); - retval = f2fs_getxattr(inode, name_index, "", value, retval); + retval = f2fs_getxattr(inode, name_index, "", value, + retval, dpage); } if (retval > 0) @@ -194,6 +196,11 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) return acl; } +struct posix_acl *f2fs_get_acl(struct inode *inode, int type) +{ + return __f2fs_get_acl(inode, type, NULL); +} + static int __f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl, struct page *ipage) { @@ -229,7 +236,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (acl) { value = f2fs_acl_to_disk(acl, &size); if (IS_ERR(value)) { - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return (int)PTR_ERR(value); } } @@ -240,7 +247,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, if (!error) set_cached_acl(inode, type, acl); - cond_clear_inode_flag(fi, FI_ACL_MODE); + clear_inode_flag(fi, FI_ACL_MODE); return error; } @@ -249,12 +256,137 @@ int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type) return __f2fs_set_acl(inode, type, acl, NULL); } -int f2fs_init_acl(struct inode 
*inode, struct inode *dir, struct page *ipage) +/* + * Most part of f2fs_acl_clone, f2fs_acl_create_masq, f2fs_acl_create + * are copied from posix_acl.c + */ +static struct posix_acl *f2fs_acl_clone(const struct posix_acl *acl, + gfp_t flags) +{ + struct posix_acl *clone = NULL; + + if (acl) { + int size = sizeof(struct posix_acl) + acl->a_count * + sizeof(struct posix_acl_entry); + clone = kmemdup(acl, size, flags); + if (clone) + atomic_set(&clone->a_refcount, 1); + } + return clone; +} + +static int f2fs_acl_create_masq(struct posix_acl *acl, umode_t *mode_p) +{ + struct posix_acl_entry *pa, *pe; + struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; + umode_t mode = *mode_p; + int not_equiv = 0; + + /* assert(atomic_read(acl->a_refcount) == 1); */ + + FOREACH_ACL_ENTRY(pa, acl, pe) { + switch(pa->e_tag) { + case ACL_USER_OBJ: + pa->e_perm &= (mode >> 6) | ~S_IRWXO; + mode &= (pa->e_perm << 6) | ~S_IRWXU; + break; + + case ACL_USER: + case ACL_GROUP: + not_equiv = 1; + break; + + case ACL_GROUP_OBJ: + group_obj = pa; + break; + + case ACL_OTHER: + pa->e_perm &= mode | ~S_IRWXO; + mode &= pa->e_perm | ~S_IRWXO; + break; + + case ACL_MASK: + mask_obj = pa; + not_equiv = 1; + break; + + default: + return -EIO; + } + } + + if (mask_obj) { + mask_obj->e_perm &= (mode >> 3) | ~S_IRWXO; + mode &= (mask_obj->e_perm << 3) | ~S_IRWXG; + } else { + if (!group_obj) + return -EIO; + group_obj->e_perm &= (mode >> 3) | ~S_IRWXO; + mode &= (group_obj->e_perm << 3) | ~S_IRWXG; + } + + *mode_p = (*mode_p & ~S_IRWXUGO) | mode; + return not_equiv; +} + +static int f2fs_acl_create(struct inode *dir, umode_t *mode, + struct posix_acl **default_acl, struct posix_acl **acl, + struct page *dpage) +{ + struct posix_acl *p; + int ret; + + if (S_ISLNK(*mode) || !IS_POSIXACL(dir)) + goto no_acl; + + p = __f2fs_get_acl(dir, ACL_TYPE_DEFAULT, dpage); + if (IS_ERR(p)) { + if (p == ERR_PTR(-EOPNOTSUPP)) + goto apply_umask; + return PTR_ERR(p); + } + + if (!p) + goto apply_umask; + + 
*acl = f2fs_acl_clone(p, GFP_NOFS); + if (!*acl) + return -ENOMEM; + + ret = f2fs_acl_create_masq(*acl, mode); + if (ret < 0) { + posix_acl_release(*acl); + return -ENOMEM; + } + + if (ret == 0) { + posix_acl_release(*acl); + *acl = NULL; + } + + if (!S_ISDIR(*mode)) { + posix_acl_release(p); + *default_acl = NULL; + } else { + *default_acl = p; + } + return 0; + +apply_umask: + *mode &= ~current_umask(); +no_acl: + *default_acl = NULL; + *acl = NULL; + return 0; +} + +int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage, + struct page *dpage) { - struct posix_acl *default_acl, *acl; + struct posix_acl *default_acl = NULL, *acl = NULL; int error = 0; - error = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); + error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage); if (error) return error; diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index e086465..997ca8e 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -38,14 +38,15 @@ struct f2fs_acl_header { extern struct posix_acl *f2fs_get_acl(struct inode *, int); extern int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int f2fs_init_acl(struct inode *, struct inode *, struct page *); +extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, + struct page *); #else #define f2fs_check_acl NULL #define f2fs_get_acl NULL #define f2fs_set_acl NULL static inline int f2fs_init_acl(struct inode *inode, struct inode *dir, - struct page *page) + struct page *ipage, struct page *dpage) { return 0; } diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index dd10a03..e6c271f 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -72,36 +72,36 @@ out: return page; } -struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index) -{ - bool readahead = false; - struct page *page; - - page = find_get_page(META_MAPPING(sbi), index); - if (!page || (page && !PageUptodate(page))) - readahead = true; - f2fs_put_page(page, 0); - - 
if (readahead) - ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); - return get_meta_page(sbi, index); -} - -static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type) +static inline bool is_valid_blkaddr(struct f2fs_sb_info *sbi, + block_t blkaddr, int type) { switch (type) { case META_NAT: - return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK; + break; case META_SIT: - return SIT_BLK_CNT(sbi); + if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) + return false; + break; case META_SSA: + if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || + blkaddr < SM_I(sbi)->ssa_blkaddr)) + return false; + break; case META_CP: - return 0; + if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || + blkaddr < __start_cp_addr(sbi))) + return false; + break; case META_POR: - return MAX_BLKADDR(sbi); + if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || + blkaddr < MAIN_BLKADDR(sbi))) + return false; + break; default: BUG(); } + + return true; } /* @@ -112,7 +112,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type block_t prev_blk_addr = 0; struct page *page; block_t blkno = start; - block_t max_blks = get_max_meta_blks(sbi, type); struct f2fs_io_info fio = { .type = META, @@ -122,18 +121,20 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type for (; nrpages-- > 0; blkno++) { block_t blk_addr; + if (!is_valid_blkaddr(sbi, blkno, type)) + goto out; + switch (type) { case META_NAT: - /* get nat block addr */ - if (unlikely(blkno >= max_blks)) + if (unlikely(blkno >= + NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid))) blkno = 0; + /* get nat block addr */ blk_addr = current_nat_addr(sbi, blkno * NAT_ENTRY_PER_BLOCK); break; case META_SIT: /* get sit block addr */ - if (unlikely(blkno >= max_blks)) - goto out; blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); if (blkno != start && prev_blk_addr + 1 != blk_addr) @@ -143,10 +144,6 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type case META_SSA: 
case META_CP: case META_POR: - if (unlikely(blkno >= max_blks)) - goto out; - if (unlikely(blkno < SEG0_BLKADDR(sbi))) - goto out; blk_addr = blkno; break; default: @@ -169,6 +166,20 @@ out: return blkno - start; } +void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index) +{ + struct page *page; + bool readahead = false; + + page = find_get_page(META_MAPPING(sbi), index); + if (!page || (page && !PageUptodate(page))) + readahead = true; + f2fs_put_page(page, 0); + + if (readahead) + ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR); +} + static int f2fs_write_meta_page(struct page *page, struct writeback_control *wbc) { @@ -178,7 +189,7 @@ static int f2fs_write_meta_page(struct page *page, if (unlikely(sbi->por_doing)) goto redirty_out; - if (wbc->for_reclaim) + if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0)) goto redirty_out; if (unlikely(f2fs_cp_error(sbi))) goto redirty_out; @@ -187,6 +198,9 @@ static int f2fs_write_meta_page(struct page *page, write_meta_page(sbi, page); dec_page_count(sbi, F2FS_DIRTY_META); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, META, WRITE); return 0; redirty_out: @@ -298,46 +312,57 @@ const struct address_space_operations f2fs_meta_aops = { static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; retry: - spin_lock(&sbi->ino_lock[type]); + if (radix_tree_preload(GFP_NOFS)) { + cond_resched(); + goto retry; + } + + spin_lock(&im->ino_lock); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + e = radix_tree_lookup(&im->ino_root, ino); if (!e) { e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC); if (!e) { - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); + radix_tree_preload_end(); goto retry; } - if (radix_tree_insert(&sbi->ino_root[type], ino, e)) { - spin_unlock(&sbi->ino_lock[type]); + if (radix_tree_insert(&im->ino_root, ino, e)) { + spin_unlock(&im->ino_lock); 
kmem_cache_free(ino_entry_slab, e); + radix_tree_preload_end(); goto retry; } memset(e, 0, sizeof(struct ino_entry)); e->ino = ino; - list_add_tail(&e->list, &sbi->ino_list[type]); + list_add_tail(&e->list, &im->ino_list); + if (type != ORPHAN_INO) + im->ino_num++; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); + radix_tree_preload_end(); } static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) { + struct inode_management *im = &sbi->im[type]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[type]); - e = radix_tree_lookup(&sbi->ino_root[type], ino); + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); if (e) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[type], ino); - if (type == ORPHAN_INO) - sbi->n_orphans--; - spin_unlock(&sbi->ino_lock[type]); + radix_tree_delete(&im->ino_root, ino); + im->ino_num--; + spin_unlock(&im->ino_lock); kmem_cache_free(ino_entry_slab, e); return; } - spin_unlock(&sbi->ino_lock[type]); + spin_unlock(&im->ino_lock); } void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) @@ -355,10 +380,12 @@ void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type) /* mode should be APPEND_INO or UPDATE_INO */ bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode) { + struct inode_management *im = &sbi->im[mode]; struct ino_entry *e; - spin_lock(&sbi->ino_lock[mode]); - e = radix_tree_lookup(&sbi->ino_root[mode], ino); - spin_unlock(&sbi->ino_lock[mode]); + + spin_lock(&im->ino_lock); + e = radix_tree_lookup(&im->ino_root, ino); + spin_unlock(&im->ino_lock); return e ? 
true : false; } @@ -368,36 +395,42 @@ void release_dirty_inode(struct f2fs_sb_info *sbi) int i; for (i = APPEND_INO; i <= UPDATE_INO; i++) { - spin_lock(&sbi->ino_lock[i]); - list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) { + struct inode_management *im = &sbi->im[i]; + + spin_lock(&im->ino_lock); + list_for_each_entry_safe(e, tmp, &im->ino_list, list) { list_del(&e->list); - radix_tree_delete(&sbi->ino_root[i], e->ino); + radix_tree_delete(&im->ino_root, e->ino); kmem_cache_free(ino_entry_slab, e); + im->ino_num--; } - spin_unlock(&sbi->ino_lock[i]); + spin_unlock(&im->ino_lock); } } int acquire_orphan_inode(struct f2fs_sb_info *sbi) { + struct inode_management *im = &sbi->im[ORPHAN_INO]; int err = 0; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - if (unlikely(sbi->n_orphans >= sbi->max_orphans)) + spin_lock(&im->ino_lock); + if (unlikely(im->ino_num >= sbi->max_orphans)) err = -ENOSPC; else - sbi->n_orphans++; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + im->ino_num++; + spin_unlock(&im->ino_lock); return err; } void release_orphan_inode(struct f2fs_sb_info *sbi) { - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - f2fs_bug_on(sbi, sbi->n_orphans == 0); - sbi->n_orphans--; - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + spin_lock(&im->ino_lock); + f2fs_bug_on(sbi, im->ino_num == 0); + im->ino_num--; + spin_unlock(&im->ino_lock); } void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) @@ -460,17 +493,19 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) struct f2fs_orphan_block *orphan_blk = NULL; unsigned int nentries = 0; unsigned short index; - unsigned short orphan_blocks = - (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans); + unsigned short orphan_blocks; struct page *page = NULL; struct ino_entry *orphan = NULL; + struct inode_management *im = &sbi->im[ORPHAN_INO]; + + orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num); for (index = 0; index < orphan_blocks; index++) 
grab_meta_page(sbi, start_blk + index); index = 1; - spin_lock(&sbi->ino_lock[ORPHAN_INO]); - head = &sbi->ino_list[ORPHAN_INO]; + spin_lock(&im->ino_lock); + head = &im->ino_list; /* loop for each orphan inode entry and write them in Jornal block */ list_for_each_entry(orphan, head, list) { @@ -510,7 +545,7 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) f2fs_put_page(page, 1); } - spin_unlock(&sbi->ino_lock[ORPHAN_INO]); + spin_unlock(&im->ino_lock); } static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, @@ -731,6 +766,9 @@ void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) struct dir_inode_entry *entry; struct inode *inode; retry: + if (unlikely(f2fs_cp_error(sbi))) + return; + spin_lock(&sbi->dir_inode_lock); head = &sbi->dir_inode_list; @@ -830,6 +868,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct f2fs_nm_info *nm_i = NM_I(sbi); + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; struct page *cp_page; @@ -889,7 +928,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) else clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); - orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans); + orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + orphan_blocks); @@ -905,7 +944,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) orphan_blocks); } - if (sbi->n_orphans) + if (orphan_num) set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); @@ -940,7 +979,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_put_page(cp_page, 1); } - if (sbi->n_orphans) { + if (orphan_num) { write_orphan_inodes(sbi, start_blk); start_blk += orphan_blocks; } @@ 
-975,6 +1014,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* Here, we only have one bio having CP pack */ sync_meta_pages(sbi, META_FLUSH, LONG_MAX); + /* wait for previous submitted meta pages writeback */ + wait_on_all_pages_writeback(sbi); + release_dirty_inode(sbi); if (unlikely(f2fs_cp_error(sbi))) @@ -1036,9 +1078,12 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) int i; for (i = 0; i < MAX_INO_ENTRY; i++) { - INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC); - spin_lock_init(&sbi->ino_lock[i]); - INIT_LIST_HEAD(&sbi->ino_list[i]); + struct inode_management *im = &sbi->im[i]; + + INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC); + spin_lock_init(&im->ino_lock); + INIT_LIST_HEAD(&im->ino_list); + im->ino_num = 0; } /* @@ -1047,7 +1092,6 @@ void init_ino_entry_info(struct f2fs_sb_info *sbi) * orphan entries with the limitation one reserved segment * for cp pack we can have max 1020*504 orphan entries */ - sbi->n_orphans = 0; sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS - NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8e58c4c..7ec697b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -61,11 +61,6 @@ static void f2fs_write_end_io(struct bio *bio, int err) dec_page_count(sbi, F2FS_WRITEBACK); } - if (sbi->wait_io) { - complete(sbi->wait_io); - sbi->wait_io = NULL; - } - if (!get_pages(sbi, F2FS_WRITEBACK) && !list_empty(&sbi->cp_wait.task_list)) wake_up(&sbi->cp_wait); @@ -95,34 +90,18 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, static void __submit_merged_bio(struct f2fs_bio_info *io) { struct f2fs_io_info *fio = &io->fio; - int rw; if (!io->bio) return; - rw = fio->rw; - - if (is_read_io(rw)) { - trace_f2fs_submit_read_bio(io->sbi->sb, rw, - fio->type, io->bio); - submit_bio(rw, io->bio); - } else { - trace_f2fs_submit_write_bio(io->sbi->sb, rw, - fio->type, io->bio); - /* - * META_FLUSH is only from the checkpoint procedure, and we - 
* should wait this metadata bio for FS consistency. - */ - if (fio->type == META_FLUSH) { - DECLARE_COMPLETION_ONSTACK(wait); - io->sbi->wait_io = &wait; - submit_bio(rw, io->bio); - wait_for_completion(&wait); - } else { - submit_bio(rw, io->bio); - } - } + if (is_read_io(fio->rw)) + trace_f2fs_submit_read_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + else + trace_f2fs_submit_write_bio(io->sbi->sb, fio->rw, + fio->type, io->bio); + submit_bio(fio->rw, io->bio); io->bio = NULL; } @@ -257,9 +236,6 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index) bool need_put = dn->inode_page ? false : true; int err; - /* if inode_page exists, index should be zero */ - f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index); - err = get_dnode_of_data(dn, index, ALLOC_NODE); if (err) return err; @@ -740,14 +716,14 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, static int f2fs_read_data_page(struct file *file, struct page *page) { struct inode *inode = page->mapping->host; - int ret; + int ret = -EAGAIN; trace_f2fs_readpage(page, DATA); /* If the file has inline data, try to read it directly */ if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); - else + if (ret == -EAGAIN) ret = mpage_readpage(page, get_data_block); return ret; @@ -859,10 +835,11 @@ write: else if (has_not_enough_free_secs(sbi, 0)) goto redirty_out; + err = -EAGAIN; f2fs_lock_op(sbi); - if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) - err = f2fs_write_inline_data(inode, page, offset); - else + if (f2fs_has_inline_data(inode)) + err = f2fs_write_inline_data(inode, page); + if (err == -EAGAIN) err = do_write_data_page(page, &fio); f2fs_unlock_op(sbi); done: @@ -951,7 +928,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - struct page *page; + struct page *page, *ipage; pgoff_t index = ((unsigned long long) pos) >> 
PAGE_CACHE_SHIFT; struct dnode_of_data dn; int err = 0; @@ -959,45 +936,60 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping, trace_f2fs_write_begin(inode, pos, len, flags); f2fs_balance_fs(sbi); -repeat: - err = f2fs_convert_inline_data(inode, pos + len, NULL); - if (err) - goto fail; + /* + * We should check this at this moment to avoid deadlock on inode page + * and #0 page. The locking rule for inline_data conversion should be: + * lock_page(page #0) -> lock_page(inode_page) + */ + if (index != 0) { + err = f2fs_convert_inline_inode(inode); + if (err) + goto fail; + } +repeat: page = grab_cache_page_write_begin(mapping, index, flags); if (!page) { err = -ENOMEM; goto fail; } - /* to avoid latency during memory pressure */ - unlock_page(page); - *pagep = page; - if (f2fs_has_inline_data(inode) && (pos + len) <= MAX_INLINE_DATA) - goto inline_data; - f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); - err = f2fs_reserve_block(&dn, index); - f2fs_unlock_op(sbi); - if (err) { - f2fs_put_page(page, 0); - goto fail; - } -inline_data: - lock_page(page); - if (unlikely(page->mapping != mapping)) { - f2fs_put_page(page, 1); - goto repeat; + + /* check inline_data */ + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto unlock_fail; } - f2fs_wait_on_page_writeback(page, DATA); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) { + if (pos + len <= MAX_INLINE_DATA) { + read_inline_data(page, ipage); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); + goto put_next; + } + err = f2fs_convert_inline_page(&dn, page); + if (err) + goto put_fail; + } + err = f2fs_reserve_block(&dn, index); + if (err) + goto put_fail; +put_next: + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) return 0; + f2fs_wait_on_page_writeback(page, DATA); + if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) { unsigned 
start = pos & (PAGE_CACHE_SIZE - 1); unsigned end = start + len; @@ -1010,18 +1002,10 @@ inline_data: if (dn.data_blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_CACHE_SIZE); } else { - if (f2fs_has_inline_data(inode)) { - err = f2fs_read_inline_data(inode, page); - if (err) { - page_cache_release(page); - goto fail; - } - } else { - err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, - READ_SYNC); - if (err) - goto fail; - } + err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, + READ_SYNC); + if (err) + goto fail; lock_page(page); if (unlikely(!PageUptodate(page))) { @@ -1038,6 +1022,12 @@ out: SetPageUptodate(page); clear_cold_data(page); return 0; + +put_fail: + f2fs_put_dnode(&dn); +unlock_fail: + f2fs_unlock_op(sbi); + f2fs_put_page(page, 1); fail: f2fs_write_failed(mapping, pos + len); return err; @@ -1052,10 +1042,7 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); - if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) - register_inmem_page(inode, page); - else - set_page_dirty(page); + set_page_dirty(page); if (pos + copied > i_size_read(inode)) { i_size_write(inode, pos + copied); @@ -1093,9 +1080,12 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_iter_count(iter); int err; - /* Let buffer I/O handle the inline data case. 
*/ - if (f2fs_has_inline_data(inode)) - return 0; + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } if (check_direct_IO(inode, rw, iter, offset)) return 0; @@ -1119,6 +1109,9 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE) return; + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + invalidate_inmem_page(inode, page); + if (PageDirty(page)) inode_dec_dirty_pages(inode); ClearPagePrivate(page); @@ -1138,6 +1131,12 @@ static int f2fs_set_data_page_dirty(struct page *page) trace_f2fs_set_page_dirty(page, DATA); SetPageUptodate(page); + + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) { + register_inmem_page(inode, page); + return 1; + } + mark_inode_dirty(inode); if (!PageDirty(page)) { @@ -1152,9 +1151,12 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; - if (f2fs_has_inline_data(inode)) - return 0; - + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = f2fs_convert_inline_inode(inode); + if (err) + return err; + } return generic_block_bmap(mapping, block, get_data_block); } diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0a91ab8..91e8f69 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -39,13 +39,15 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_dirs = sbi->n_dirty_dirs; si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); + si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); si->valid_count = valid_user_blocks(sbi); si->valid_node_count = valid_node_count(sbi); si->valid_inode_count = 
valid_inode_count(sbi); - si->inline_inode = sbi->inline_inode; + si->inline_inode = atomic_read(&sbi->inline_inode); + si->inline_dir = atomic_read(&sbi->inline_dir); si->utilization = utilization(sbi); si->free_segs = free_segments(sbi); @@ -118,6 +120,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi) { struct f2fs_stat_info *si = F2FS_STAT(sbi); unsigned npages; + int i; if (si->base_mem) goto get_cache; @@ -167,8 +170,9 @@ get_cache: si->cache_mem += npages << PAGE_CACHE_SHIFT; npages = META_MAPPING(sbi)->nrpages; si->cache_mem += npages << PAGE_CACHE_SHIFT; - si->cache_mem += sbi->n_orphans * sizeof(struct ino_entry); si->cache_mem += sbi->n_dirty_dirs * sizeof(struct dir_inode_entry); + for (i = 0; i <= UPDATE_INO; i++) + si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry); } static int stat_show(struct seq_file *s, void *v) @@ -200,6 +204,8 @@ static int stat_show(struct seq_file *s, void *v) si->valid_count - si->valid_node_count); seq_printf(s, " - Inline_data Inode: %u\n", si->inline_inode); + seq_printf(s, " - Inline_dentry Inode: %u\n", + si->inline_dir); seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n", si->main_area_segs, si->main_area_sections, si->main_area_zones); @@ -244,6 +250,8 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, "\nExtent Hit Ratio: %d / %d\n", si->hit_ext, si->total_ext); seq_puts(s, "\nBalancing F2FS Async:\n"); + seq_printf(s, " - inmem: %4d\n", + si->inmem_pages); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d\n", @@ -321,6 +329,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) si->sbi = sbi; sbi->stat_info = si; + atomic_set(&sbi->inline_inode, 0); + atomic_set(&sbi->inline_dir, 0); + mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); mutex_unlock(&f2fs_stat_mutex); diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b54f871..b1a7d57 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -37,7 
+37,7 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } -static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { +unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { [F2FS_FT_UNKNOWN] = DT_UNKNOWN, [F2FS_FT_REG_FILE] = DT_REG, [F2FS_FT_DIR] = DT_DIR, @@ -59,7 +59,7 @@ static unsigned char f2fs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFLNK >> S_SHIFT] = F2FS_FT_SYMLINK, }; -static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) +void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) { umode_t mode = inode->i_mode; de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; @@ -90,51 +90,70 @@ static bool early_match_name(size_t namelen, f2fs_hash_t namehash, } static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, - struct qstr *name, int *max_slots, - f2fs_hash_t namehash, struct page **res_page) + struct qstr *name, int *max_slots, + struct page **res_page) +{ + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; + + dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + de = find_target_dentry(name, max_slots, &d); + + if (de) + *res_page = dentry_page; + else + kunmap(dentry_page); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. 
+ */ + f2fs_bug_on(F2FS_P_SB(dentry_page), d.max < 0); + return de; +} + +struct f2fs_dir_entry *find_target_dentry(struct qstr *name, int *max_slots, + struct f2fs_dentry_ptr *d) { struct f2fs_dir_entry *de; unsigned long bit_pos = 0; - struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); - const void *dentry_bits = &dentry_blk->dentry_bitmap; + f2fs_hash_t namehash = f2fs_dentry_hash(name); int max_len = 0; - while (bit_pos < NR_DENTRY_IN_BLOCK) { - if (!test_bit_le(bit_pos, dentry_bits)) { + if (max_slots) + *max_slots = 0; + while (bit_pos < d->max) { + if (!test_bit_le(bit_pos, d->bitmap)) { if (bit_pos == 0) max_len = 1; - else if (!test_bit_le(bit_pos - 1, dentry_bits)) + else if (!test_bit_le(bit_pos - 1, d->bitmap)) max_len++; bit_pos++; continue; } - de = &dentry_blk->dentry[bit_pos]; - if (early_match_name(name->len, namehash, de)) { - if (!memcmp(dentry_blk->filename[bit_pos], - name->name, - name->len)) { - *res_page = dentry_page; - goto found; - } - } - if (max_len > *max_slots) { + de = &d->dentry[bit_pos]; + if (early_match_name(name->len, namehash, de) && + !memcmp(d->filename[bit_pos], name->name, name->len)) + goto found; + + if (max_slots && *max_slots >= 0 && max_len > *max_slots) { *max_slots = max_len; max_len = 0; } - /* - * For the most part, it should be a bug when name_len is zero. - * We stop here for figuring out where the bugs has occurred. 
- */ - f2fs_bug_on(F2FS_P_SB(dentry_page), !de->name_len); + /* remain bug on condition */ + if (unlikely(!de->name_len)) + d->max = -1; bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); } de = NULL; - kunmap(dentry_page); found: - if (max_len > *max_slots) + if (max_slots && max_len > *max_slots) *max_slots = max_len; return de; } @@ -149,7 +168,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, struct page *dentry_page; struct f2fs_dir_entry *de = NULL; bool room = false; - int max_slots = 0; + int max_slots; f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH); @@ -168,8 +187,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, continue; } - de = find_in_block(dentry_page, name, &max_slots, - namehash, res_page); + de = find_in_block(dentry_page, name, &max_slots, res_page); if (de) break; @@ -201,6 +219,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, unsigned int max_depth; unsigned int level; + if (f2fs_has_inline_dentry(dir)) + return find_in_inline_dir(dir, child, res_page); + if (npages == 0) return NULL; @@ -227,6 +248,9 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) struct f2fs_dir_entry *de; struct f2fs_dentry_block *dentry_blk; + if (f2fs_has_inline_dentry(dir)) + return f2fs_parent_inline_dir(dir, p); + page = get_lock_data_page(dir, 0); if (IS_ERR(page)) return NULL; @@ -247,7 +271,7 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) de = f2fs_find_entry(dir, qstr, &page); if (de) { res = le32_to_cpu(de->ino); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); } @@ -257,11 +281,13 @@ ino_t f2fs_inode_by_name(struct inode *dir, struct qstr *qstr) void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode) { + enum page_type type = f2fs_has_inline_dentry(dir) ? 
NODE : DATA; lock_page(page); - f2fs_wait_on_page_writeback(page, DATA); + f2fs_wait_on_page_writeback(page, type); de->ino = cpu_to_le32(inode->i_ino); set_de_type(de, inode); - kunmap(page); + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); set_page_dirty(page); dir->i_mtime = dir->i_ctime = CURRENT_TIME; mark_inode_dirty(dir); @@ -296,36 +322,48 @@ int update_dent_inode(struct inode *inode, const struct qstr *name) return 0; } -static int make_empty_dir(struct inode *inode, - struct inode *parent, struct page *page) +void do_make_empty_dir(struct inode *inode, struct inode *parent, + struct f2fs_dentry_ptr *d) { - struct page *dentry_page; - struct f2fs_dentry_block *dentry_blk; struct f2fs_dir_entry *de; - dentry_page = get_new_data_page(inode, page, 0, true); - if (IS_ERR(dentry_page)) - return PTR_ERR(dentry_page); - - - dentry_blk = kmap_atomic(dentry_page); - - de = &dentry_blk->dentry[0]; + de = &d->dentry[0]; de->name_len = cpu_to_le16(1); de->hash_code = 0; de->ino = cpu_to_le32(inode->i_ino); - memcpy(dentry_blk->filename[0], ".", 1); + memcpy(d->filename[0], ".", 1); set_de_type(de, inode); - de = &dentry_blk->dentry[1]; + de = &d->dentry[1]; de->hash_code = 0; de->name_len = cpu_to_le16(2); de->ino = cpu_to_le32(parent->i_ino); - memcpy(dentry_blk->filename[1], "..", 2); + memcpy(d->filename[1], "..", 2); set_de_type(de, inode); - test_and_set_bit_le(0, &dentry_blk->dentry_bitmap); - test_and_set_bit_le(1, &dentry_blk->dentry_bitmap); + test_and_set_bit_le(0, (void *)d->bitmap); + test_and_set_bit_le(1, (void *)d->bitmap); +} + +static int make_empty_dir(struct inode *inode, + struct inode *parent, struct page *page) +{ + struct page *dentry_page; + struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_ptr d; + + if (f2fs_has_inline_dentry(inode)) + return make_empty_inline_dir(inode, parent, page); + + dentry_page = get_new_data_page(inode, page, 0, true); + if (IS_ERR(dentry_page)) + return PTR_ERR(dentry_page); + + dentry_blk = 
kmap_atomic(dentry_page); + + make_dentry_ptr(&d, (void *)dentry_blk, 1); + do_make_empty_dir(inode, parent, &d); + kunmap_atomic(dentry_blk); set_page_dirty(dentry_page); @@ -333,8 +371,8 @@ static int make_empty_dir(struct inode *inode, return 0; } -static struct page *init_inode_metadata(struct inode *inode, - struct inode *dir, const struct qstr *name) +struct page *init_inode_metadata(struct inode *inode, struct inode *dir, + const struct qstr *name, struct page *dpage) { struct page *page; int err; @@ -350,7 +388,7 @@ static struct page *init_inode_metadata(struct inode *inode, goto error; } - err = f2fs_init_acl(inode, dir, page); + err = f2fs_init_acl(inode, dir, page, dpage); if (err) goto put_error; @@ -395,7 +433,7 @@ error: return ERR_PTR(err); } -static void update_parent_metadata(struct inode *dir, struct inode *inode, +void update_parent_metadata(struct inode *dir, struct inode *inode, unsigned int current_depth) { if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { @@ -417,27 +455,23 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, clear_inode_flag(F2FS_I(inode), FI_INC_LINK); } -static int room_for_filename(struct f2fs_dentry_block *dentry_blk, int slots) +int room_for_filename(const void *bitmap, int slots, int max_slots) { int bit_start = 0; int zero_start, zero_end; next: - zero_start = find_next_zero_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_start); - if (zero_start >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + zero_start = find_next_zero_bit_le(bitmap, max_slots, bit_start); + if (zero_start >= max_slots) + return max_slots; - zero_end = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - zero_start); + zero_end = find_next_bit_le(bitmap, max_slots, zero_start); if (zero_end - zero_start >= slots) return zero_start; bit_start = zero_end + 1; - if (zero_end + 1 >= NR_DENTRY_IN_BLOCK) - return NR_DENTRY_IN_BLOCK; + if (zero_end + 1 >= max_slots) + return max_slots; goto 
next; } @@ -463,6 +497,14 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, int err = 0; int i; + if (f2fs_has_inline_dentry(dir)) { + err = f2fs_add_inline_entry(dir, name, inode); + if (!err || err != -EAGAIN) + return err; + else + err = 0; + } + dentry_hash = f2fs_dentry_hash(name); level = 0; current_depth = F2FS_I(dir)->i_current_depth; @@ -491,7 +533,8 @@ start: return PTR_ERR(dentry_page); dentry_blk = kmap(dentry_page); - bit_pos = room_for_filename(dentry_blk, slots); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_DENTRY_IN_BLOCK); if (bit_pos < NR_DENTRY_IN_BLOCK) goto add_dentry; @@ -506,7 +549,7 @@ add_dentry: f2fs_wait_on_page_writeback(dentry_page, DATA); down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name); + page = init_inode_metadata(inode, dir, name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -545,7 +588,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, NULL); + page = init_inode_metadata(inode, dir, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -560,26 +603,57 @@ fail: return err; } +void f2fs_drop_nlink(struct inode *dir, struct inode *inode, struct page *page) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + + down_write(&F2FS_I(inode)->i_sem); + + if (S_ISDIR(inode->i_mode)) { + drop_nlink(dir); + if (page) + update_inode(dir, page); + else + update_inode_page(dir); + } + inode->i_ctime = CURRENT_TIME; + + drop_nlink(inode); + if (S_ISDIR(inode->i_mode)) { + drop_nlink(inode); + i_size_write(inode, 0); + } + up_write(&F2FS_I(inode)->i_sem); + update_inode_page(inode); + + if (inode->i_nlink == 0) + add_orphan_inode(sbi, inode->i_ino); + else + release_orphan_inode(sbi); +} + /* * It only removes the dentry from the dentry page, corresponding name * entry in name page does not need to be touched during deletion. 
*/ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, - struct inode *inode) + struct inode *dir, struct inode *inode) { struct f2fs_dentry_block *dentry_blk; unsigned int bit_pos; - struct inode *dir = page->mapping->host; int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); int i; + if (f2fs_has_inline_dentry(dir)) + return f2fs_delete_inline_entry(dentry, page, dir, inode); + lock_page(page); f2fs_wait_on_page_writeback(page, DATA); dentry_blk = page_address(page); bit_pos = dentry - dentry_blk->dentry; for (i = 0; i < slots; i++) - test_and_clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + clear_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); /* Let's check and deallocate this dentry page */ bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, @@ -590,29 +664,8 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, dir->i_ctime = dir->i_mtime = CURRENT_TIME; - if (inode) { - struct f2fs_sb_info *sbi = F2FS_I_SB(dir); - - down_write(&F2FS_I(inode)->i_sem); - - if (S_ISDIR(inode->i_mode)) { - drop_nlink(dir); - update_inode_page(dir); - } - inode->i_ctime = CURRENT_TIME; - drop_nlink(inode); - if (S_ISDIR(inode->i_mode)) { - drop_nlink(inode); - i_size_write(inode, 0); - } - up_write(&F2FS_I(inode)->i_sem); - update_inode_page(inode); - - if (inode->i_nlink == 0) - add_orphan_inode(sbi, inode->i_ino); - else - release_orphan_inode(sbi); - } + if (inode) + f2fs_drop_nlink(dir, inode, NULL); if (bit_pos == NR_DENTRY_IN_BLOCK) { truncate_hole(dir, page->index, page->index + 1); @@ -628,9 +681,12 @@ bool f2fs_empty_dir(struct inode *dir) unsigned long bidx; struct page *dentry_page; unsigned int bit_pos; - struct f2fs_dentry_block *dentry_blk; + struct f2fs_dentry_block *dentry_blk; unsigned long nblock = dir_blocks(dir); + if (f2fs_has_inline_dentry(dir)) + return f2fs_empty_inline_dir(dir); + for (bidx = 0; bidx < nblock; bidx++) { dentry_page = get_lock_data_page(dir, bidx); if (IS_ERR(dentry_page)) { @@ 
-640,7 +696,6 @@ bool f2fs_empty_dir(struct inode *dir) return false; } - dentry_blk = kmap_atomic(dentry_page); if (bidx == 0) bit_pos = 2; @@ -659,19 +714,48 @@ bool f2fs_empty_dir(struct inode *dir) return true; } +bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, + unsigned int start_pos) +{ + unsigned char d_type = DT_UNKNOWN; + unsigned int bit_pos; + struct f2fs_dir_entry *de = NULL; + + bit_pos = ((unsigned long)ctx->pos % d->max); + + while (bit_pos < d->max) { + bit_pos = find_next_bit_le(d->bitmap, d->max, bit_pos); + if (bit_pos >= d->max) + break; + + de = &d->dentry[bit_pos]; + if (de->file_type < F2FS_FT_MAX) + d_type = f2fs_filetype_table[de->file_type]; + else + d_type = DT_UNKNOWN; + if (!dir_emit(ctx, d->filename[bit_pos], + le16_to_cpu(de->name_len), + le32_to_cpu(de->ino), d_type)) + return true; + + bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); + ctx->pos = start_pos + bit_pos; + } + return false; +} + static int f2fs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); unsigned long npages = dir_blocks(inode); - unsigned int bit_pos = 0; struct f2fs_dentry_block *dentry_blk = NULL; - struct f2fs_dir_entry *de = NULL; struct page *dentry_page = NULL; struct file_ra_state *ra = &file->f_ra; unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); - unsigned char d_type = DT_UNKNOWN; + struct f2fs_dentry_ptr d; - bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); + if (f2fs_has_inline_dentry(inode)) + return f2fs_read_inline_dir(file, ctx); /* readahead for multi pages of dir */ if (npages - n > 1 && !ra_has_index(ra, n)) @@ -684,28 +768,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) continue; dentry_blk = kmap(dentry_page); - while (bit_pos < NR_DENTRY_IN_BLOCK) { - bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, - NR_DENTRY_IN_BLOCK, - bit_pos); - if (bit_pos >= NR_DENTRY_IN_BLOCK) - break; - - de = 
&dentry_blk->dentry[bit_pos]; - if (de->file_type < F2FS_FT_MAX) - d_type = f2fs_filetype_table[de->file_type]; - else - d_type = DT_UNKNOWN; - if (!dir_emit(ctx, - dentry_blk->filename[bit_pos], - le16_to_cpu(de->name_len), - le32_to_cpu(de->ino), d_type)) - goto stop; - bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); - ctx->pos = n * NR_DENTRY_IN_BLOCK + bit_pos; - } - bit_pos = 0; + make_dentry_ptr(&d, (void *)dentry_blk, 1); + + if (f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK)) + goto stop; + ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; kunmap(dentry_page); f2fs_put_page(dentry_page, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8171e80..ec58bb2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -46,8 +46,10 @@ #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 #define F2FS_MOUNT_INLINE_XATTR 0x00000080 #define F2FS_MOUNT_INLINE_DATA 0x00000100 -#define F2FS_MOUNT_FLUSH_MERGE 0x00000200 -#define F2FS_MOUNT_NOBARRIER 0x00000400 +#define F2FS_MOUNT_INLINE_DENTRY 0x00000200 +#define F2FS_MOUNT_FLUSH_MERGE 0x00000400 +#define F2FS_MOUNT_NOBARRIER 0x00000800 +#define F2FS_MOUNT_FASTBOOT 0x00001000 #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) @@ -211,6 +213,32 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size, /* * For INODE and NODE manager */ +/* for directory operations */ +struct f2fs_dentry_ptr { + const void *bitmap; + struct f2fs_dir_entry *dentry; + __u8 (*filename)[F2FS_SLOT_LEN]; + int max; +}; + +static inline void make_dentry_ptr(struct f2fs_dentry_ptr *d, + void *src, int type) +{ + if (type == 1) { + struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; + d->max = NR_DENTRY_IN_BLOCK; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } else { + struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; + d->max = NR_INLINE_DENTRY; + d->bitmap = 
&t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; + } +} + /* * XATTR_NODE_OFFSET stores xattrs to one node block per file keeping -1 * as its node offset to distinguish from index node blocks. @@ -269,6 +297,7 @@ struct f2fs_inode_info { struct extent_info ext; /* in-memory extent cache entry */ struct dir_inode_entry *dirty_dir; /* the pointer of dirty dir */ + struct radix_tree_root inmem_root; /* radix tree for inmem pages */ struct list_head inmem_pages; /* inmemory pages managed by f2fs */ struct mutex inmem_lock; /* lock for inmemory pages */ }; @@ -303,7 +332,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ @@ -433,6 +462,7 @@ enum count_type { F2FS_DIRTY_DENTS, F2FS_DIRTY_NODES, F2FS_DIRTY_META, + F2FS_INMEM_PAGES, NR_COUNT_TYPE, }; @@ -470,6 +500,14 @@ struct f2fs_bio_info { struct rw_semaphore io_rwsem; /* blocking op for bio */ }; +/* for inner inode cache management */ +struct inode_management { + struct radix_tree_root ino_root; /* ino entry array */ + spinlock_t ino_lock; /* for ino entry lock */ + struct list_head ino_list; /* inode list head */ + unsigned long ino_num; /* number of entries */ +}; + struct f2fs_sb_info { struct super_block *sb; /* pointer to VFS super block */ struct proc_dir_entry *s_proc; /* proc entry */ @@ -488,7 +526,6 @@ struct f2fs_sb_info { /* for bio operations */ struct f2fs_bio_info read_io; /* for read bios */ struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ - struct completion *wait_io; /* for completion bios */ /* for checkpoint */ struct 
f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ @@ -500,13 +537,9 @@ struct f2fs_sb_info { bool por_doing; /* recovery is doing or not */ wait_queue_head_t cp_wait; - /* for inode management */ - struct radix_tree_root ino_root[MAX_INO_ENTRY]; /* ino entry array */ - spinlock_t ino_lock[MAX_INO_ENTRY]; /* for ino entry lock */ - struct list_head ino_list[MAX_INO_ENTRY]; /* inode list head */ + struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ /* for orphan inode, use 0'th array */ - unsigned int n_orphans; /* # of orphan inodes */ unsigned int max_orphans; /* max orphan inodes */ /* for directory inode management */ @@ -557,7 +590,8 @@ struct f2fs_sb_info { unsigned int segment_count[2]; /* # of allocated segments */ unsigned int block_count[2]; /* # of allocated blocks */ int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ - int inline_inode; /* # of inline_data inodes */ + atomic_t inline_inode; /* # of inline_data inodes */ + atomic_t inline_dir; /* # of inline_dentry inodes */ int bg_gc; /* background gc calls */ unsigned int n_dirty_dirs; /* # of dir inodes */ #endif @@ -988,6 +1022,13 @@ retry: return entry; } +static inline void f2fs_radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + while (radix_tree_insert(root, index, item)) + cond_resched(); +} + #define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) static inline bool IS_INODE(struct page *page) @@ -1020,7 +1061,7 @@ static inline int f2fs_test_bit(unsigned int nr, char *addr) return mask & *addr; } -static inline int f2fs_set_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_set_bit(unsigned int nr, char *addr) { int mask; int ret; @@ -1032,7 +1073,7 @@ static inline int f2fs_set_bit(unsigned int nr, char *addr) return ret; } -static inline int f2fs_clear_bit(unsigned int nr, char *addr) +static inline int f2fs_test_and_clear_bit(unsigned int nr, char *addr) { int mask; int ret; @@ -1044,6 +1085,15 @@ static 
inline int f2fs_clear_bit(unsigned int nr, char *addr) return ret; } +static inline void f2fs_change_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + *addr ^= mask; +} + /* used for f2fs_inode_info->flags */ enum { FI_NEW_INODE, /* indicate newly allocated inode */ @@ -1057,11 +1107,13 @@ enum { FI_NO_EXTENT, /* not to use the extent cache */ FI_INLINE_XATTR, /* used for inline xattr */ FI_INLINE_DATA, /* used for inline data*/ + FI_INLINE_DENTRY, /* used for inline dentry */ FI_APPEND_WRITE, /* inode has appended data */ FI_UPDATE_WRITE, /* inode has in-place-update data */ FI_NEED_IPU, /* used for ipu per file */ FI_ATOMIC_FILE, /* indicate atomic file */ FI_VOLATILE_FILE, /* indicate volatile file */ + FI_DATA_EXIST, /* indicate data exists */ }; static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) @@ -1087,15 +1139,6 @@ static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) set_inode_flag(fi, FI_ACL_MODE); } -static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) -{ - if (is_inode_flag_set(fi, FI_ACL_MODE)) { - clear_inode_flag(fi, FI_ACL_MODE); - return 1; - } - return 0; -} - static inline void get_inline_info(struct f2fs_inode_info *fi, struct f2fs_inode *ri) { @@ -1103,6 +1146,10 @@ static inline void get_inline_info(struct f2fs_inode_info *fi, set_inode_flag(fi, FI_INLINE_XATTR); if (ri->i_inline & F2FS_INLINE_DATA) set_inode_flag(fi, FI_INLINE_DATA); + if (ri->i_inline & F2FS_INLINE_DENTRY) + set_inode_flag(fi, FI_INLINE_DENTRY); + if (ri->i_inline & F2FS_DATA_EXIST) + set_inode_flag(fi, FI_DATA_EXIST); } static inline void set_raw_inline(struct f2fs_inode_info *fi, @@ -1114,6 +1161,10 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, ri->i_inline |= F2FS_INLINE_XATTR; if (is_inode_flag_set(fi, FI_INLINE_DATA)) ri->i_inline |= F2FS_INLINE_DATA; + if (is_inode_flag_set(fi, FI_INLINE_DENTRY)) + ri->i_inline |= 
F2FS_INLINE_DENTRY; + if (is_inode_flag_set(fi, FI_DATA_EXIST)) + ri->i_inline |= F2FS_DATA_EXIST; } static inline int f2fs_has_inline_xattr(struct inode *inode) @@ -1148,6 +1199,17 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DATA); } +static inline void f2fs_clear_inline_inode(struct inode *inode) +{ + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_DATA_EXIST); +} + +static inline int f2fs_exist_data(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_DATA_EXIST); +} + static inline bool f2fs_is_atomic_file(struct inode *inode) { return is_inode_flag_set(F2FS_I(inode), FI_ATOMIC_FILE); @@ -1164,6 +1226,23 @@ static inline void *inline_data_addr(struct page *page) return (void *)&(ri->i_addr[1]); } +static inline int f2fs_has_inline_dentry(struct inode *inode) +{ + return is_inode_flag_set(F2FS_I(inode), FI_INLINE_DENTRY); +} + +static inline void *inline_dentry_addr(struct page *page) +{ + struct f2fs_inode *ri = F2FS_INODE(page); + return (void *)&(ri->i_addr[1]); +} + +static inline void f2fs_dentry_kunmap(struct inode *dir, struct page *page) +{ + if (!f2fs_has_inline_dentry(dir)) + kunmap(page); +} + static inline int f2fs_readonly(struct super_block *sb) { return sb->s_flags & MS_RDONLY; @@ -1224,6 +1303,19 @@ struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ +extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; +void set_de_type(struct f2fs_dir_entry *, struct inode *); +struct f2fs_dir_entry *find_target_dentry(struct qstr *, int *, + struct f2fs_dentry_ptr *); +bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, + unsigned int); +void do_make_empty_dir(struct inode *, struct inode *, + struct f2fs_dentry_ptr *); +struct page *init_inode_metadata(struct inode *, struct inode *, + const struct qstr *, struct page *); +void update_parent_metadata(struct inode *, struct inode *, unsigned int); +int 
room_for_filename(const void *, int, int); +void f2fs_drop_nlink(struct inode *, struct inode *, struct page *); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, struct page **); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); @@ -1232,7 +1324,8 @@ void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, struct page *, struct inode *); int update_dent_inode(struct inode *, const struct qstr *); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); -void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, + struct inode *); int f2fs_do_tmpfile(struct inode *, struct inode *); int f2fs_make_empty(struct inode *, struct inode *); bool f2fs_empty_dir(struct inode *); @@ -1296,6 +1389,7 @@ void destroy_node_manager_caches(void); * segment.c */ void register_inmem_page(struct inode *, struct page *); +void invalidate_inmem_page(struct inode *, struct page *); void commit_inmem_pages(struct inode *, bool); void f2fs_balance_fs(struct f2fs_sb_info *); void f2fs_balance_fs_bg(struct f2fs_sb_info *); @@ -1337,8 +1431,8 @@ void destroy_segment_manager_caches(void); */ struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); -struct page *get_meta_page_ra(struct f2fs_sb_info *, pgoff_t); int ra_meta_pages(struct f2fs_sb_info *, block_t, int, int); +void ra_meta_pages_cond(struct f2fs_sb_info *, pgoff_t); long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); void add_dirty_inode(struct f2fs_sb_info *, nid_t, int type); void remove_dirty_inode(struct f2fs_sb_info *, nid_t, int type); @@ -1405,7 +1499,7 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; int nats, sits, fnids; int total_count, utilization; - int bg_gc, inline_inode; + int bg_gc, inline_inode, inline_dir, inmem_pages; unsigned int 
valid_count, valid_node_count, valid_inode_count; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -1438,14 +1532,23 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode++); \ + (atomic_inc(&F2FS_I_SB(inode)->inline_inode)); \ } while (0) #define stat_dec_inline_inode(inode) \ do { \ if (f2fs_has_inline_data(inode)) \ - ((F2FS_I_SB(inode))->inline_inode--); \ + (atomic_dec(&F2FS_I_SB(inode)->inline_inode)); \ + } while (0) +#define stat_inc_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + (atomic_inc(&F2FS_I_SB(inode)->inline_dir)); \ + } while (0) +#define stat_dec_inline_dir(inode) \ + do { \ + if (f2fs_has_inline_dentry(inode)) \ + (atomic_dec(&F2FS_I_SB(inode)->inline_dir)); \ } while (0) - #define stat_inc_seg_type(sbi, curseg) \ ((sbi)->segment_count[(curseg)->alloc_type]++) #define stat_inc_block_count(sbi, curseg) \ @@ -1492,6 +1595,8 @@ void f2fs_destroy_root_stats(void); #define stat_inc_read_hit(sb) #define stat_inc_inline_inode(inode) #define stat_dec_inline_inode(inode) +#define stat_inc_inline_dir(inode) +#define stat_dec_inline_dir(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_seg_count(si, type) @@ -1519,9 +1624,20 @@ extern const struct inode_operations f2fs_special_inode_operations; * inline.c */ bool f2fs_may_inline(struct inode *); +void read_inline_data(struct page *, struct page *); int f2fs_read_inline_data(struct inode *, struct page *); -int f2fs_convert_inline_data(struct inode *, pgoff_t, struct page *); -int f2fs_write_inline_data(struct inode *, struct page *, unsigned int); -void truncate_inline_data(struct inode *, u64); +int f2fs_convert_inline_page(struct dnode_of_data *, struct page *); +int f2fs_convert_inline_inode(struct inode *); +int f2fs_write_inline_data(struct inode *, struct 
page *); +void truncate_inline_data(struct page *, u64); bool recover_inline_data(struct inode *, struct page *); +struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **); +int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *); +void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, + struct inode *, struct inode *); +bool f2fs_empty_inline_dir(struct inode *); +int f2fs_read_inline_dir(struct file *, struct dir_context *); #endif diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 8e68bb6..3c27e0e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -41,18 +41,18 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, sb_start_pagefault(inode->i_sb); - /* force to convert with normal data indices */ - err = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, page); - if (err) - goto out; + f2fs_bug_on(sbi, f2fs_has_inline_data(inode)); /* block allocation */ f2fs_lock_op(sbi); set_new_dnode(&dn, inode, NULL, NULL, 0); err = f2fs_reserve_block(&dn, page->index); - f2fs_unlock_op(sbi); - if (err) + if (err) { + f2fs_unlock_op(sbi); goto out; + } + f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); file_update_time(vma->vm_file); lock_page(page); @@ -130,10 +130,45 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) need_cp = true; + else if (test_opt(sbi, FASTBOOT)) + need_cp = true; + else if (sbi->active_logs == 2) + need_cp = true; return need_cp; } +static bool need_inode_page_update(struct f2fs_sb_info *sbi, nid_t ino) +{ + struct page *i = find_get_page(NODE_MAPPING(sbi), ino); + bool ret = false; + /* But we need to avoid that there are some inode updates */ + if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) + ret = true; + 
f2fs_put_page(i, 0); + return ret; +} + +static void try_to_fix_pino(struct inode *inode) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + nid_t pino; + + down_write(&fi->i_sem); + fi->xattr_ver = 0; + if (file_wrong_pino(inode) && inode->i_nlink == 1 && + get_parent_ino(inode, &pino)) { + fi->i_pino = pino; + file_got_pino(inode); + up_write(&fi->i_sem); + + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + } else { + up_write(&fi->i_sem); + } +} + int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -164,19 +199,21 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) return ret; } + /* if the inode is dirty, let's recover all the time */ + if (!datasync && is_inode_flag_set(fi, FI_DIRTY_INODE)) { + update_inode_page(inode); + goto go_write; + } + /* * if there is no written data, don't waste time to write recovery info. */ if (!is_inode_flag_set(fi, FI_APPEND_WRITE) && !exist_written_data(sbi, ino, APPEND_INO)) { - struct page *i = find_get_page(NODE_MAPPING(sbi), ino); - /* But we need to avoid that there are some inode updates */ - if ((i && PageDirty(i)) || need_inode_block_update(sbi, ino)) { - f2fs_put_page(i, 0); + /* it may call write_inode just prior to fsync */ + if (need_inode_page_update(sbi, ino)) goto go_write; - } - f2fs_put_page(i, 0); if (is_inode_flag_set(fi, FI_UPDATE_WRITE) || exist_written_data(sbi, ino, UPDATE_INO)) @@ -196,49 +233,36 @@ go_write: up_read(&fi->i_sem); if (need_cp) { - nid_t pino; - /* all the dirty node pages should be flushed for POR */ ret = f2fs_sync_fs(inode->i_sb, 1); - down_write(&fi->i_sem); - F2FS_I(inode)->xattr_ver = 0; - if (file_wrong_pino(inode) && inode->i_nlink == 1 && - get_parent_ino(inode, &pino)) { - F2FS_I(inode)->i_pino = pino; - file_got_pino(inode); - up_write(&fi->i_sem); - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - } else { - 
up_write(&fi->i_sem); - } - } else { + /* + * We've secured consistency through sync_fs. Following pino + * will be used only for fsynced inodes after checkpoint. + */ + try_to_fix_pino(inode); + goto out; + } sync_nodes: - sync_node_pages(sbi, ino, &wbc); - - if (need_inode_block_update(sbi, ino)) { - mark_inode_dirty_sync(inode); - ret = f2fs_write_inode(inode, NULL); - if (ret) - goto out; - goto sync_nodes; - } + sync_node_pages(sbi, ino, &wbc); - ret = wait_on_node_pages_writeback(sbi, ino); - if (ret) - goto out; + if (need_inode_block_update(sbi, ino)) { + mark_inode_dirty_sync(inode); + f2fs_write_inode(inode, NULL); + goto sync_nodes; + } + + ret = wait_on_node_pages_writeback(sbi, ino); + if (ret) + goto out; - /* once recovery info is written, don't need to tack this */ - remove_dirty_inode(sbi, ino, APPEND_INO); - clear_inode_flag(fi, FI_APPEND_WRITE); + /* once recovery info is written, don't need to tack this */ + remove_dirty_inode(sbi, ino, APPEND_INO); + clear_inode_flag(fi, FI_APPEND_WRITE); flush_out: - remove_dirty_inode(sbi, ino, UPDATE_INO); - clear_inode_flag(fi, FI_UPDATE_WRITE); - ret = f2fs_issue_flush(F2FS_I_SB(inode)); - } + remove_dirty_inode(sbi, ino, UPDATE_INO); + clear_inode_flag(fi, FI_UPDATE_WRITE); + ret = f2fs_issue_flush(sbi); out: trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); return ret; @@ -296,7 +320,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence) goto fail; /* handle inline data case */ - if (f2fs_has_inline_data(inode)) { + if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { if (whence == SEEK_HOLE) data_ofs = isize; goto found; @@ -374,6 +398,15 @@ static loff_t f2fs_llseek(struct file *file, loff_t offset, int whence) static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) { + struct inode *inode = file_inode(file); + + /* we don't need to use inline_data strictly */ + if (f2fs_has_inline_data(inode)) { + int err = 
f2fs_convert_inline_inode(inode); + if (err) + return err; + } + file_accessed(file); vma->vm_ops = &f2fs_file_vm_ops; return 0; @@ -415,20 +448,17 @@ void truncate_data_blocks(struct dnode_of_data *dn) truncate_data_blocks_range(dn, ADDRS_PER_BLOCK); } -static void truncate_partial_data_page(struct inode *inode, u64 from) +static int truncate_partial_data_page(struct inode *inode, u64 from) { unsigned offset = from & (PAGE_CACHE_SIZE - 1); struct page *page; - if (f2fs_has_inline_data(inode)) - return truncate_inline_data(inode, from); - if (!offset) - return; + return 0; page = find_data_page(inode, from >> PAGE_CACHE_SHIFT, false); if (IS_ERR(page)) - return; + return 0; lock_page(page); if (unlikely(!PageUptodate(page) || @@ -438,9 +468,9 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) f2fs_wait_on_page_writeback(page, DATA); zero_user(page, offset, PAGE_CACHE_SIZE - offset); set_page_dirty(page); - out: f2fs_put_page(page, 1); + return 0; } int truncate_blocks(struct inode *inode, u64 from, bool lock) @@ -450,27 +480,33 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) struct dnode_of_data dn; pgoff_t free_from; int count = 0, err = 0; + struct page *ipage; trace_f2fs_truncate_blocks_enter(inode, from); - if (f2fs_has_inline_data(inode)) - goto done; - free_from = (pgoff_t) - ((from + blocksize - 1) >> (sbi->log_blocksize)); + ((from + blocksize - 1) >> (sbi->log_blocksize)); if (lock) f2fs_lock_op(sbi); - set_new_dnode(&dn, inode, NULL, NULL, 0); + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; + } + + if (f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + goto out; + } + + set_new_dnode(&dn, inode, ipage, NULL, 0); err = get_dnode_of_data(&dn, free_from, LOOKUP_NODE); if (err) { if (err == -ENOENT) goto free_next; - if (lock) - f2fs_unlock_op(sbi); - trace_f2fs_truncate_blocks_exit(inode, err); - return err; + goto out; } count = 
ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); @@ -486,11 +522,13 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) f2fs_put_dnode(&dn); free_next: err = truncate_inode_blocks(inode, free_from); +out: if (lock) f2fs_unlock_op(sbi); -done: + /* lastly zero out the first data page */ - truncate_partial_data_page(inode, from); + if (!err) + err = truncate_partial_data_page(inode, from); trace_f2fs_truncate_blocks_exit(inode, err); return err; @@ -504,6 +542,12 @@ void f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); + /* we should check inline_data size */ + if (f2fs_has_inline_data(inode) && !f2fs_may_inline(inode)) { + if (f2fs_convert_inline_inode(inode)) + return; + } + if (!truncate_blocks(inode, i_size_read(inode), true)) { inode->i_mtime = inode->i_ctime = CURRENT_TIME; mark_inode_dirty(inode); @@ -561,10 +605,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (attr->ia_valid & ATTR_SIZE) { - err = f2fs_convert_inline_data(inode, attr->ia_size, NULL); - if (err) - return err; - if (attr->ia_size != i_size_read(inode)) { truncate_setsize(inode, attr->ia_size); f2fs_truncate(inode); @@ -665,9 +705,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len) if (offset >= inode->i_size) return ret; - ret = f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -721,9 +763,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset, if (ret) return ret; - ret = f2fs_convert_inline_data(inode, offset + len, NULL); - if (ret) - return ret; + if (f2fs_has_inline_data(inode)) { + ret = f2fs_convert_inline_inode(inode); + if (ret) + return ret; + } pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT; pg_end = 
((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT; @@ -874,7 +918,15 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE); - return f2fs_convert_inline_data(inode, MAX_INLINE_DATA + 1, NULL); + return f2fs_convert_inline_inode(inode); +} + +static int f2fs_release_file(struct inode *inode, struct file *filp) +{ + /* some remained atomic pages should discarded */ + if (f2fs_is_atomic_file(inode) || f2fs_is_volatile_file(inode)) + commit_inmem_pages(inode, true); + return 0; } static int f2fs_ioc_commit_atomic_write(struct file *filp) @@ -908,7 +960,8 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) return -EACCES; set_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE); - return 0; + + return f2fs_convert_inline_inode(inode); } static int f2fs_ioc_fitrim(struct file *filp, unsigned long arg) @@ -985,6 +1038,7 @@ const struct file_operations f2fs_file_operations = { .read_iter = generic_file_read_iter, .write_iter = generic_file_write_iter, .open = generic_file_open, + .release = f2fs_release_file, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, .fallocate = f2fs_fallocate, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 2a8f4ac..eec0933 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -96,8 +96,6 @@ int start_gc_thread(struct f2fs_sb_info *sbi) dev_t dev = sbi->sb->s_bdev->bd_dev; int err = 0; - if (!test_opt(sbi, BG_GC)) - goto out; gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); if (!gc_th) { err = -ENOMEM; @@ -340,34 +338,39 @@ static const struct victim_selection default_v_ops = { .get_victim = get_victim_by_default, }; -static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) +static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino) { struct inode_entry *ie; - list_for_each_entry(ie, ilist, list) - if (ie->inode->i_ino == ino) - return ie->inode; + ie = radix_tree_lookup(&gc_list->iroot, ino); + if (ie) + return ie->inode; return NULL; } -static void 
add_gc_inode(struct inode *inode, struct list_head *ilist) +static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode) { struct inode_entry *new_ie; - if (inode == find_gc_inode(inode->i_ino, ilist)) { + if (inode == find_gc_inode(gc_list, inode->i_ino)) { iput(inode); return; } - new_ie = f2fs_kmem_cache_alloc(winode_slab, GFP_NOFS); new_ie->inode = inode; - list_add_tail(&new_ie->list, ilist); +retry: + if (radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie)) { + cond_resched(); + goto retry; + } + list_add_tail(&new_ie->list, &gc_list->ilist); } -static void put_gc_inode(struct list_head *ilist) +static void put_gc_inode(struct gc_inode_list *gc_list) { struct inode_entry *ie, *next_ie; - list_for_each_entry_safe(ie, next_ie, ilist, list) { + list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) { + radix_tree_delete(&gc_list->iroot, ie->inode->i_ino); iput(ie->inode); list_del(&ie->list); kmem_cache_free(winode_slab, ie); @@ -553,7 +556,7 @@ out: * the victim data block is ignored. 
*/ static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, - struct list_head *ilist, unsigned int segno, int gc_type) + struct gc_inode_list *gc_list, unsigned int segno, int gc_type) { struct super_block *sb = sbi->sb; struct f2fs_summary *entry; @@ -605,27 +608,27 @@ next_step: data_page = find_data_page(inode, start_bidx + ofs_in_node, false); - if (IS_ERR(data_page)) - goto next_iput; + if (IS_ERR(data_page)) { + iput(inode); + continue; + } f2fs_put_page(data_page, 0); - add_gc_inode(inode, ilist); - } else { - inode = find_gc_inode(dni.ino, ilist); - if (inode) { - start_bidx = start_bidx_of_node(nofs, - F2FS_I(inode)); - data_page = get_lock_data_page(inode, + add_gc_inode(gc_list, inode); + continue; + } + + /* phase 3 */ + inode = find_gc_inode(gc_list, dni.ino); + if (inode) { + start_bidx = start_bidx_of_node(nofs, F2FS_I(inode)); + data_page = get_lock_data_page(inode, start_bidx + ofs_in_node); - if (IS_ERR(data_page)) - continue; - move_data_page(inode, data_page, gc_type); - stat_inc_data_blk_count(sbi, 1); - } + if (IS_ERR(data_page)) + continue; + move_data_page(inode, data_page, gc_type); + stat_inc_data_blk_count(sbi, 1); } - continue; -next_iput: - iput(inode); } if (++phase < 4) @@ -646,18 +649,20 @@ next_iput: } static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim, - int gc_type, int type) + int gc_type) { struct sit_info *sit_i = SIT_I(sbi); int ret; + mutex_lock(&sit_i->sentry_lock); - ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, type, LFS); + ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type, + NO_CHECK_TYPE, LFS); mutex_unlock(&sit_i->sentry_lock); return ret; } static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, - struct list_head *ilist, int gc_type) + struct gc_inode_list *gc_list, int gc_type) { struct page *sum_page; struct f2fs_summary_block *sum; @@ -675,7 +680,7 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, 
gc_node_segment(sbi, sum->entries, segno, gc_type); break; case SUM_TYPE_DATA: - gc_data_segment(sbi, sum->entries, ilist, segno, gc_type); + gc_data_segment(sbi, sum->entries, gc_list, segno, gc_type); break; } blk_finish_plug(&plug); @@ -688,16 +693,18 @@ static void do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno, int f2fs_gc(struct f2fs_sb_info *sbi) { - struct list_head ilist; unsigned int segno, i; int gc_type = BG_GC; int nfree = 0; int ret = -1; - struct cp_control cpc = { - .reason = CP_SYNC, + struct cp_control cpc; + struct gc_inode_list gc_list = { + .ilist = LIST_HEAD_INIT(gc_list.ilist), + .iroot = RADIX_TREE_INIT(GFP_NOFS), }; - INIT_LIST_HEAD(&ilist); + cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; + gc_more: if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) goto stop; @@ -709,7 +716,7 @@ gc_more: write_checkpoint(sbi, &cpc); } - if (!__get_victim(sbi, &segno, gc_type, NO_CHECK_TYPE)) + if (!__get_victim(sbi, &segno, gc_type)) goto stop; ret = 0; @@ -719,7 +726,7 @@ gc_more: META_SSA); for (i = 0; i < sbi->segs_per_sec; i++) - do_garbage_collect(sbi, segno + i, &ilist, gc_type); + do_garbage_collect(sbi, segno + i, &gc_list, gc_type); if (gc_type == FG_GC) { sbi->cur_victim_sec = NULL_SEGNO; @@ -735,7 +742,7 @@ gc_more: stop: mutex_unlock(&sbi->gc_mutex); - put_gc_inode(&ilist); + put_gc_inode(&gc_list); return ret; } diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h index 16f0b2b..6ff7ad3 100644 --- a/fs/f2fs/gc.h +++ b/fs/f2fs/gc.h @@ -40,6 +40,11 @@ struct inode_entry { struct inode *inode; }; +struct gc_inode_list { + struct list_head ilist; + struct radix_tree_root iroot; +}; + /* * inline functions */ diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 88036fd..f2d3c58 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -15,35 +15,44 @@ bool f2fs_may_inline(struct inode *inode) { - block_t nr_blocks; - loff_t i_size; - if (!test_opt(F2FS_I_SB(inode), INLINE_DATA)) return false; if (f2fs_is_atomic_file(inode)) return 
false; - nr_blocks = F2FS_I(inode)->i_xattr_nid ? 3 : 2; - if (inode->i_blocks > nr_blocks) + if (!S_ISREG(inode->i_mode)) return false; - i_size = i_size_read(inode); - if (i_size > MAX_INLINE_DATA) + if (i_size_read(inode) > MAX_INLINE_DATA) return false; return true; } -int f2fs_read_inline_data(struct inode *inode, struct page *page) +void read_inline_data(struct page *page, struct page *ipage) { - struct page *ipage; void *src_addr, *dst_addr; - if (page->index) { - zero_user_segment(page, 0, PAGE_CACHE_SIZE); - goto out; - } + if (PageUptodate(page)) + return; + + f2fs_bug_on(F2FS_P_SB(page), page->index); + + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + + /* Copy the whole inline data block */ + src_addr = inline_data_addr(ipage); + dst_addr = kmap_atomic(page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + flush_dcache_page(page); + kunmap_atomic(dst_addr); + SetPageUptodate(page); +} + +int f2fs_read_inline_data(struct inode *inode, struct page *page) +{ + struct page *ipage; ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); if (IS_ERR(ipage)) { @@ -51,112 +60,116 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) return PTR_ERR(ipage); } - zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); + if (!f2fs_has_inline_data(inode)) { + f2fs_put_page(ipage, 1); + return -EAGAIN; + } - /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); - memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); - f2fs_put_page(ipage, 1); + if (page->index) + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + else + read_inline_data(page, ipage); -out: SetPageUptodate(page); + f2fs_put_page(ipage, 1); unlock_page(page); - return 0; } -static int __f2fs_convert_inline_data(struct inode *inode, struct page *page) +int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) { - int err = 0; - struct page *ipage; - struct dnode_of_data dn; void *src_addr, *dst_addr; block_t 
new_blk_addr; - struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_io_info fio = { .type = DATA, .rw = WRITE_SYNC | REQ_PRIO, }; + int dirty, err; - f2fs_lock_op(sbi); - ipage = get_node_page(sbi, inode->i_ino); - if (IS_ERR(ipage)) { - err = PTR_ERR(ipage); - goto out; - } + f2fs_bug_on(F2FS_I_SB(dn->inode), page->index); - /* someone else converted inline_data already */ - if (!f2fs_has_inline_data(inode)) - goto out; + if (!f2fs_exist_data(dn->inode)) + goto clear_out; - /* - * i_addr[0] is not used for inline data, - * so reserving new block will not destroy inline data - */ - set_new_dnode(&dn, inode, ipage, NULL, 0); - err = f2fs_reserve_block(&dn, 0); + err = f2fs_reserve_block(dn, 0); if (err) - goto out; + return err; f2fs_wait_on_page_writeback(page, DATA); + + if (PageUptodate(page)) + goto no_update; + zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); /* Copy the whole inline data block */ - src_addr = inline_data_addr(ipage); - dst_addr = kmap(page); + src_addr = inline_data_addr(dn->inode_page); + dst_addr = kmap_atomic(page); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); - kunmap(page); + flush_dcache_page(page); + kunmap_atomic(dst_addr); SetPageUptodate(page); +no_update: + /* clear dirty state */ + dirty = clear_page_dirty_for_io(page); /* write data page to try to make data consistent */ set_page_writeback(page); - write_data_page(page, &dn, &new_blk_addr, &fio); - update_extent_cache(new_blk_addr, &dn); + + write_data_page(page, dn, &new_blk_addr, &fio); + update_extent_cache(new_blk_addr, dn); f2fs_wait_on_page_writeback(page, DATA); + if (dirty) + inode_dec_dirty_pages(dn->inode); - /* clear inline data and flag after data writeback */ - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_dec_inline_inode(inode); + /* this converted inline_data should be recovered. 
*/ + set_inode_flag(F2FS_I(dn->inode), FI_APPEND_WRITE); - sync_inode_page(&dn); - f2fs_put_dnode(&dn); -out: - f2fs_unlock_op(sbi); - return err; + /* clear inline data and flag after data writeback */ + truncate_inline_data(dn->inode_page, 0); +clear_out: + stat_dec_inline_inode(dn->inode); + f2fs_clear_inline_inode(dn->inode); + sync_inode_page(dn); + f2fs_put_dnode(dn); + return 0; } -int f2fs_convert_inline_data(struct inode *inode, pgoff_t to_size, - struct page *page) +int f2fs_convert_inline_inode(struct inode *inode) { - struct page *new_page = page; - int err; + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct dnode_of_data dn; + struct page *ipage, *page; + int err = 0; - if (!f2fs_has_inline_data(inode)) - return 0; - else if (to_size <= MAX_INLINE_DATA) - return 0; + page = grab_cache_page(inode->i_mapping, 0); + if (!page) + return -ENOMEM; + + f2fs_lock_op(sbi); - if (!page || page->index != 0) { - new_page = grab_cache_page(inode->i_mapping, 0); - if (!new_page) - return -ENOMEM; + ipage = get_node_page(sbi, inode->i_ino); + if (IS_ERR(ipage)) { + err = PTR_ERR(ipage); + goto out; } - err = __f2fs_convert_inline_data(inode, new_page); - if (!page || page->index != 0) - f2fs_put_page(new_page, 1); + set_new_dnode(&dn, inode, ipage, ipage, 0); + + if (f2fs_has_inline_data(inode)) + err = f2fs_convert_inline_page(&dn, page); + + f2fs_put_dnode(&dn); +out: + f2fs_unlock_op(sbi); + + f2fs_put_page(page, 1); return err; } -int f2fs_write_inline_data(struct inode *inode, - struct page *page, unsigned size) +int f2fs_write_inline_data(struct inode *inode, struct page *page) { void *src_addr, *dst_addr; - struct page *ipage; struct dnode_of_data dn; int err; @@ -164,47 +177,39 @@ int f2fs_write_inline_data(struct inode *inode, err = get_dnode_of_data(&dn, 0, LOOKUP_NODE); if (err) return err; - ipage = dn.inode_page; - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - 
src_addr = kmap(page); - dst_addr = inline_data_addr(ipage); - memcpy(dst_addr, src_addr, size); - kunmap(page); - - /* Release the first data block if it is allocated */ if (!f2fs_has_inline_data(inode)) { - truncate_data_blocks_range(&dn, 1); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); - stat_inc_inline_inode(inode); + f2fs_put_dnode(&dn); + return -EAGAIN; } + f2fs_bug_on(F2FS_I_SB(inode), page->index); + + f2fs_wait_on_page_writeback(dn.inode_page, NODE); + src_addr = kmap_atomic(page); + dst_addr = inline_data_addr(dn.inode_page); + memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + kunmap_atomic(src_addr); + set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + sync_inode_page(&dn); f2fs_put_dnode(&dn); - return 0; } -void truncate_inline_data(struct inode *inode, u64 from) +void truncate_inline_data(struct page *ipage, u64 from) { - struct page *ipage; + void *addr; if (from >= MAX_INLINE_DATA) return; - ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(ipage)) - return; - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET + from, - INLINE_DATA_OFFSET + MAX_INLINE_DATA); - set_page_dirty(ipage); - f2fs_put_page(ipage, 1); + addr = inline_data_addr(ipage); + memset(addr + from, 0, MAX_INLINE_DATA - from); } bool recover_inline_data(struct inode *inode, struct page *npage) @@ -236,6 +241,10 @@ process_inline: src_addr = inline_data_addr(npage); dst_addr = inline_data_addr(ipage); memcpy(dst_addr, src_addr, MAX_INLINE_DATA); + + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + update_inode(inode, ipage); f2fs_put_page(ipage, 1); return true; @@ -244,16 +253,279 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - f2fs_wait_on_page_writeback(ipage, NODE); - zero_user_segment(ipage, INLINE_DATA_OFFSET, - INLINE_DATA_OFFSET + 
MAX_INLINE_DATA); - clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + truncate_inline_data(ipage, 0); + f2fs_clear_inline_inode(inode); update_inode(inode, ipage); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { truncate_blocks(inode, 0, false); - set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); goto process_inline; } return false; } + +struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, + struct qstr *name, struct page **res_page) +{ + struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); + struct f2fs_inline_dentry *inline_dentry; + struct f2fs_dir_entry *de; + struct f2fs_dentry_ptr d; + struct page *ipage; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + inline_dentry = inline_data_addr(ipage); + + make_dentry_ptr(&d, (void *)inline_dentry, 2); + de = find_target_dentry(name, NULL, &d); + + unlock_page(ipage); + if (de) + *res_page = ipage; + else + f2fs_put_page(ipage, 0); + + /* + * For the most part, it should be a bug when name_len is zero. + * We stop here for figuring out where the bugs has occurred. 
+ */ + f2fs_bug_on(sbi, d.max < 0); + return de; +} + +struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *dir, + struct page **p) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + struct f2fs_dir_entry *de; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return NULL; + + dentry_blk = inline_data_addr(ipage); + de = &dentry_blk->dentry[1]; + *p = ipage; + unlock_page(ipage); + return de; +} + +int make_empty_inline_dir(struct inode *inode, struct inode *parent, + struct page *ipage) +{ + struct f2fs_inline_dentry *dentry_blk; + struct f2fs_dentry_ptr d; + + dentry_blk = inline_data_addr(ipage); + + make_dentry_ptr(&d, (void *)dentry_blk, 2); + do_make_empty_dir(inode, parent, &d); + + set_page_dirty(ipage); + + /* update i_size to MAX_INLINE_DATA */ + if (i_size_read(inode) < MAX_INLINE_DATA) { + i_size_write(inode, MAX_INLINE_DATA); + set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); + } + return 0; +} + +static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, + struct f2fs_inline_dentry *inline_dentry) +{ + struct page *page; + struct dnode_of_data dn; + struct f2fs_dentry_block *dentry_blk; + int err; + + page = grab_cache_page(dir->i_mapping, 0); + if (!page) + return -ENOMEM; + + set_new_dnode(&dn, dir, ipage, NULL, 0); + err = f2fs_reserve_block(&dn, 0); + if (err) + goto out; + + f2fs_wait_on_page_writeback(page, DATA); + zero_user_segment(page, 0, PAGE_CACHE_SIZE); + + dentry_blk = kmap_atomic(page); + + /* copy data from inline dentry block to new dentry block */ + memcpy(dentry_blk->dentry_bitmap, inline_dentry->dentry_bitmap, + INLINE_DENTRY_BITMAP_SIZE); + memcpy(dentry_blk->dentry, inline_dentry->dentry, + sizeof(struct f2fs_dir_entry) * NR_INLINE_DENTRY); + memcpy(dentry_blk->filename, inline_dentry->filename, + NR_INLINE_DENTRY * F2FS_SLOT_LEN); + + kunmap_atomic(dentry_blk); + SetPageUptodate(page); + set_page_dirty(page); + + /* clear inline dir 
and flag after data writeback */ + truncate_inline_data(ipage, 0); + + stat_dec_inline_dir(dir); + clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY); + + if (i_size_read(dir) < PAGE_CACHE_SIZE) { + i_size_write(dir, PAGE_CACHE_SIZE); + set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } + + sync_inode_page(&dn); +out: + f2fs_put_page(page, 1); + return err; +} + +int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, + struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos; + f2fs_hash_t name_hash; + struct f2fs_dir_entry *de; + size_t namelen = name->len; + struct f2fs_inline_dentry *dentry_blk = NULL; + int slots = GET_DENTRY_SLOTS(namelen); + struct page *page; + int err = 0; + int i; + + name_hash = f2fs_dentry_hash(name); + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + dentry_blk = inline_data_addr(ipage); + bit_pos = room_for_filename(&dentry_blk->dentry_bitmap, + slots, NR_INLINE_DENTRY); + if (bit_pos >= NR_INLINE_DENTRY) { + err = f2fs_convert_inline_dir(dir, ipage, dentry_blk); + if (!err) + err = -EAGAIN; + goto out; + } + + down_write(&F2FS_I(inode)->i_sem); + page = init_inode_metadata(inode, dir, name, ipage); + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto fail; + } + + f2fs_wait_on_page_writeback(ipage, NODE); + de = &dentry_blk->dentry[bit_pos]; + de->hash_code = name_hash; + de->name_len = cpu_to_le16(namelen); + memcpy(dentry_blk->filename[bit_pos], name->name, name->len); + de->ino = cpu_to_le32(inode->i_ino); + set_de_type(de, inode); + for (i = 0; i < slots; i++) + test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); + set_page_dirty(ipage); + + /* we don't need to mark_inode_dirty now */ + F2FS_I(inode)->i_pino = dir->i_ino; + update_inode(inode, page); + f2fs_put_page(page, 1); + + update_parent_metadata(dir, inode, 0); +fail: + up_write(&F2FS_I(inode)->i_sem); + + if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { 
+ update_inode(dir, ipage); + clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); + } +out: + f2fs_put_page(ipage, 1); + return err; +} + +void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, + struct inode *dir, struct inode *inode) +{ + struct f2fs_inline_dentry *inline_dentry; + int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); + unsigned int bit_pos; + int i; + + lock_page(page); + f2fs_wait_on_page_writeback(page, NODE); + + inline_dentry = inline_data_addr(page); + bit_pos = dentry - inline_dentry->dentry; + for (i = 0; i < slots; i++) + test_and_clear_bit_le(bit_pos + i, + &inline_dentry->dentry_bitmap); + + set_page_dirty(page); + + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + + if (inode) + f2fs_drop_nlink(dir, inode, page); + + f2fs_put_page(page, 1); +} + +bool f2fs_empty_inline_dir(struct inode *dir) +{ + struct f2fs_sb_info *sbi = F2FS_I_SB(dir); + struct page *ipage; + unsigned int bit_pos = 2; + struct f2fs_inline_dentry *dentry_blk; + + ipage = get_node_page(sbi, dir->i_ino); + if (IS_ERR(ipage)) + return false; + + dentry_blk = inline_data_addr(ipage); + bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, + NR_INLINE_DENTRY, + bit_pos); + + f2fs_put_page(ipage, 1); + + if (bit_pos < NR_INLINE_DENTRY) + return false; + + return true; +} + +int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx) +{ + struct inode *inode = file_inode(file); + struct f2fs_inline_dentry *inline_dentry = NULL; + struct page *ipage = NULL; + struct f2fs_dentry_ptr d; + + if (ctx->pos == NR_INLINE_DENTRY) + return 0; + + ipage = get_node_page(F2FS_I_SB(inode), inode->i_ino); + if (IS_ERR(ipage)) + return PTR_ERR(ipage); + + inline_dentry = inline_data_addr(ipage); + + make_dentry_ptr(&d, (void *)inline_dentry, 2); + + if (!f2fs_fill_dentries(ctx, &d, 0)) + ctx->pos = NR_INLINE_DENTRY; + + f2fs_put_page(ipage, 1); + return 0; +} diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0deead4..196cc78 100644 --- a/fs/f2fs/inode.c 
+++ b/fs/f2fs/inode.c @@ -67,12 +67,38 @@ static void __set_inode_rdev(struct inode *inode, struct f2fs_inode *ri) } } +static int __recover_inline_status(struct inode *inode, struct page *ipage) +{ + void *inline_data = inline_data_addr(ipage); + struct f2fs_inode *ri; + void *zbuf; + + zbuf = kzalloc(MAX_INLINE_DATA, GFP_NOFS); + if (!zbuf) + return -ENOMEM; + + if (!memcmp(zbuf, inline_data, MAX_INLINE_DATA)) { + kfree(zbuf); + return 0; + } + kfree(zbuf); + + f2fs_wait_on_page_writeback(ipage, NODE); + set_inode_flag(F2FS_I(inode), FI_DATA_EXIST); + + ri = F2FS_INODE(ipage); + set_raw_inline(F2FS_I(inode), ri); + set_page_dirty(ipage); + return 0; +} + static int do_read_inode(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_inode_info *fi = F2FS_I(inode); struct page *node_page; struct f2fs_inode *ri; + int err = 0; /* Check if ino is within scope */ if (check_nid_range(sbi, inode->i_ino)) { @@ -114,11 +140,19 @@ static int do_read_inode(struct inode *inode) get_extent_info(&fi->ext, ri->i_ext); get_inline_info(fi, ri); + /* check data exist */ + if (f2fs_has_inline_data(inode) && !f2fs_exist_data(inode)) + err = __recover_inline_status(inode, node_page); + /* get rdev by using inline_info */ __get_inode_rdev(inode, ri); f2fs_put_page(node_page, 1); - return 0; + + stat_inc_inline_inode(inode); + stat_inc_inline_dir(inode); + + return err; } struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) @@ -156,7 +190,7 @@ make_now: inode->i_op = &f2fs_dir_inode_operations; inode->i_fop = &f2fs_dir_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); + mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); } else if (S_ISLNK(inode->i_mode)) { inode->i_op = &f2fs_symlink_inode_operations; inode->i_mapping->a_ops = &f2fs_dblock_aops; @@ -295,11 +329,12 @@ void f2fs_evict_inode(struct inode *inode) f2fs_lock_op(sbi); remove_inode_page(inode); - 
stat_dec_inline_inode(inode); f2fs_unlock_op(sbi); sb_end_intwrite(inode->i_sb); no_delete: + stat_dec_inline_dir(inode); + stat_dec_inline_inode(inode); invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); @@ -325,8 +360,9 @@ void handle_failed_inode(struct inode *inode) f2fs_truncate(inode); remove_inode_page(inode); - stat_dec_inline_inode(inode); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + clear_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); alloc_nid_failed(sbi, inode->i_ino); f2fs_unlock_op(sbi); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 0d2526e..547a2de 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -54,6 +54,12 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode) nid_free = true; goto out; } + + if (f2fs_may_inline(inode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DATA); + if (test_opt(sbi, INLINE_DENTRY) && S_ISDIR(inode->i_mode)) + set_inode_flag(F2FS_I(inode), FI_INLINE_DENTRY); + trace_f2fs_new_inode(inode, 0); mark_inode_dirty(inode); return inode; @@ -129,8 +135,12 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, alloc_nid_done(sbi, ino); + stat_inc_inline_inode(inode); d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -157,6 +167,9 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, f2fs_unlock_op(sbi); d_instantiate(dentry, inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: clear_inode_flag(F2FS_I(inode), FI_INC_LINK); @@ -187,14 +200,12 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, de = f2fs_find_entry(dir, &dentry->d_name, &page); if (de) { nid_t ino = le32_to_cpu(de->ino); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); inode = f2fs_iget(dir->i_sb, ino); if (IS_ERR(inode)) return 
ERR_CAST(inode); - - stat_inc_inline_inode(inode); } return d_splice_alias(inode, dentry); @@ -219,15 +230,18 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry) err = acquire_orphan_inode(sbi); if (err) { f2fs_unlock_op(sbi); - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); goto fail; } - f2fs_delete_entry(de, page, inode); + f2fs_delete_entry(de, page, dir, inode); f2fs_unlock_op(sbi); /* In order to evict this inode, we set it dirty */ mark_inode_dirty(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); fail: trace_f2fs_unlink_exit(inode, err); return err; @@ -261,6 +275,9 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return err; out: handle_failed_inode(inode); @@ -291,11 +308,14 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) goto out_fail; f2fs_unlock_op(sbi); + stat_inc_inline_dir(inode); alloc_nid_done(sbi, inode->i_ino); d_instantiate(dentry, inode); unlock_new_inode(inode); + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_fail: @@ -338,8 +358,12 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, f2fs_unlock_op(sbi); alloc_nid_done(sbi, inode->i_ino); + d_instantiate(dentry, inode); unlock_new_inode(inode); + + if (IS_DIRSYNC(dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out: handle_failed_inode(inode); @@ -435,7 +459,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_inode->i_ctime = CURRENT_TIME; mark_inode_dirty(old_inode); - f2fs_delete_entry(old_entry, old_page, NULL); + f2fs_delete_entry(old_entry, old_page, old_dir, NULL); if (old_dir_entry) { if (old_dir != new_dir) { @@ -443,7 +467,7 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, old_dir_page, new_dir); update_inode_page(old_inode); } else { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, 
old_dir_page); f2fs_put_page(old_dir_page, 0); } drop_nlink(old_dir); @@ -452,19 +476,22 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, } f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; put_out_dir: f2fs_unlock_op(sbi); - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_dir: if (old_dir_entry) { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_old: - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; @@ -588,6 +615,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, update_inode_page(new_dir); f2fs_unlock_op(sbi); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + f2fs_sync_fs(sbi->sb, 1); return 0; out_undo: /* Still we may fail to recover name info of f2fs_inode here */ @@ -596,19 +626,19 @@ out_unlock: f2fs_unlock_op(sbi); out_new_dir: if (new_dir_entry) { - kunmap(new_dir_page); + f2fs_dentry_kunmap(new_inode, new_dir_page); f2fs_put_page(new_dir_page, 0); } out_old_dir: if (old_dir_entry) { - kunmap(old_dir_page); + f2fs_dentry_kunmap(old_inode, old_dir_page); f2fs_put_page(old_dir_page, 0); } out_new: - kunmap(new_page); + f2fs_dentry_kunmap(new_dir, new_page); f2fs_put_page(new_page, 0); out_old: - kunmap(old_page); + f2fs_dentry_kunmap(old_dir, old_page); f2fs_put_page(old_page, 0); out: return err; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 44b8afe..f83326c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -31,22 +31,38 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct sysinfo val; + unsigned long avail_ram; unsigned long mem_size = 0; bool res = false; si_meminfo(&val); - /* give 25%, 25%, 50% memory for each components respectively */ + + /* only uses low memory */ + avail_ram = val.totalram - 
val.totalhigh; + + /* give 25%, 25%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { - mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> 12; - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 2); + mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> + PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->dirty_exceeded) return false; mem_size = get_pages(sbi, F2FS_DIRTY_DENTS); - res = mem_size < ((val.totalram * nm_i->ram_thresh / 100) >> 1); + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); + } else if (type == INO_ENTRIES) { + int i; + + if (sbi->sb->s_bdi->dirty_exceeded) + return false; + for (i = 0; i <= UPDATE_INO; i++) + mem_size += (sbi->im[i].ino_num * + sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT; + res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } return res; } @@ -131,7 +147,7 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, if (get_nat_flag(ne, IS_DIRTY)) return; -retry: + head = radix_tree_lookup(&nm_i->nat_set_root, set); if (!head) { head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_ATOMIC); @@ -140,11 +156,7 @@ retry: INIT_LIST_HEAD(&head->set_list); head->set = set; head->entry_cnt = 0; - - if (radix_tree_insert(&nm_i->nat_set_root, set, head)) { - cond_resched(); - goto retry; - } + f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head); } list_move_tail(&ne->list, &head->entry_list); nm_i->dirty_nat_cnt++; @@ -155,7 +167,7 @@ retry: static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, struct nat_entry *ne) { - nid_t set = ne->ni.nid / NAT_ENTRY_PER_BLOCK; + 
nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); struct nat_entry_set *head; head = radix_tree_lookup(&nm_i->nat_set_root, set); @@ -180,11 +192,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -194,11 +206,11 @@ bool has_fsynced_inode(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool fsynced = false; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_FSYNCED_INODE)) fsynced = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return fsynced; } @@ -208,13 +220,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need_update; } @@ -222,13 +234,8 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) { struct nat_entry *new; - new = kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); - if (!new) - return NULL; - if (radix_tree_insert(&nm_i->nat_root, nid, new)) { - kmem_cache_free(nat_entry_slab, new); - return NULL; - } + new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_ATOMIC); + f2fs_radix_tree_insert(&nm_i->nat_root, nid, new); memset(new, 0, sizeof(struct nat_entry)); nat_set_nid(new, nid); nat_reset_flag(new); @@ -241,18 +248,14 @@ static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid, struct f2fs_nat_entry *ne) { struct nat_entry *e; 
-retry: - write_lock(&nm_i->nat_tree_lock); + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (!e) { e = grab_nat_entry(nm_i, nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } node_info_from_raw_nat(&e->ni, ne); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, @@ -260,15 +263,11 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, { struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; -retry: - write_lock(&nm_i->nat_tree_lock); + + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); - if (!e) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; - } e->ni = *ni; f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR); } else if (new_blkaddr == NEW_ADDR) { @@ -310,7 +309,7 @@ retry: set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -320,7 +319,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) if (available_free_memory(sbi, NAT_ENTRIES)) return 0; - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; ne = list_first_entry(&nm_i->nat_entries, @@ -328,7 +327,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); return nr_shrink; } @@ -351,14 +350,14 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = 
nat_get_blkaddr(e); ni->version = nat_get_version(e); } - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (e) return; @@ -1298,16 +1297,22 @@ static int f2fs_write_node_page(struct page *page, return 0; } - if (wbc->for_reclaim) - goto redirty_out; - - down_read(&sbi->node_write); + if (wbc->for_reclaim) { + if (!down_read_trylock(&sbi->node_write)) + goto redirty_out; + } else { + down_read(&sbi->node_write); + } set_page_writeback(page); write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); dec_page_count(sbi, F2FS_DIRTY_NODES); up_read(&sbi->node_write); unlock_page(page); + + if (wbc->for_reclaim) + f2fs_submit_merged_bio(sbi, NODE, WRITE); + return 0; redirty_out: @@ -1410,13 +1415,13 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) if (build) { /* do not add allocated nids */ - read_lock(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); ne = __lookup_nat_cache(nm_i, nid); if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || nat_get_blkaddr(ne) != NULL_ADDR)) allocated = true; - read_unlock(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); if (allocated) return 0; } @@ -1425,15 +1430,22 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) i->nid = nid; i->state = NID_NEW; + if (radix_tree_preload(GFP_NOFS)) { + kmem_cache_free(free_nid_slab, i); + return 0; + } + spin_lock(&nm_i->free_nid_list_lock); if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); kmem_cache_free(free_nid_slab, i); return 0; } list_add_tail(&i->list, &nm_i->free_nid_list); nm_i->fcnt++; spin_unlock(&nm_i->free_nid_list_lock); + radix_tree_preload_end(); return 1; } @@ -1804,21 +1816,15 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) nid_t nid = le32_to_cpu(nid_in_journal(sum, i)); raw_ne = nat_in_journal(sum, i); -retry: - 
write_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, nid); - if (ne) - goto found; - ne = grab_nat_entry(nm_i, nid); + down_write(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); if (!ne) { - write_unlock(&nm_i->nat_tree_lock); - goto retry; + ne = grab_nat_entry(nm_i, nid); + node_info_from_raw_nat(&ne->ni, &raw_ne); } - node_info_from_raw_nat(&ne->ni, &raw_ne); -found: __set_nat_cache_dirty(nm_i, ne); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } update_nats_in_cursum(sum, -i); mutex_unlock(&curseg->curseg_mutex); @@ -1889,10 +1895,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } raw_nat_from_node_info(raw_ne, &ne->ni); - write_lock(&NM_I(sbi)->nat_tree_lock); + down_write(&NM_I(sbi)->nat_tree_lock); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - write_unlock(&NM_I(sbi)->nat_tree_lock); + up_write(&NM_I(sbi)->nat_tree_lock); if (nat_get_blkaddr(ne) == NULL_ADDR) add_free_nid(sbi, nid, false); @@ -1903,10 +1909,10 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, else f2fs_put_page(page, 1); - if (!set->entry_cnt) { - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - kmem_cache_free(nat_entry_set_slab, set); - } + f2fs_bug_on(sbi, set->entry_cnt); + + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + kmem_cache_free(nat_entry_set_slab, set); } /* @@ -1923,6 +1929,8 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) nid_t set_idx = 0; LIST_HEAD(sets); + if (!nm_i->dirty_nat_cnt) + return; /* * if there are no enough space in journal to store dirty nat * entries, remove all entries from journal and merge them @@ -1931,9 +1939,6 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL)) remove_nats_in_journal(sbi); - if (!nm_i->dirty_nat_cnt) - return; - while ((found = __gang_lookup_nat_set(nm_i, set_idx, NATVEC_SIZE, setvec))) { unsigned idx; @@ -1973,13 +1978,13 @@ static int 
init_node_manager(struct f2fs_sb_info *sbi) INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); INIT_LIST_HEAD(&nm_i->free_nid_list); - INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); - INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_ATOMIC); + INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); + INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - rwlock_init(&nm_i->nat_tree_lock); + init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2035,7 +2040,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - write_lock(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2044,7 +2049,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) __del_from_nat_cache(nm_i, natvec[idx]); } f2fs_bug_on(sbi, nm_i->nat_cnt); - write_unlock(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; @@ -2061,17 +2066,17 @@ int __init create_node_manager_caches(void) free_nid_slab = f2fs_kmem_cache_create("free_nid", sizeof(struct free_nid)); if (!free_nid_slab) - goto destory_nat_entry; + goto destroy_nat_entry; nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set", sizeof(struct nat_entry_set)); if (!nat_entry_set_slab) - goto destory_free_nid; + goto destroy_free_nid; return 0; -destory_free_nid: +destroy_free_nid: kmem_cache_destroy(free_nid_slab); -destory_nat_entry: +destroy_nat_entry: kmem_cache_destroy(nat_entry_slab); fail: return -ENOMEM; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 8d5e6e0d..d10b644 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -106,7 +106,8 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne, enum mem_type { FREE_NIDS, /* 
indicates the free nid list */ NAT_ENTRIES, /* indicates the cached nat entry */ - DIRTY_DENTS /* indicates dirty dentry pages */ + DIRTY_DENTS, /* indicates dirty dentry pages */ + INO_ENTRIES, /* indicates inode entries */ }; struct nat_entry_set { @@ -192,10 +193,7 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) { unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); - if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) - f2fs_clear_bit(block_off, nm_i->nat_bitmap); - else - f2fs_set_bit(block_off, nm_i->nat_bitmap); + f2fs_change_bit(block_off, nm_i->nat_bitmap); } static inline void fill_node_footer(struct page *page, nid_t nid, diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ebd0132..9160a37 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -111,7 +111,7 @@ retry: iput(einode); goto out_unmap_put; } - f2fs_delete_entry(de, page, einode); + f2fs_delete_entry(de, page, dir, einode); iput(einode); goto retry; } @@ -129,7 +129,7 @@ retry: goto out; out_unmap_put: - kunmap(page); + f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); out_err: iput(dir); @@ -170,13 +170,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); + ra_meta_pages(sbi, blkaddr, 1, META_POR); + while (1) { struct fsync_inode_entry *entry; if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) return 0; - page = get_meta_page_ra(sbi, blkaddr); + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) break; @@ -227,6 +229,8 @@ next: /* check next segment */ blkaddr = next_blkaddr_of_node(page); f2fs_put_page(page, 1); + + ra_meta_pages_cond(sbi, blkaddr); } f2fs_put_page(page, 1); return err; @@ -436,7 +440,9 @@ static int recover_data(struct f2fs_sb_info *sbi, if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi)) break; - page = get_meta_page_ra(sbi, blkaddr); + ra_meta_pages_cond(sbi, 
blkaddr); + + page = get_meta_page(sbi, blkaddr); if (cp_ver != cpver_of_node(page)) { f2fs_put_page(page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 923cb76..42607a6 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -178,17 +178,47 @@ void register_inmem_page(struct inode *inode, struct page *page) { struct f2fs_inode_info *fi = F2FS_I(inode); struct inmem_pages *new; + int err; + + SetPagePrivate(page); new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS); /* add atomic page indices to the list */ new->page = page; INIT_LIST_HEAD(&new->list); - +retry: /* increase reference count with clean state */ mutex_lock(&fi->inmem_lock); + err = radix_tree_insert(&fi->inmem_root, page->index, new); + if (err == -EEXIST) { + mutex_unlock(&fi->inmem_lock); + kmem_cache_free(inmem_entry_slab, new); + return; + } else if (err) { + mutex_unlock(&fi->inmem_lock); + goto retry; + } get_page(page); list_add_tail(&new->list, &fi->inmem_pages); + inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + mutex_unlock(&fi->inmem_lock); +} + +void invalidate_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct inmem_pages *cur; + + mutex_lock(&fi->inmem_lock); + cur = radix_tree_lookup(&fi->inmem_root, page->index); + if (cur) { + radix_tree_delete(&fi->inmem_root, cur->page->index); + f2fs_put_page(cur->page, 0); + list_del(&cur->list); + kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); + } mutex_unlock(&fi->inmem_lock); } @@ -203,7 +233,16 @@ void commit_inmem_pages(struct inode *inode, bool abort) .rw = WRITE_SYNC, }; - f2fs_balance_fs(sbi); + /* + * The abort is true only when f2fs_evict_inode is called. + * Basically, the f2fs_evict_inode doesn't produce any data writes, so + * that we don't need to call f2fs_balance_fs. + * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this + * inode becomes free by iget_locked in f2fs_iget. 
+ */ + if (!abort) + f2fs_balance_fs(sbi); + f2fs_lock_op(sbi); mutex_lock(&fi->inmem_lock); @@ -216,9 +255,11 @@ void commit_inmem_pages(struct inode *inode, bool abort) do_write_data_page(cur->page, &fio); submit_bio = true; } + radix_tree_delete(&fi->inmem_root, cur->page->index); f2fs_put_page(cur->page, 1); list_del(&cur->list); kmem_cache_free(inmem_entry_slab, cur); + dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES); } if (submit_bio) f2fs_submit_merged_bio(sbi, DATA, WRITE); @@ -248,7 +289,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) { /* check the # of cached NAT entries and prefree segments */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) || - excess_prefree_segs(sbi)) + excess_prefree_segs(sbi) || + available_free_memory(sbi, INO_ENTRIES)) f2fs_sync_fs(sbi->sb, true); } @@ -441,10 +483,33 @@ void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) } } -static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +static void __add_discard_entry(struct f2fs_sb_info *sbi, + struct cp_control *cpc, unsigned int start, unsigned int end) { struct list_head *head = &SM_I(sbi)->discard_list; - struct discard_entry *new; + struct discard_entry *new, *last; + + if (!list_empty(head)) { + last = list_last_entry(head, struct discard_entry, list); + if (START_BLOCK(sbi, cpc->trim_start) + start == + last->blkaddr + last->len) { + last->len += end - start; + goto done; + } + } + + new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); + INIT_LIST_HEAD(&new->list); + new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; + new->len = end - start; + list_add_tail(&new->list, head); +done: + SM_I(sbi)->nr_discards += end - start; + cpc->trimmed += end - start; +} + +static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long); int max_blocks = sbi->blocks_per_seg; struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start); @@ -473,13 +538,7 @@ 
static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) } mutex_unlock(&dirty_i->seglist_lock); - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start); - new->len = sbi->blocks_per_seg; - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += sbi->blocks_per_seg; - cpc->trimmed += sbi->blocks_per_seg; + __add_discard_entry(sbi, cpc, 0, sbi->blocks_per_seg); return; } @@ -489,7 +548,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */ for (i = 0; i < entries; i++) - dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i]; + dmap[i] = ~(cur_map[i] | ckpt_map[i]); while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) { start = __find_rev_next_bit(dmap, max_blocks, end + 1); @@ -501,14 +560,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (end - start < cpc->trim_minlen) continue; - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - cpc->trimmed += end - start; - - list_add_tail(&new->list, head); - SM_I(sbi)->nr_discards += end - start; + __add_discard_entry(sbi, cpc, start, end); } } @@ -620,10 +672,10 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) /* Update valid block bitmap */ if (del > 0) { - if (f2fs_set_bit(offset, se->cur_valid_map)) + if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); } else { - if (!f2fs_clear_bit(offset, se->cur_valid_map)) + if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -1004,6 +1056,7 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) range->len < sbi->blocksize) return -EINVAL; + 
cpc.trimmed = 0; if (end <= MAIN_BLKADDR(sbi)) goto out; @@ -1015,10 +1068,11 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) cpc.trim_start = start_segno; cpc.trim_end = end_segno; cpc.trim_minlen = range->minlen >> sbi->log_blocksize; - cpc.trimmed = 0; /* do checkpoint to issue discard commands safely */ + mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); + mutex_unlock(&sbi->gc_mutex); out: range->len = cpc.trimmed << sbi->log_blocksize; return 0; @@ -1050,8 +1104,8 @@ static int __get_segment_type_4(struct page *page, enum page_type p_type) else return CURSEG_COLD_DATA; } else { - if (IS_DNODE(page) && !is_cold_node(page)) - return CURSEG_HOT_NODE; + if (IS_DNODE(page) && is_cold_node(page)) + return CURSEG_WARM_NODE; else return CURSEG_COLD_NODE; } @@ -1524,17 +1578,7 @@ int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type, static struct page *get_current_sit_page(struct f2fs_sb_info *sbi, unsigned int segno) { - struct sit_info *sit_i = SIT_I(sbi); - unsigned int offset = SIT_BLOCK_OFFSET(segno); - block_t blk_addr = sit_i->sit_base_addr + offset; - - check_seg_range(sbi, segno); - - /* calculate sit block address */ - if (f2fs_test_bit(offset, sit_i->sit_bitmap)) - blk_addr += sit_i->sit_blocks; - - return get_meta_page(sbi, blk_addr); + return get_meta_page(sbi, current_sit_addr(sbi, segno)); } static struct page *get_next_sit_page(struct f2fs_sb_info *sbi, @@ -1687,7 +1731,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) * #2, flush sit entries to sit page. 
*/ list_for_each_entry_safe(ses, tmp, head, set_list) { - struct page *page; + struct page *page = NULL; struct f2fs_sit_block *raw_sit = NULL; unsigned int start_segno = ses->start_segno; unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK, @@ -2200,7 +2244,7 @@ int __init create_segment_manager_caches(void) goto fail; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", - sizeof(struct nat_entry_set)); + sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) goto destory_discard_entry; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 2495bec..7f327c0 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -657,10 +657,7 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) { unsigned int block_off = SIT_BLOCK_OFFSET(start); - if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) - f2fs_clear_bit(block_off, sit_i->sit_bitmap); - else - f2fs_set_bit(block_off, sit_i->sit_bitmap); + f2fs_change_bit(block_off, sit_i->sit_bitmap); } static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) @@ -714,6 +711,9 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) */ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) { + if (sbi->sb->s_bdi->dirty_exceeded) + return 0; + if (type == DATA) return sbi->blocks_per_seg; else if (type == NODE) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41d6f70..f71421d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -51,8 +51,10 @@ enum { Opt_disable_ext_identify, Opt_inline_xattr, Opt_inline_data, + Opt_inline_dentry, Opt_flush_merge, Opt_nobarrier, + Opt_fastboot, Opt_err, }; @@ -69,8 +71,10 @@ static match_table_t f2fs_tokens = { {Opt_disable_ext_identify, "disable_ext_identify"}, {Opt_inline_xattr, "inline_xattr"}, {Opt_inline_data, "inline_data"}, + {Opt_inline_dentry, "inline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_nobarrier, "nobarrier"}, + {Opt_fastboot, "fastboot"}, {Opt_err, NULL}, }; @@ -340,12 +344,18 @@ static int 
parse_options(struct super_block *sb, char *options) case Opt_inline_data: set_opt(sbi, INLINE_DATA); break; + case Opt_inline_dentry: + set_opt(sbi, INLINE_DENTRY); + break; case Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; case Opt_nobarrier: set_opt(sbi, NOBARRIER); break; + case Opt_fastboot: + set_opt(sbi, FASTBOOT); + break; default: f2fs_msg(sb, KERN_ERR, "Unrecognized mount option \"%s\" or missing value", @@ -373,6 +383,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) fi->i_advise = 0; rwlock_init(&fi->ext.ext_lock); init_rwsem(&fi->i_sem); + INIT_RADIX_TREE(&fi->inmem_root, GFP_NOFS); INIT_LIST_HEAD(&fi->inmem_pages); mutex_init(&fi->inmem_lock); @@ -473,9 +484,9 @@ int f2fs_sync_fs(struct super_block *sb, int sync) trace_f2fs_sync_fs(sb, sync); if (sync) { - struct cp_control cpc = { - .reason = CP_SYNC, - }; + struct cp_control cpc; + + cpc.reason = test_opt(sbi, FASTBOOT) ? CP_UMOUNT : CP_SYNC; mutex_lock(&sbi->gc_mutex); write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); @@ -562,10 +573,14 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",disable_ext_identify"); if (test_opt(sbi, INLINE_DATA)) seq_puts(seq, ",inline_data"); + if (test_opt(sbi, INLINE_DENTRY)) + seq_puts(seq, ",inline_dentry"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) seq_puts(seq, ",nobarrier"); + if (test_opt(sbi, FASTBOOT)) + seq_puts(seq, ",fastboot"); seq_printf(seq, ",active_logs=%u", sbi->active_logs); return 0; @@ -654,7 +669,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) f2fs_sync_fs(sb, 1); need_restart_gc = true; } - } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { + } else if (!sbi->gc_thread) { err = start_gc_thread(sbi); if (err) goto restore_opts; @@ -667,7 +682,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) */ if ((*flags & MS_RDONLY) || !test_opt(sbi, 
FLUSH_MERGE)) { destroy_flush_cmd_control(sbi); - } else if (test_opt(sbi, FLUSH_MERGE) && !SM_I(sbi)->cmd_control_info) { + } else if (!SM_I(sbi)->cmd_control_info) { err = create_flush_cmd_control(sbi); if (err) goto restore_gc; @@ -922,7 +937,7 @@ retry: static int f2fs_fill_super(struct super_block *sb, void *data, int silent) { struct f2fs_sb_info *sbi; - struct f2fs_super_block *raw_super; + struct f2fs_super_block *raw_super = NULL; struct buffer_head *raw_super_buf; struct inode *root; long err = -EINVAL; @@ -1123,7 +1138,7 @@ try_onemore: * If filesystem is not mounted as read-only then * do start the gc_thread. */ - if (!f2fs_readonly(sb)) { + if (test_opt(sbi, BG_GC) && !f2fs_readonly(sb)) { /* After POR, we can run background GC thread.*/ err = start_gc_thread(sbi); if (err) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index deca872..5072bf9 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -83,7 +83,7 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, } if (strcmp(name, "") == 0) return -EINVAL; - return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); + return f2fs_getxattr(dentry->d_inode, type, name, buffer, size, NULL); } static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, @@ -398,7 +398,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } int f2fs_getxattr(struct inode *inode, int index, const char *name, - void *buffer, size_t buffer_size) + void *buffer, size_t buffer_size, struct page *ipage) { struct f2fs_xattr_entry *entry; void *base_addr; @@ -412,7 +412,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - base_addr = read_all_xattrs(inode, NULL); + base_addr = read_all_xattrs(inode, ipage); if (!base_addr) return -ENOMEM; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 34ab7db..969d792 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -115,7 +115,8 @@ extern const struct 
xattr_handler *f2fs_xattr_handlers[]; extern int f2fs_setxattr(struct inode *, int, const char *, const void *, size_t, struct page *, int); -extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); +extern int f2fs_getxattr(struct inode *, int, const char *, void *, + size_t, struct page *); extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); #else @@ -126,7 +127,8 @@ static inline int f2fs_setxattr(struct inode *inode, int index, return -EOPNOTSUPP; } static inline int f2fs_getxattr(struct inode *inode, int index, - const char *name, void *buffer, size_t buffer_size) + const char *name, void *buffer, + size_t buffer_size, struct page *dpage) { return -EOPNOTSUPP; } diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 860313a..87f14e9 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -33,7 +33,8 @@ #define F2FS_META_INO(sbi) (sbi->meta_ino_num) /* This flag is used by node and meta inodes, and by recovery */ -#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) +#define GFP_F2FS_ZERO (GFP_NOFS | __GFP_ZERO) +#define GFP_F2FS_HIGH_ZERO (GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM) /* * For further optimization on multi-head logs, on-disk layout supports maximum @@ -170,14 +171,12 @@ struct f2fs_extent { #define F2FS_INLINE_XATTR 0x01 /* file inline xattr flag */ #define F2FS_INLINE_DATA 0x02 /* file inline data flag */ +#define F2FS_INLINE_DENTRY 0x04 /* file inline dentry flag */ +#define F2FS_DATA_EXIST 0x08 /* file inline data exist flag */ #define MAX_INLINE_DATA (sizeof(__le32) * (DEF_ADDRS_PER_INODE - \ F2FS_INLINE_XATTR_ADDRS - 1)) -#define INLINE_DATA_OFFSET (PAGE_CACHE_SIZE - sizeof(struct node_footer) -\ - sizeof(__le32) * (DEF_ADDRS_PER_INODE + \ - DEF_NIDS_PER_INODE - 1)) - struct f2fs_inode { __le16 i_mode; /* file mode */ __u8 i_advise; /* file hints */ @@ -435,6 +434,24 @@ struct f2fs_dentry_block { __u8 filename[NR_DENTRY_IN_BLOCK][F2FS_SLOT_LEN]; } __packed; +/* for inline dir */ +#define 
NR_INLINE_DENTRY (MAX_INLINE_DATA * BITS_PER_BYTE / \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + BITS_PER_BYTE + 1)) +#define INLINE_DENTRY_BITMAP_SIZE ((NR_INLINE_DENTRY + \ + BITS_PER_BYTE - 1) / BITS_PER_BYTE) +#define INLINE_RESERVED_SIZE (MAX_INLINE_DATA - \ + ((SIZE_OF_DIR_ENTRY + F2FS_SLOT_LEN) * \ + NR_INLINE_DENTRY + INLINE_DENTRY_BITMAP_SIZE)) + +/* inline directory entry structure */ +struct f2fs_inline_dentry { + __u8 dentry_bitmap[INLINE_DENTRY_BITMAP_SIZE]; + __u8 reserved[INLINE_RESERVED_SIZE]; + struct f2fs_dir_entry dentry[NR_INLINE_DENTRY]; + __u8 filename[NR_INLINE_DENTRY][F2FS_SLOT_LEN]; +} __packed; + /* file types used in inode_info->flags */ enum { F2FS_FT_UNKNOWN, |