From bb413d6acd4e1c361daebf8486efc3923f429792 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 28 Jul 2016 12:12:38 +0800 Subject: f2fs: not allow to write illegal blkaddr We came across an error as below: [build_nat_area_bitmap:1710] nid[0x 1718] addr[0x 1c18ddc] ino[0x 1718] [build_nat_area_bitmap:1710] nid[0x 1719] addr[0x 1c193d5] ino[0x 1719] [build_nat_area_bitmap:1710] nid[0x 171a] addr[0x 1c1736e] ino[0x 171a] [build_nat_area_bitmap:1710] nid[0x 171b] addr[0x 58b3ee8f] ino[0x815f92ed] [build_nat_area_bitmap:1710] nid[0x 171c] addr[0x fcdc94b] ino[0x49366377] [build_nat_area_bitmap:1710] nid[0x 171d] addr[0x 7cd2facf] ino[0xb3c55300] [build_nat_area_bitmap:1710] nid[0x 171e] addr[0x bd4e25d0] ino[0x77c34c09] ... ... [build_nat_area_bitmap:1710] nid[0x 1718] addr[0x 1c18ddc] ino[0x 1718] [build_nat_area_bitmap:1710] nid[0x 1719] addr[0x 1c193d5] ino[0x 1719] [build_nat_area_bitmap:1710] nid[0x 171a] addr[0x 1c1736e] ino[0x 171a] [build_nat_area_bitmap:1710] nid[0x 171b] addr[0x 58b3ee8f] ino[0x815f92ed] [build_nat_area_bitmap:1710] nid[0x 171c] addr[0x fcdc94b] ino[0x49366377] [build_nat_area_bitmap:1710] nid[0x 171d] addr[0x 7cd2facf] ino[0xb3c55300] [build_nat_area_bitmap:1710] nid[0x 171e] addr[0x bd4e25d0] ino[0x77c34c09] A NAT block may be overwritten by a data block, so this patch forbids the write if the blkaddr is illegal. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b33f73e..87156c7 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -587,8 +587,8 @@ static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) { - f2fs_bug_on(sbi, blk_addr < SEG0_BLKADDR(sbi) - || blk_addr >= MAX_BLKADDR(sbi)); + BUG_ON(blk_addr < SEG0_BLKADDR(sbi) + || blk_addr >= MAX_BLKADDR(sbi)); } /* -- cgit v0.10.2 From 3e025740b976c409820a789f8dabc4ec2c50a950 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: 
Tue, 2 Aug 2016 10:56:40 -0700 Subject: f2fs: do not use discard_map for hard disks We don't need to keep discard_map, if disk does not support discard command. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index badd407..af69d6b 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -154,7 +154,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi) si->base_mem += sizeof(struct sit_info); si->base_mem += MAIN_SEGS(sbi) * sizeof(struct seg_entry); si->base_mem += f2fs_bitmap_size(MAIN_SEGS(sbi)); - si->base_mem += 3 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + si->base_mem += 2 * SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); + if (f2fs_discard_en(sbi)) + si->base_mem += SIT_VBLOCK_MAP_SIZE * MAIN_SEGS(sbi); si->base_mem += SIT_VBLOCK_MAP_SIZE; if (sbi->segs_per_sec > 1) si->base_mem += MAIN_SECS(sbi) * sizeof(struct sec_entry); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 14f5fe2..f478cb6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1072,6 +1072,13 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) cp->ckpt_flags = cpu_to_le32(ckpt_flags); } +static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) +{ + struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + + return blk_queue_discard(q); +} + static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { down_read(&sbi->cp_rwsem); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a46296f..59f578b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -660,7 +660,7 @@ static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc) bool force = (cpc->reason == CP_DISCARD); int i; - if (se->valid_blocks == max_blocks) + if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) return; if (!force) { @@ -818,12 +818,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) if (del > 0) { if (f2fs_test_and_set_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); - if 
(!f2fs_test_and_set_bit(offset, se->discard_map)) + if (f2fs_discard_en(sbi) && + !f2fs_test_and_set_bit(offset, se->discard_map)) sbi->discard_blks--; } else { if (!f2fs_test_and_clear_bit(offset, se->cur_valid_map)) f2fs_bug_on(sbi, 1); - if (f2fs_test_and_clear_bit(offset, se->discard_map)) + if (f2fs_discard_en(sbi) && + f2fs_test_and_clear_bit(offset, se->discard_map)) sbi->discard_blks++; } if (!f2fs_test_bit(offset, se->ckpt_valid_map)) @@ -2127,12 +2129,16 @@ static int build_sit_info(struct f2fs_sb_info *sbi) = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); sit_i->sentries[start].ckpt_valid_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); - sit_i->sentries[start].discard_map - = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); if (!sit_i->sentries[start].cur_valid_map || - !sit_i->sentries[start].ckpt_valid_map || - !sit_i->sentries[start].discard_map) + !sit_i->sentries[start].ckpt_valid_map) return -ENOMEM; + + if (f2fs_discard_en(sbi)) { + sit_i->sentries[start].discard_map + = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); + if (!sit_i->sentries[start].discard_map) + return -ENOMEM; + } } sit_i->tmp_map = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL); @@ -2276,8 +2282,12 @@ got_it: seg_info_from_raw_sit(se, &sit); /* build discard map only one time */ - memcpy(se->discard_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += sbi->blocks_per_seg - se->valid_blocks; + if (f2fs_discard_en(sbi)) { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += sbi->blocks_per_seg - + se->valid_blocks; + } if (sbi->segs_per_sec > 1) { struct sec_entry *e = get_sec_entry(sbi, start); -- cgit v0.10.2 From 44819a76d1304dd67672abaea3be55eae70ef766 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 6 Aug 2016 21:09:40 +0800 Subject: f2fs: clean up bio cache trace Trace info related to bio cache operation is out of format, clean up it. Before: <...>-28308 [002] .... 
4781.052703: f2fs_submit_write_bio: dev = (251,1), WRITEWRITE_SYNC ^H, DATA, sector = 271424, size = 126976 <...>-28308 [002] .... 4781.052820: f2fs_submit_page_mbio: dev = (251,1), ino = 103, page_index = 0x1f, oldaddr = 0xffffffff, newaddr = 0x84a7 rw = WRITEWRITE_SYNCi ^H, type = DATA kworker/u8:2-29988 [001] .... 5549.293877: f2fs_submit_page_mbio: dev = (251,1), ino = 91, page_index = 0xd, oldaddr = 0xffffffff, newaddr = 0x782f rw = WRITE0x0i ^H type = DATA After: kworker/u8:2-8678 [000] .... 7945.124459: f2fs_submit_write_bio: dev = (251,1), rw = WRITE_SYNC, DATA, sector = 74080, size = 53248 kworker/u8:2-8678 [000] .... 7945.124551: f2fs_submit_page_mbio: dev = (251,1), ino = 11, page_index = 0xec, oldaddr = 0xffffffff, newaddr = 0x243a, rw = WRITE, type = DATA Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index ff95fd0..903a091 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -58,16 +58,12 @@ TRACE_DEFINE_ENUM(CP_DISCARD); #define F2FS_BIO_FLAG_MASK(t) (t & (REQ_RAHEAD | WRITE_FLUSH_FUA)) #define F2FS_BIO_EXTRA_MASK(t) (t & (REQ_META | REQ_PRIO)) -#define show_bio_type(op, op_flags) show_bio_op(op), \ - show_bio_op_flags(op_flags), show_bio_extra(op_flags) - -#define show_bio_op(op) \ - __print_symbolic(op, \ - { READ, "READ" }, \ - { WRITE, "WRITE" }) +#define show_bio_type(op_flags) show_bio_op_flags(op_flags), \ + show_bio_extra(op_flags) #define show_bio_op_flags(flags) \ __print_symbolic(F2FS_BIO_FLAG_MASK(flags), \ + { 0, "WRITE" }, \ { REQ_RAHEAD, "READAHEAD" }, \ { READ_SYNC, "READ_SYNC" }, \ { WRITE_SYNC, "WRITE_SYNC" }, \ @@ -754,12 +750,12 @@ DECLARE_EVENT_CLASS(f2fs__submit_page_bio, ), TP_printk("dev = (%d,%d), ino = %lu, page_index = 0x%lx, " - "oldaddr = 0x%llx, newaddr = 0x%llx rw = %s%si%s, type = %s", + "oldaddr = 0x%llx, newaddr = 0x%llx, rw = %s%s, type = %s", show_dev_ino(__entry), (unsigned long)__entry->index, (unsigned long 
long)__entry->old_blkaddr, (unsigned long long)__entry->new_blkaddr, - show_bio_type(__entry->op, __entry->op_flags), + show_bio_type(__entry->op_flags), show_block_type(__entry->type)) ); @@ -806,9 +802,9 @@ DECLARE_EVENT_CLASS(f2fs__submit_bio, __entry->size = bio->bi_iter.bi_size; ), - TP_printk("dev = (%d,%d), %s%s%s, %s, sector = %lld, size = %u", + TP_printk("dev = (%d,%d), rw = %s%s, %s, sector = %lld, size = %u", show_dev(__entry), - show_bio_type(__entry->op, __entry->op_flags), + show_bio_type(__entry->op_flags), show_block_type(__entry->type), (unsigned long long)__entry->sector, __entry->size) -- cgit v0.10.2 From 2d9e9c32a04edef5482e53d15535b77aa7e595cd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 11 Aug 2016 10:18:38 +0800 Subject: f2fs: reduce batch size of fstrim This is to reduce the batch size of fstrim to avoid long latency. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f478cb6..59eed3e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -158,7 +158,7 @@ enum { CP_DISCARD, }; -#define DEF_BATCHED_TRIM_SECTIONS 32 +#define DEF_BATCHED_TRIM_SECTIONS 2 #define BATCHED_TRIM_SEGMENTS(sbi) \ (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) #define BATCHED_TRIM_BLOCKS(sbi) \ -- cgit v0.10.2 From f83a2584cae8f4deacb4c82da0b061813d59a40c Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 18 Aug 2016 21:01:18 +0800 Subject: f2fs: add discard info to sys entry of f2fs status This patch add discard block count to sys entry of f2fs status Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index af69d6b..ae13521 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -54,6 +54,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); si->valid_count = valid_user_blocks(sbi); + si->discard_blks = discard_blocks(sbi); si->valid_node_count = valid_node_count(sbi); si->valid_inode_count 
= valid_inode_count(sbi); si->inline_xattr = atomic_read(&sbi->inline_xattr); @@ -230,8 +231,13 @@ static int stat_show(struct seq_file *s, void *v) si->ssa_area_segs, si->main_area_segs); seq_printf(s, "(OverProv:%d Resv:%d)]\n\n", si->overp_segs, si->rsvd_segs); - seq_printf(s, "Utilization: %d%% (%d valid blocks)\n", - si->utilization, si->valid_count); + if (test_opt(si->sbi, DISCARD)) + seq_printf(s, "Utilization: %u%% (%u valid blocks, %u discard blocks)\n", + si->utilization, si->valid_count, si->discard_blks); + else + seq_printf(s, "Utilization: %u%% (%u valid blocks)\n", + si->utilization, si->valid_count); + seq_printf(s, " - Node: %u (Inode: %u, ", si->valid_node_count, si->valid_inode_count); seq_printf(s, "Other: %u)\n - Data: %u\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 59eed3e..0f45990 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1250,6 +1250,11 @@ static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) return sbi->total_valid_block_count; } +static inline block_t discard_blocks(struct f2fs_sb_info *sbi) +{ + return sbi->discard_blks; +} + static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -2136,7 +2141,7 @@ struct f2fs_stat_info { int total_count, utilization; int bg_gc, wb_bios; int inline_xattr, inline_inode, inline_dir, orphans; - unsigned int valid_count, valid_node_count, valid_inode_count; + unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; int rsvd_segs, overp_segs; -- cgit v0.10.2 From 58cce381fa4e8cfbe7ce005a6a858267a3b481f2 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 18 Aug 2016 21:01:19 +0800 Subject: f2fs: skip new checkpoint when doing fstrim without fs change This patch enables to do fstrim without checkpoint, if there is no fs change. 
Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index f94d01e..cd0443d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1190,6 +1190,17 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_merged_bios(sbi); + /* this is the case of multiple fstrims without any changes */ + if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) { + f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt); + f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries); + f2fs_bug_on(sbi, prefree_segments(sbi)); + flush_sit_entries(sbi, cpc); + clear_prefree_segments(sbi, cpc); + unblock_operations(sbi); + goto out; + } + /* * update checkpoint pack index * Increase the version number so that -- cgit v0.10.2 From 7c4abcbeccdd286e7d71b3e8f9fefad84112e54c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 18 Aug 2016 17:46:14 +0800 Subject: f2fs: set dirty state for filesystem only when updating meta data We don't guarantee integrity of user data after checkpoint, since we only guarantee meta data integrity for data consistency of the filesystem. For this reason, we only need to set the fs as dirty when meta data is updated, so that we can skip writing a checkpoint in cases where only non-meta data is updated. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0f45990..5d2db47 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1200,6 +1200,10 @@ static inline void dec_valid_block_count(struct f2fs_sb_info *sbi, static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) { percpu_counter_inc(&sbi->nr_pages[count_type]); + + if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES) + return; + set_sbi_flag(sbi, SBI_IS_DIRTY); } -- cgit v0.10.2 From 43ced84ec8a7cb1b2e56dd1e262a0c63db79c3c1 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 Aug 2016 23:13:46 +0800 Subject: f2fs: clean up foreground GC flow This patch changes to check valid block number of one GCed section directly instead of checking the number in all segments of section one by one in order to clean up codes of foreground GC. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8f7fa32..c1599b4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -815,7 +815,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, struct blk_plug plug; unsigned int segno = start_segno; unsigned int end_segno = start_segno + sbi->segs_per_sec; - int seg_freed = 0; + int sec_freed = 0; unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ? SUM_TYPE_DATA : SUM_TYPE_NODE; @@ -871,22 +871,20 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, blk_finish_plug(&plug); - if (gc_type == FG_GC) { - while (start_segno < end_segno) - if (get_valid_blocks(sbi, start_segno++, 1) == 0) - seg_freed++; - } + if (gc_type == FG_GC && + get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0) + sec_freed = 1; stat_inc_call_count(sbi->stat_info); - return seg_freed; + return sec_freed; } int f2fs_gc(struct f2fs_sb_info *sbi, bool sync) { unsigned int segno; int gc_type = sync ? 
FG_GC : BG_GC; - int sec_freed = 0, seg_freed; + int sec_freed = 0; int ret = -EINVAL; struct cp_control cpc; struct gc_inode_list gc_list = { @@ -925,9 +923,8 @@ gc_more: goto stop; ret = 0; - seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type); - - if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec) + if (do_garbage_collect(sbi, segno, &gc_list, gc_type) && + gc_type == FG_GC) sec_freed++; if (gc_type == FG_GC) -- cgit v0.10.2 From d600af236da51d9e3b90d21a23f95b820bd02e2f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 19 Aug 2016 23:13:47 +0800 Subject: f2fs: avoid unneeded loop in build_sit_entries When building each sit entry in cache, firstly, we will load it from sit page, and then check all entries in sit journal, if there is one updated entry in journal, cover cached entry with the journaled one. Actually, most of check operation is unneeded since we only need to update cached entries with journaled entries in batch, so changing the flow as below for more efficient: 1. load all sit entries into cache from sit pages; 2. update sit entries with journal. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 59f578b..a394012 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2262,22 +2262,11 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) struct f2fs_sit_entry sit; struct page *page; - down_read(&curseg->journal_rwsem); - for (i = 0; i < sits_in_cursum(journal); i++) { - if (le32_to_cpu(segno_in_journal(journal, i)) - == start) { - sit = sit_in_journal(journal, i); - up_read(&curseg->journal_rwsem); - goto got_it; - } - } - up_read(&curseg->journal_rwsem); - page = get_current_sit_page(sbi, start); sit_blk = (struct f2fs_sit_block *)page_address(page); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; f2fs_put_page(page, 1); -got_it: + check_block_count(sbi, start, &sit); seg_info_from_raw_sit(se, &sit); @@ -2289,13 +2278,40 @@ got_it: se->valid_blocks; } - if (sbi->segs_per_sec > 1) { - struct sec_entry *e = get_sec_entry(sbi, start); - e->valid_blocks += se->valid_blocks; - } + if (sbi->segs_per_sec > 1) + get_sec_entry(sbi, start)->valid_blocks += + se->valid_blocks; } start_blk += readed; } while (start_blk < sit_blk_cnt); + + down_read(&curseg->journal_rwsem); + for (i = 0; i < sits_in_cursum(journal); i++) { + struct f2fs_sit_entry sit; + struct seg_entry *se; + unsigned int old_valid_blocks; + + start = le32_to_cpu(segno_in_journal(journal, i)); + se = &sit_i->sentries[start]; + sit = sit_in_journal(journal, i); + + old_valid_blocks = se->valid_blocks; + + check_block_count(sbi, start, &sit); + seg_info_from_raw_sit(se, &sit); + + if (f2fs_discard_en(sbi)) { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks - + se->valid_blocks; + } + + if (sbi->segs_per_sec > 1) + get_sec_entry(sbi, start)->valid_blocks += + se->valid_blocks - old_valid_blocks; + } + up_read(&curseg->journal_rwsem); } static void init_free_segmap(struct f2fs_sb_info *sbi) -- cgit v0.10.2 From 
58383befc3377b4e2305b98f91e445af73ba8d62 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 20 Aug 2016 15:12:02 +0800 Subject: f2fs: fix to do f2fs_balance_fs in f2fs_map_blocks correctly If we preallocate blocks with f2fs_reserve_blocks in f2fs_map_blocks, we should call f2fs_balance_fs for checking and reclaiming space, fix it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ccb401e..7d618a9 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -783,6 +783,7 @@ skip: err = reserve_new_blocks(&dn, prealloc); if (err) goto sync_out; + allocated = dn.node_changed; map->m_len += dn.ofs_in_node - ofs_in_node; if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) { -- cgit v0.10.2 From e932835377f95c91789c8572a5aaaa9daad5d262 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 21 Aug 2016 23:21:29 +0800 Subject: f2fs: check return value of write_checkpoint during fstrim During fstrim, if one of multiple write_checkpoint failed, break off and return error number to caller. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a394012..020767c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1303,6 +1303,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) mutex_lock(&sbi->gc_mutex); err = write_checkpoint(sbi, &cpc); mutex_unlock(&sbi->gc_mutex); + if (err) + break; } out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); -- cgit v0.10.2 From 5f8eaf1f9b99df1f51988205e27634a22f497eb7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 21 Aug 2016 23:21:31 +0800 Subject: f2fs: remove redundant judgement condition in available_free_memory In available_free_memory, there are two same judgement conditions which is used for checking NAT excess, remove one of them. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index f75d197..8a28800 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -54,8 +54,6 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); if (excess_cached_nats(sbi)) res = false; - if (nm_i->nat_cnt > DEF_NAT_CACHE_THRESHOLD) - res = false; } else if (type == DIRTY_DENTS) { if (sbi->sb->s_bdi->wb.dirty_exceeded) return false; -- cgit v0.10.2 From 69494229ba5ada1b5521e3111328e8fe585c78d7 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Tue, 23 Aug 2016 20:10:47 +0800 Subject: f2fs: remove unnecessary initialization `flags' is used to save value from userspace, there is no need to initialize it, and FS_FL_USER_VISIBLE is the mask for getflags. Signed-off-by: Sheng Yong Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 47abb96..7c6ee7e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1454,7 +1454,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) { struct inode *inode = file_inode(filp); struct f2fs_inode_info *fi = F2FS_I(inode); - unsigned int flags = fi->i_flags & FS_FL_USER_VISIBLE; + unsigned int flags; unsigned int oldflags; int ret; -- cgit v0.10.2 From 6a7a3aedd562838fd402cccb5ad07b8063a3582e Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 23 Aug 2016 15:23:59 +0000 Subject: f2fs: fix non static symbol warning Fixes the following sparse warning: fs/f2fs/data.c:969:12: warning: symbol 'f2fs_grab_bio' was not declared. Should it be static? 
Signed-off-by: Wei Yongjun Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7d618a9..ced6beb 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -967,8 +967,8 @@ out: return ret; } -struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, - unsigned nr_pages) +static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr, + unsigned nr_pages) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct fscrypt_ctx *ctx = NULL; -- cgit v0.10.2 From dfd02e4de1c5f40c268984254045d388ab0c3e74 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 20 Aug 2016 15:12:01 +0800 Subject: f2fs: fix to preallocate block only aligned to 4K In write_begin(), we skip checking dnode block for preallocating block when whole block needs to be updated since we preallocated its block in f2fs_preallocate_blocks, for partial updated block, we will still try to lock its node and do preallocation in write_begin(), so in f2fs_preallocate_blocks we should not preallocate its block. But previously, the calculation of preallocating block number is incorrect, fix it. 
Signed-off-by: Chao Yu [Jaegeuk Kim: fix a bug] Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ced6beb..7c8e219 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -626,7 +626,12 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) ssize_t ret = 0; map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos); - map.m_len = F2FS_BYTES_TO_BLK(iov_iter_count(from)); + map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from)); + if (map.m_len > map.m_lblk) + map.m_len -= map.m_lblk; + else + map.m_len = 0; + map.m_next_pgofs = NULL; if (f2fs_encrypted_inode(inode)) @@ -672,6 +677,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, bool allocated = false; block_t blkaddr; + if (!maxblocks) + return 0; + map->m_len = 0; map->m_flags = 0; -- cgit v0.10.2 From 5d2b42ede71c9da0bf4248fd2d409918fb065b5f Mon Sep 17 00:00:00 2001 From: Shuoran Liu Date: Thu, 25 Aug 2016 20:42:09 +0800 Subject: f2fs: fix a bug when using namehash to locate dentry bucket In the following scenario, 1) we don't have the key and doing a lookup for encrypted file, 2) and the encrypted filename is big name we should use fname->hash as name hash value instead of what is calculated by fname->disk_name. Because in such case, fname->disk_name is empty. 
Signed-off-by: Shuoran Liu Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9054aea..b3e6f7f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -172,7 +172,10 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, int max_slots; f2fs_hash_t namehash; - namehash = f2fs_dentry_hash(&name); + if(fname->hash) + namehash = cpu_to_le32(fname->hash); + else + namehash = f2fs_dentry_hash(&name); nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); nblock = bucket_blocks(level); -- cgit v0.10.2 From 97c1794a5dc160164aa7f161310da15c34d62641 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 9 May 2016 19:56:34 +0800 Subject: f2fs: enable inline_dentry by default and add noinline_dentry option Make inline_dentry as default mount option to improve space usage and IO performance in scenario of numerous small directory. It adds noinline_dentry mount option, instead. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/Documentation/filesystems/f2fs.txt b/Documentation/filesystems/f2fs.txt index ecd8080..753dd4f 100644 --- a/Documentation/filesystems/f2fs.txt +++ b/Documentation/filesystems/f2fs.txt @@ -131,6 +131,7 @@ inline_dentry Enable the inline dir feature: data in new created directory entries can be written into inode block. The space of inode block which is used to store inline dentries is limited to ~3.4k. +noinline_dentry Diable the inline dentry feature. flush_merge Merge concurrent cache_flush commands as much as possible to eliminate redundant command issues. 
If the underlying device handles the cache_flush command relatively slowly, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 7f863a6..555217f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -87,6 +87,7 @@ enum { Opt_inline_xattr, Opt_inline_data, Opt_inline_dentry, + Opt_noinline_dentry, Opt_flush_merge, Opt_noflush_merge, Opt_nobarrier, @@ -118,6 +119,7 @@ static match_table_t f2fs_tokens = { {Opt_inline_xattr, "inline_xattr"}, {Opt_inline_data, "inline_data"}, {Opt_inline_dentry, "inline_dentry"}, + {Opt_noinline_dentry, "noinline_dentry"}, {Opt_flush_merge, "flush_merge"}, {Opt_noflush_merge, "noflush_merge"}, {Opt_nobarrier, "nobarrier"}, @@ -488,6 +490,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_inline_dentry: set_opt(sbi, INLINE_DENTRY); break; + case Opt_noinline_dentry: + clear_opt(sbi, INLINE_DENTRY); + break; case Opt_flush_merge: set_opt(sbi, FLUSH_MERGE); break; @@ -878,6 +883,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) seq_puts(seq, ",noinline_data"); if (test_opt(sbi, INLINE_DENTRY)) seq_puts(seq, ",inline_dentry"); + else + seq_puts(seq, ",noinline_dentry"); if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE)) seq_puts(seq, ",flush_merge"); if (test_opt(sbi, NOBARRIER)) @@ -975,6 +982,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, BG_GC); set_opt(sbi, INLINE_DATA); + set_opt(sbi, INLINE_DENTRY); set_opt(sbi, EXTENT_CACHE); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); -- cgit v0.10.2 From 74fa5f3d43bca87257e9da7da95be8735ffa2b96 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 21 Aug 2016 23:21:30 +0800 Subject: f2fs: schedule in between two continous batch discards In batch discard approach of fstrim will grab/release gc_mutex lock repeatly, it makes contention of the lock becoming more intensive. 
So after one batch discards were issued in checkpoint and the lock was released, it's better to do schedule() to increase opportunity of grabbing gc_mutex lock for other competitors. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 020767c..d0f74eb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1305,6 +1305,8 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) mutex_unlock(&sbi->gc_mutex); if (err) break; + + schedule(); } out: range->len = F2FS_BLK_TO_BYTES(cpc.trimmed); -- cgit v0.10.2 From 7ea984b0604ac37e806ddc34baf950230bfdaadd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 27 Aug 2016 00:14:31 +0800 Subject: f2fs: do in batch synchronously readahead during GC In order to enhance performance, we try to readahead node pages during GC, but before loading a node page we must get its block address, which is stored in the NAT table, so the synchronous read of a single NAT page blocks our readahead flow. f2fs_submit_page_bio: dev = (251,0), ino = 2, page_index = 0xa1e, oldaddr = 0xa1e, newaddr = 0xa1e, rw = READ_SYNC(MP), type = META f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x35e9, oldaddr = 0x72d7a, newaddr = 0x72d7a, rw = READAHEAD ^H, type = NODE f2fs_submit_page_bio: dev = (251,0), ino = 2, page_index = 0xc1f, oldaddr = 0xc1f, newaddr = 0xc1f, rw = READ_SYNC(MP), type = META f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x389d, oldaddr = 0x72d7d, newaddr = 0x72d7d, rw = READAHEAD ^H, type = NODE f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x3a82, oldaddr = 0x72d7f, newaddr = 0x72d7f, rw = READAHEAD ^H, type = NODE f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x3bfa, oldaddr = 0x72d86, newaddr = 0x72d86, rw = READAHEAD ^H, type = NODE This patch adds one phase that does readahead of NAT pages in batch before the readahead of node pages, for better efficiency. 
f2fs_submit_page_bio: dev = (251,0), ino = 2, page_index = 0x1952, oldaddr = 0x1952, newaddr = 0x1952, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xc34, oldaddr = 0xc34, newaddr = 0xc34, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xa33, oldaddr = 0xa33, newaddr = 0xa33, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xc30, oldaddr = 0xc30, newaddr = 0xc30, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xc32, oldaddr = 0xc32, newaddr = 0xc32, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xc26, oldaddr = 0xc26, newaddr = 0xc26, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xa2b, oldaddr = 0xa2b, newaddr = 0xa2b, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xc23, oldaddr = 0xc23, newaddr = 0xc23, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xc24, oldaddr = 0xc24, newaddr = 0xc24, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xa10, oldaddr = 0xa10, newaddr = 0xa10, rw = READ_SYNC(MP), type = META f2fs_submit_page_mbio: dev = (251,0), ino = 2, page_index = 0xc2c, oldaddr = 0xc2c, newaddr = 0xc2c, rw = READ_SYNC(MP), type = META f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x5db7, oldaddr = 0x6be00, newaddr = 0x6be00, rw = READAHEAD ^H, type = NODE f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x5db9, oldaddr = 0x6be17, newaddr = 0x6be17, rw = READAHEAD ^H, type = NODE f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x5dbc, oldaddr = 0x6be1a, newaddr = 0x6be1a, rw = READAHEAD ^H, type = NODE f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x5dc3, oldaddr = 0x6be20, newaddr = 0x6be20, rw = READAHEAD ^H, 
type = NODE f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x5dc7, oldaddr = 0x6be24, newaddr = 0x6be24, rw = READAHEAD ^H, type = NODE f2fs_submit_page_bio: dev = (251,0), ino = 1, page_index = 0x5dc9, oldaddr = 0x6be25, newaddr = 0x6be25, rw = READAHEAD ^H, type = NODE Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c1599b4..cdc44a6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -423,10 +423,10 @@ static int check_valid_map(struct f2fs_sb_info *sbi, static void gc_node_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, unsigned int segno, int gc_type) { - bool initial = true; struct f2fs_summary *entry; block_t start_addr; int off; + int phase = 0; start_addr = START_BLOCK(sbi, segno); @@ -445,10 +445,18 @@ next_step: if (check_valid_map(sbi, segno, off) == 0) continue; - if (initial) { + if (phase == 0) { + ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, + META_NAT, true); + continue; + } + + if (phase == 1) { ra_node_page(sbi, nid); continue; } + + /* phase == 2 */ node_page = get_node_page(sbi, nid); if (IS_ERR(node_page)) continue; @@ -469,10 +477,8 @@ next_step: stat_inc_node_blk_count(sbi, 1, gc_type); } - if (initial) { - initial = false; + if (++phase < 3) goto next_step; - } } /* @@ -706,6 +712,7 @@ next_step: struct node_info dni; /* dnode info for the data */ unsigned int ofs_in_node, nofs; block_t start_bidx; + nid_t nid = le32_to_cpu(entry->nid); /* stop BG_GC if there is not enough free sections. 
*/ if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) @@ -715,7 +722,13 @@ next_step: continue; if (phase == 0) { - ra_node_page(sbi, le32_to_cpu(entry->nid)); + ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), 1, + META_NAT, true); + continue; + } + + if (phase == 1) { + ra_node_page(sbi, nid); continue; } @@ -723,14 +736,14 @@ next_step: if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs)) continue; - if (phase == 1) { + if (phase == 2) { ra_node_page(sbi, dni.ino); continue; } ofs_in_node = le16_to_cpu(entry->ofs_in_node); - if (phase == 2) { + if (phase == 3) { inode = f2fs_iget(sb, dni.ino); if (IS_ERR(inode) || is_bad_inode(inode)) continue; @@ -756,7 +769,7 @@ next_step: continue; } - /* phase 3 */ + /* phase 4 */ inode = find_gc_inode(gc_list, dni.ino); if (inode) { struct f2fs_inode_info *fi = F2FS_I(inode); @@ -789,7 +802,7 @@ next_step: } } - if (++phase < 4) + if (++phase < 5) goto next_step; } -- cgit v0.10.2 From 9421d57051c534c7477f98d0576b876237fbbc4c Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 28 Aug 2016 18:57:55 +0800 Subject: f2fs: fix to do security initialization of encrypted inode with original filename When creating new inode, security_inode_init_security will be called for initializing security info related to the inode, and filename is passed to security module, it helps security module such as SELinux to know which rule or label could be applied for the inode with specified name. Previously, if new inode is created as an encrypted one, f2fs will transfer encrypted filename to security module which may fail the check of security policy belonging to the inode. So in order to fix this issue, transfer the original unencrypted filename instead.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index b3e6f7f..3522f6f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -378,7 +378,8 @@ static int make_empty_dir(struct inode *inode, } struct page *init_inode_metadata(struct inode *inode, struct inode *dir, - const struct qstr *name, struct page *dpage) + const struct qstr *new_name, const struct qstr *orig_name, + struct page *dpage) { struct page *page; int err; @@ -403,7 +404,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, if (err) goto put_error; - err = f2fs_init_security(inode, dir, name, page); + err = f2fs_init_security(inode, dir, orig_name, page); if (err) goto put_error; @@ -420,8 +421,8 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, set_cold_node(inode, page); } - if (name) - init_dent_inode(name, page); + if (new_name) + init_dent_inode(new_name, page); /* * This file should be checkpointed during fsync. @@ -507,6 +508,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, } int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, + const struct qstr *orig_name, struct inode *inode, nid_t ino, umode_t mode) { unsigned int bit_pos; @@ -572,7 +574,8 @@ add_dentry: if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, new_name, NULL); + page = init_inode_metadata(inode, dir, new_name, + orig_name, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -622,9 +625,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, err = -EAGAIN; if (f2fs_has_inline_dentry(dir)) - err = f2fs_add_inline_entry(dir, &new_name, inode, ino, mode); + err = f2fs_add_inline_entry(dir, &new_name, fname.usr_fname, + inode, ino, mode); if (err == -EAGAIN) - err = f2fs_add_regular_entry(dir, &new_name, inode, ino, mode); + err = f2fs_add_regular_entry(dir, &new_name, fname.usr_fname, + inode, ino, mode); 
fscrypt_free_filename(&fname); f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); @@ -637,7 +642,7 @@ int f2fs_do_tmpfile(struct inode *inode, struct inode *dir) int err = 0; down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, NULL, NULL); + page = init_inode_metadata(inode, dir, NULL, NULL, NULL); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5d2db47..98c4093 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1926,7 +1926,7 @@ bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *, void do_make_empty_dir(struct inode *, struct inode *, struct f2fs_dentry_ptr *); struct page *init_inode_metadata(struct inode *, struct inode *, - const struct qstr *, struct page *); + const struct qstr *, const struct qstr *, struct page *); void update_parent_metadata(struct inode *, struct inode *, unsigned int); int room_for_filename(const void *, int, int); void f2fs_drop_nlink(struct inode *, struct inode *); @@ -1940,7 +1940,7 @@ int update_dent_inode(struct inode *, struct inode *, const struct qstr *); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, const struct qstr *, f2fs_hash_t , unsigned int); int f2fs_add_regular_entry(struct inode *, const struct qstr *, - struct inode *, nid_t, umode_t); + const struct qstr *, struct inode *, nid_t, umode_t); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, umode_t); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, @@ -2310,8 +2310,8 @@ bool recover_inline_data(struct inode *, struct page *); struct f2fs_dir_entry *find_in_inline_dir(struct inode *, struct fscrypt_name *, struct page **); int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *); -int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *, - nid_t, umode_t); +int f2fs_add_inline_entry(struct inode *, const struct qstr *, + const struct qstr *, struct 
inode *, nid_t, umode_t); void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *, struct inode *, struct inode *); bool f2fs_empty_inline_dir(struct inode *); diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index ccea873..a96c825 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -424,7 +424,7 @@ static int f2fs_add_inline_entries(struct inode *dir, ino = le32_to_cpu(de->ino); fake_mode = get_de_type(de) << S_SHIFT; - err = f2fs_add_regular_entry(dir, &new_name, NULL, + err = f2fs_add_regular_entry(dir, &new_name, NULL, NULL, ino, fake_mode); if (err) goto punch_dentry_pages; @@ -488,17 +488,17 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage, return f2fs_move_rehashed_dirents(dir, ipage, inline_dentry); } -int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, - struct inode *inode, nid_t ino, umode_t mode) +int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, + const struct qstr *orig_name, + struct inode *inode, nid_t ino, umode_t mode) { struct f2fs_sb_info *sbi = F2FS_I_SB(dir); struct page *ipage; unsigned int bit_pos; f2fs_hash_t name_hash; - size_t namelen = name->len; struct f2fs_inline_dentry *dentry_blk = NULL; struct f2fs_dentry_ptr d; - int slots = GET_DENTRY_SLOTS(namelen); + int slots = GET_DENTRY_SLOTS(new_name->len); struct page *page = NULL; int err = 0; @@ -519,7 +519,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, if (inode) { down_write(&F2FS_I(inode)->i_sem); - page = init_inode_metadata(inode, dir, name, ipage); + page = init_inode_metadata(inode, dir, new_name, + orig_name, ipage); if (IS_ERR(page)) { err = PTR_ERR(page); goto fail; @@ -528,9 +529,9 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name, f2fs_wait_on_page_writeback(ipage, NODE, true); - name_hash = f2fs_dentry_hash(name); + name_hash = f2fs_dentry_hash(new_name); make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); - f2fs_update_dentry(ino, mode, &d, 
name, name_hash, bit_pos); + f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); -- cgit v0.10.2 From e06f86e61d7a67fe6e826010f57aa39c674f4b1b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 28 Aug 2016 22:00:12 +0800 Subject: f2fs crypto: avoid unneeded memory allocation in ->readdir When decrypting dirents in ->readdir, fscrypt_fname_disk_to_usr won't change content of original encrypted dirent, we don't need to allocate additional buffer for storing mirror of it, so get rid of it. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 3522f6f..fbc8ede 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -796,16 +796,9 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d, int save_len = fstr->len; int ret; - de_name.name = f2fs_kmalloc(de_name.len, GFP_NOFS); - if (!de_name.name) - return false; - - memcpy(de_name.name, d->filename[bit_pos], de_name.len); - ret = fscrypt_fname_disk_to_usr(d->inode, (u32)de->hash_code, 0, &de_name, fstr); - kfree(de_name.name); if (ret < 0) return true; -- cgit v0.10.2 From 167451efb53c7999fb72591c46f29de09cd8f8b0 Mon Sep 17 00:00:00 2001 From: Shuoran Liu Date: Mon, 29 Aug 2016 11:27:55 +0800 Subject: f2fs: set encryption name flag in add inline entry path This patch sets encryption name flag in the add inline entry path if filename is encrypted. 
Signed-off-by: Shuoran Liu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index a96c825..8a96107 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -525,6 +525,8 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, err = PTR_ERR(page); goto fail; } + if (f2fs_encrypted_inode(dir)) + file_set_enc_name(inode); } f2fs_wait_on_page_writeback(ipage, NODE, true); -- cgit v0.10.2 From 275b66b09e85cf0520dc610dd89706952751a473 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 29 Aug 2016 23:58:34 +0800 Subject: f2fs: support async discard Like most filesystems, f2fs will issue discard command synchronously, so when user trigger fstrim through ioctl, multiple discard commands will be issued serially with sync mode, which makes poor performance. In this patch we try to support async discard, so that all discard commands can be issued and be waited for endio in batch to improve performance. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index cd0443d..64a685d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1197,6 +1197,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, prefree_segments(sbi)); flush_sit_entries(sbi, cpc); clear_prefree_segments(sbi, cpc); + f2fs_wait_all_discard_bio(sbi); unblock_operations(sbi); goto out; } @@ -1216,6 +1217,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* unlock all the fs_lock[] in do_checkpoint() */ err = do_checkpoint(sbi, cpc); + f2fs_wait_all_discard_bio(sbi); + unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 98c4093..c2478a1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -211,6 +211,13 @@ struct discard_entry { int len; /* # of consecutive blocks of the discard */ }; +struct bio_entry { + struct list_head list; + struct bio *bio; + struct completion event; + int 
error; +}; + /* for the list of fsync inodes, used only during recovery */ struct fsync_inode_entry { struct list_head list; /* list head */ @@ -645,6 +652,7 @@ struct f2fs_sm_info { /* for small discard management */ struct list_head discard_list; /* 4KB discard list */ + struct list_head wait_list; /* linked with issued discard bio */ int nr_discards; /* # of discards in the list */ int max_discards; /* max. discards to be issued */ @@ -2026,6 +2034,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *); void invalidate_blocks(struct f2fs_sb_info *, block_t); bool is_checkpointed_data(struct f2fs_sb_info *, block_t); void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); +void f2fs_wait_all_discard_bio(struct f2fs_sb_info *); void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); void release_discard_addrs(struct f2fs_sb_info *); bool discard_next_dnode(struct f2fs_sb_info *, block_t); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 9e652d5..2f38bbb 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -636,6 +636,8 @@ out: invalidate = true; } + f2fs_wait_all_discard_bio(sbi); + /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) sync_meta_pages(sbi, META, LONG_MAX); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d0f74eb..93c5e26 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -26,6 +26,7 @@ #define __reverse_ffz(x) __reverse_ffs(~(x)) static struct kmem_cache *discard_entry_slab; +static struct kmem_cache *bio_entry_slab; static struct kmem_cache *sit_entry_set_slab; static struct kmem_cache *inmem_entry_slab; @@ -580,6 +581,74 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } +static struct bio_entry *__add_bio_entry(struct f2fs_sb_info *sbi, + struct bio *bio) +{ + struct list_head *wait_list = &(SM_I(sbi)->wait_list); + struct bio_entry *be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS); + + 
INIT_LIST_HEAD(&be->list); + be->bio = bio; + init_completion(&be->event); + list_add_tail(&be->list, wait_list); + + return be; +} + +void f2fs_wait_all_discard_bio(struct f2fs_sb_info *sbi) +{ + struct list_head *wait_list = &(SM_I(sbi)->wait_list); + struct bio_entry *be, *tmp; + + list_for_each_entry_safe(be, tmp, wait_list, list) { + struct bio *bio = be->bio; + int err; + + wait_for_completion_io(&be->event); + err = be->error; + if (err == -EOPNOTSUPP) + err = 0; + + if (err) + f2fs_msg(sbi->sb, KERN_INFO, + "Issue discard failed, ret: %d", err); + + bio_put(bio); + list_del(&be->list); + kmem_cache_free(bio_entry_slab, be); + } +} + +static void f2fs_submit_bio_wait_endio(struct bio *bio) +{ + struct bio_entry *be = (struct bio_entry *)bio->bi_private; + + be->error = bio->bi_error; + complete(&be->event); +} + +/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ +int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) +{ + struct block_device *bdev = sbi->sb->s_bdev; + struct bio *bio = NULL; + int err; + + err = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags, + &bio); + if (!err && bio) { + struct bio_entry *be = __add_bio_entry(sbi, bio); + + bio->bi_private = be; + bio->bi_end_io = f2fs_submit_bio_wait_endio; + bio->bi_opf |= REQ_SYNC; + submit_bio(bio); + } + + return err; +} + static int f2fs_issue_discard(struct f2fs_sb_info *sbi, block_t blkstart, block_t blklen) { @@ -597,7 +666,7 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, sbi->discard_blks--; } trace_f2fs_issue_discard(sbi->sb, blkstart, blklen); - return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0); + return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0); } bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) @@ -719,11 +788,14 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct list_head 
*head = &(SM_I(sbi)->discard_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct blk_plug plug; unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; bool force = (cpc->reason == CP_DISCARD); + blk_start_plug(&plug); + mutex_lock(&dirty_i->seglist_lock); while (1) { @@ -772,6 +844,8 @@ skip: SM_I(sbi)->nr_discards -= entry->len; kmem_cache_free(discard_entry_slab, entry); } + + blk_finish_plug(&plug); } static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) @@ -2457,6 +2531,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; INIT_LIST_HEAD(&sm_info->discard_list); + INIT_LIST_HEAD(&sm_info->wait_list); sm_info->nr_discards = 0; sm_info->max_discards = 0; @@ -2600,10 +2675,15 @@ int __init create_segment_manager_caches(void) if (!discard_entry_slab) goto fail; + bio_entry_slab = f2fs_kmem_cache_create("bio_entry", + sizeof(struct bio_entry)); + if (!bio_entry_slab) + goto destory_discard_entry; + sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", sizeof(struct sit_entry_set)); if (!sit_entry_set_slab) - goto destory_discard_entry; + goto destroy_bio_entry; inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry", sizeof(struct inmem_pages)); @@ -2613,6 +2693,8 @@ int __init create_segment_manager_caches(void) destroy_sit_entry_set: kmem_cache_destroy(sit_entry_set_slab); +destroy_bio_entry: + kmem_cache_destroy(bio_entry_slab); destory_discard_entry: kmem_cache_destroy(discard_entry_slab); fail: @@ -2622,6 +2704,7 @@ fail: void destroy_segment_manager_caches(void) { kmem_cache_destroy(sit_entry_set_slab); + kmem_cache_destroy(bio_entry_slab); kmem_cache_destroy(discard_entry_slab); kmem_cache_destroy(inmem_entry_slab); } -- cgit v0.10.2 From bbf156f7afa7f3cc07177f1119878f6f60855fd1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 29 Aug 2016 
18:23:45 -0700 Subject: f2fs: fix lost xattrs of directories This patch enhances the xattr consistency of dirs against sudden power-cuts. A possible scenario would be: 1. dir->setxattr used by per-file encryption 2. file->setxattr goes into inline_xattr 3. file->fsync In that case, we should do checkpoint for #1. Otherwise we'd lose dir's key information for the file given #2. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 64a685d..727e97e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1152,6 +1152,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_prefree_segments(sbi, cpc); clear_sbi_flag(sbi, SBI_IS_DIRTY); + clear_sbi_flag(sbi, SBI_NEED_CP); return 0; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c2478a1..b9611d4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -756,6 +756,7 @@ enum { SBI_NEED_FSCK, /* need fsck.f2fs to fix */ SBI_POR_DOING, /* recovery is doing or not */ SBI_NEED_SB_WRITE, /* need to recover superblock */ + SBI_NEED_CP, /* need to checkpoint */ }; enum { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7c6ee7e..21aa99b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -135,6 +135,8 @@ static inline bool need_do_checkpoint(struct inode *inode) if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) need_cp = true; + else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) + need_cp = true; else if (file_enc_name(inode) && need_dentry_mark(sbi, inode->i_ino)) need_cp = true; else if (file_wrong_pino(inode)) diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index c8898b5..d39a792 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -548,6 +548,8 @@ static int __f2fs_setxattr(struct inode *inode, int index, !strcmp(name, F2FS_XATTR_NAME_ENCRYPTION_CONTEXT)) f2fs_set_encrypted_inode(inode); f2fs_mark_inode_dirty_sync(inode); + if (!error && S_ISDIR(inode->i_mode)) + set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP); exit: kzfree(base_addr); return error; -- cgit
v0.10.2 From e7ba108a06216dae89a64c0243560502276b92d8 Mon Sep 17 00:00:00 2001 From: Shuoran Liu Date: Mon, 29 Aug 2016 11:27:56 +0800 Subject: f2fs: add roll-forward recovery process for encrypted dentry Add roll-forward recovery process for encrypted dentry, so the first fsync issued to an encrypted file does not need writing checkpoint. This improves the performance of the following test at thousands of small files: open -> write -> fsync -> close Signed-off-by: Shuoran Liu Acked-by: Chao Yu [Jaegeuk Kim: modify kernel message to show encrypted names] Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index fbc8ede..9316d8a 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -215,31 +215,17 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, return de; } -/* - * Find an entry in the specified directory with the wanted name. - * It returns the page where the entry was found (as a parameter - res_page), - * and the entry itself. Page is returned mapped and unlocked. - * Entry is guaranteed to be valid. 
- */ -struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, - const struct qstr *child, struct page **res_page) +struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, + struct fscrypt_name *fname, struct page **res_page) { unsigned long npages = dir_blocks(dir); struct f2fs_dir_entry *de = NULL; unsigned int max_depth; unsigned int level; - struct fscrypt_name fname; - int err; - - err = fscrypt_setup_filename(dir, child, 1, &fname); - if (err) { - *res_page = ERR_PTR(err); - return NULL; - } if (f2fs_has_inline_dentry(dir)) { *res_page = NULL; - de = find_in_inline_dir(dir, &fname, res_page); + de = find_in_inline_dir(dir, fname, res_page); goto out; } @@ -259,11 +245,35 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, for (level = 0; level < max_depth; level++) { *res_page = NULL; - de = find_in_level(dir, level, &fname, res_page); + de = find_in_level(dir, level, fname, res_page); if (de || IS_ERR(*res_page)) break; } out: + return de; +} + +/* + * Find an entry in the specified directory with the wanted name. + * It returns the page where the entry was found (as a parameter - res_page), + * and the entry itself. Page is returned mapped and unlocked. + * Entry is guaranteed to be valid. 
+ */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, + const struct qstr *child, struct page **res_page) +{ + struct f2fs_dir_entry *de = NULL; + struct fscrypt_name fname; + int err; + + err = fscrypt_setup_filename(dir, child, 1, &fname); + if (err) { + *res_page = ERR_PTR(err); + return NULL; + } + + de = __f2fs_find_entry(dir, &fname, res_page); + fscrypt_free_filename(&fname); return de; } @@ -605,6 +615,26 @@ fail: return err; } +int __f2fs_do_add_link(struct inode *dir, struct fscrypt_name *fname, + struct inode *inode, nid_t ino, umode_t mode) +{ + struct qstr new_name; + int err = -EAGAIN; + + new_name.name = fname_name(fname); + new_name.len = fname_len(fname); + + if (f2fs_has_inline_dentry(dir)) + err = f2fs_add_inline_entry(dir, &new_name, fname->usr_fname, + inode, ino, mode); + if (err == -EAGAIN) + err = f2fs_add_regular_entry(dir, &new_name, fname->usr_fname, + inode, ino, mode); + + f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); + return err; +} + /* * Caller should grab and release a rwsem by calling f2fs_lock_op() and * f2fs_unlock_op(). 
@@ -613,26 +643,15 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *inode, nid_t ino, umode_t mode) { struct fscrypt_name fname; - struct qstr new_name; int err; err = fscrypt_setup_filename(dir, name, 0, &fname); if (err) return err; - new_name.name = fname_name(&fname); - new_name.len = fname_len(&fname); - - err = -EAGAIN; - if (f2fs_has_inline_dentry(dir)) - err = f2fs_add_inline_entry(dir, &new_name, fname.usr_fname, - inode, ino, mode); - if (err == -EAGAIN) - err = f2fs_add_regular_entry(dir, &new_name, fname.usr_fname, - inode, ino, mode); + err = __f2fs_do_add_link(dir, &fname, inode, ino, mode); fscrypt_free_filename(&fname); - f2fs_update_time(F2FS_I_SB(dir), REQ_TIME); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b9611d4..2064dc3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1939,6 +1939,8 @@ struct page *init_inode_metadata(struct inode *, struct inode *, void update_parent_metadata(struct inode *, struct inode *, unsigned int); int room_for_filename(const void *, int, int); void f2fs_drop_nlink(struct inode *, struct inode *); +struct f2fs_dir_entry *__f2fs_find_entry(struct inode *, struct fscrypt_name *, + struct page **); struct f2fs_dir_entry *f2fs_find_entry(struct inode *, const struct qstr *, struct page **); struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); @@ -1950,6 +1952,8 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *, const struct qstr *, f2fs_hash_t , unsigned int); int f2fs_add_regular_entry(struct inode *, const struct qstr *, const struct qstr *, struct inode *, nid_t, umode_t); +int __f2fs_do_add_link(struct inode *, struct fscrypt_name*, struct inode *, + nid_t, umode_t); int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *, nid_t, umode_t); void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 21aa99b..b74e9852 100644 --- 
a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -137,8 +137,6 @@ static inline bool need_do_checkpoint(struct inode *inode) need_cp = true; else if (is_sbi_flag_set(sbi, SBI_NEED_CP)) need_cp = true; - else if (file_enc_name(inode) && need_dentry_mark(sbi, inode->i_ino)) - need_cp = true; else if (file_wrong_pino(inode)) need_cp = true; else if (!space_for_roll_forward(sbi)) diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2f38bbb..ba0fc2e 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -96,11 +96,12 @@ static int recover_dentry(struct inode *inode, struct page *ipage, struct f2fs_inode *raw_inode = F2FS_INODE(ipage); nid_t pino = le32_to_cpu(raw_inode->i_pino); struct f2fs_dir_entry *de; - struct qstr name; + struct fscrypt_name fname; struct page *page; struct inode *dir, *einode; struct fsync_inode_entry *entry; int err = 0; + char *name; entry = get_fsync_inode(dir_list, pino); if (!entry) { @@ -120,19 +121,17 @@ static int recover_dentry(struct inode *inode, struct page *ipage, dir = entry->inode; - if (file_enc_name(inode)) - return 0; - - name.len = le32_to_cpu(raw_inode->i_namelen); - name.name = raw_inode->i_name; + memset(&fname, 0, sizeof(struct fscrypt_name)); + fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen); + fname.disk_name.name = raw_inode->i_name; - if (unlikely(name.len > F2FS_NAME_LEN)) { + if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) { WARN_ON(1); err = -ENAMETOOLONG; goto out; } retry: - de = f2fs_find_entry(dir, &name, &page); + de = __f2fs_find_entry(dir, &fname, &page); if (de && inode->i_ino == le32_to_cpu(de->ino)) goto out_unmap_put; @@ -156,7 +155,7 @@ retry: } else if (IS_ERR(page)) { err = PTR_ERR(page); } else { - err = __f2fs_add_link(dir, &name, inode, + err = __f2fs_do_add_link(dir, &fname, inode, inode->i_ino, inode->i_mode); } goto out; @@ -165,9 +164,13 @@ out_unmap_put: f2fs_dentry_kunmap(dir, page); f2fs_put_page(page, 0); out: + if (file_enc_name(inode)) + name = ""; + else + name = 
raw_inode->i_name; f2fs_msg(inode->i_sb, KERN_NOTICE, "%s: ino = %x, name = %s, dir = %lx, err = %d", - __func__, ino_of_node(ipage), raw_inode->i_name, + __func__, ino_of_node(ipage), name, IS_ERR(dir) ? 0 : dir->i_ino, err); return err; } -- cgit v0.10.2 From c2a080aefa94c4b3dad35fbd9fa6cd0c8039b128 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 31 Aug 2016 10:43:19 +0800 Subject: f2fs: fix to set superblock dirty correctly tests/generic/251 of the fstests suite complains with the message below: ------------[ cut here ]------------ invalid opcode: 0000 [#1] PREEMPT SMP CPU: 2 PID: 7698 Comm: fstrim Tainted: G O 4.7.0+ #21 task: e9f4e000 task.stack: e7262000 EIP: 0060:[] EFLAGS: 00010202 CPU: 2 EIP is at write_checkpoint+0xfde/0x1020 [f2fs] EAX: f33eb300 EBX: eecac310 ECX: 00000001 EDX: ffff0001 ESI: eecac000 EDI: eecac5f0 EBP: e7263dec ESP: e7263d18 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 CR0: 80050033 CR2: b76ab01c CR3: 2eb89de0 CR4: 000406f0 Stack: 00000001 a220fb7b e9f4e000 00000002 419ff2d3 b3a05151 00000002 e9f4e5d8 e9f4e000 419ff2d3 b3a05151 eecac310 c10b8154 b3a05151 419ff2d3 c10b78bd e9f4e000 e9f4e000 e9f4e5d8 00000001 e9f4e000 ec409000 eecac2cc eecac288 Call Trace: [] ? __lock_acquire+0x3c4/0x760 [] ? mark_held_locks+0x5d/0x80 [] f2fs_trim_fs+0x1c2/0x2e0 [f2fs] [] f2fs_ioctl+0x6b6/0x10b0 [f2fs] [] ? __this_cpu_preempt_check+0xf/0x20 [] ? trace_hardirqs_off_caller+0x91/0x120 [] ? __exchange_data_block+0xd30/0xd30 [f2fs] [] do_vfs_ioctl+0x81/0x7f0 [] ? kmem_cache_free+0x245/0x2e0 [] ? get_unused_fd_flags+0x40/0x40 [] ? putname+0x4c/0x50 [] ? do_sys_open+0x16e/0x1d0 [] ? do_fast_syscall_32+0x30/0x1c0 [] ? __this_cpu_preempt_check+0xf/0x20 [] SyS_ioctl+0x58/0x80 [] do_fast_syscall_32+0xa1/0x1c0 [] sysenter_past_esp+0x45/0x74 EIP: [] write_checkpoint+0xfde/0x1020 [f2fs] SS:ESP 0068:e7263d18 ---[ end trace 4de95d7e6b3aa7c6 ]--- The reason is: with the call stack below, we will encounter BUG_ON while doing fstrim.
Thread A Thread B - write_checkpoint - do_checkpoint - f2fs_write_inode - update_inode_page - update_inode - set_page_dirty - f2fs_set_node_page_dirty - inc_page_count - percpu_counter_inc - set_sbi_flag(SBI_IS_DIRTY) - clear_sbi_flag(SBI_IS_DIRTY) Thread C Thread D - f2fs_write_node_page - set_node_addr - __set_nat_cache_dirty - nm_i->dirty_nat_cnt++ - do_vfs_ioctl - f2fs_ioctl - f2fs_trim_fs - write_checkpoint - f2fs_bug_on(nm_i->dirty_nat_cnt) Fix it by setting superblock dirty correctly in do_checkpoint and f2fs_write_node_page. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 727e97e..b80dd37 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1154,6 +1154,16 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) clear_sbi_flag(sbi, SBI_IS_DIRTY); clear_sbi_flag(sbi, SBI_NEED_CP); + /* + * redirty superblock if metadata like node page or inode cache is + * updated during writing checkpoint. + */ + if (get_pages(sbi, F2FS_DIRTY_NODES) || + get_pages(sbi, F2FS_DIRTY_IMETA)) + set_sbi_flag(sbi, SBI_IS_DIRTY); + + f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS)); + return 0; } -- cgit v0.10.2 From 6bf6b267d27d381fd43d2ac6152fa25de716ceaa Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 31 Aug 2016 16:20:37 -0700 Subject: f2fs: set dentry bits on random location in memory This fixes pointer panic when using inline_dentry, which was triggered when backporting to 3.10. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 9316d8a..2fb20fc 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -510,7 +510,7 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, de->ino = cpu_to_le32(ino); set_de_type(de, mode); for (i = 0; i < slots; i++) { - test_and_set_bit_le(bit_pos + i, (void *)d->bitmap); + __set_bit_le(bit_pos + i, (void *)d->bitmap); /* avoid wrong garbage data for readdir */ if (i) (de + i)->name_len = 0; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 8a96107..4d526f3 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -566,7 +566,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page, inline_dentry = inline_data_addr(page); bit_pos = dentry - inline_dentry->dentry; for (i = 0; i < slots; i++) - test_and_clear_bit_le(bit_pos + i, + __clear_bit_le(bit_pos + i, &inline_dentry->dentry_bitmap); set_page_dirty(page); -- cgit v0.10.2 From 6ab2a3085eb3c0ad58fa47f827921d05e9ad2e06 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 5 Sep 2016 12:28:26 +0800 Subject: f2fs: fix minor typo Correct typo from 'destory' to 'destroy'. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 93c5e26..ff4b723 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2678,7 +2678,7 @@ int __init create_segment_manager_caches(void) bio_entry_slab = f2fs_kmem_cache_create("bio_entry", sizeof(struct bio_entry)); if (!bio_entry_slab) - goto destory_discard_entry; + goto destroy_discard_entry; sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set", sizeof(struct sit_entry_set)); @@ -2695,7 +2695,7 @@ destroy_sit_entry_set: kmem_cache_destroy(sit_entry_set_slab); destroy_bio_entry: kmem_cache_destroy(bio_entry_slab); -destory_discard_entry: +destroy_discard_entry: kmem_cache_destroy(discard_entry_slab); fail: return -ENOMEM; -- cgit v0.10.2 From 7732c26ac3925e2aebfa84e14673240201ddc9de Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 5 Sep 2016 12:28:27 +0800 Subject: f2fs: fix to detect temporary name of multimedia file Some applications may create a multimedia file with a temporary name like '*.jpg.tmp' or '*.mp4.tmp', then rename it to '*.jpg' or '*.mp4'. Currently, f2fs can only detect multimedia filenames with the specific format "filename + '.' + extension", so it fails to detect a multimedia file with such a temporary name, resulting in a failure to set the cold flag on the file. This patch enhances the detection flow so that the extension is also looked up in the middle of a temporary filename. Reported-by: Xue Liu Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 73fa356..bfcd9da 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -91,18 +91,23 @@ static int is_multimedia_file(const unsigned char *s, const char *sub) { size_t slen = strlen(s); size_t sublen = strlen(sub); + int i; /* * filename format of multimedia file should be defined as: - * "filename + '.' + extension". + * "filename + '.' + extension + (optional: '.' + temp extension)".
*/ if (slen < sublen + 2) return 0; - if (s[slen - sublen - 1] != '.') - return 0; + for (i = 1; i < slen - sublen; i++) { + if (s[i] != '.') + continue; + if (!strncasecmp(s + i + 1, sub, sublen)) + return 1; + } - return !strncasecmp(s + slen - sublen, sub, sublen); + return 0; } /* -- cgit v0.10.2 From 68f313935fb205822ed1f923f7833639f3c78573 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 6 Sep 2016 13:31:56 -0700 Subject: f2fs: no need to make zeros beyond i_size We don't need to make zeros beyond i_size, since we already wrote that through NEW_ADDR case. Reported-by: Al Viro Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7c8e219..8ffb480 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1647,15 +1647,6 @@ repeat: if (PageUptodate(page)) goto out_clear; - if ((pos & PAGE_MASK) >= i_size_read(inode)) { - unsigned start = pos & (PAGE_SIZE - 1); - unsigned end = start + len; - - /* Reading beyond i_size is simple: memset to zero */ - zero_user_segments(page, 0, start, end, PAGE_SIZE); - goto out_update; - } - if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); } else { -- cgit v0.10.2 From 34b5d5c22d64273319a525cb4e9f2d073df9f4a0 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 6 Sep 2016 15:55:54 -0700 Subject: f2fs: avoid page allocation for truncating partial inline_data When truncating cached inline_data, we don't need to allocate a new page all the time. Instead, it must check its page cache only. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b74e9852..3b62949 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -523,7 +523,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, return 0; if (cache_only) { - page = f2fs_grab_cache_page(mapping, index, false); + page = find_lock_page(mapping, index); if (page && PageUptodate(page)) goto truncate_out; f2fs_put_page(page, 1); -- cgit v0.10.2 From ed214a11830a12a83511eb32415e71f1a0760b8a Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 1 Sep 2016 10:14:39 +0800 Subject: f2fs: forbid to do fstrim if fs has some error This patch skips fstrim if the SBI_NEED_FSCK flag is set in sbi. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ff4b723..3ff4621 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1353,6 +1353,12 @@ int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range) if (end <= MAIN_BLKADDR(sbi)) goto out; + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Found FS corruption, run fsck to fix."); + goto out; + } + /* start/end segment number in main_area */ start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start); end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 : -- cgit v0.10.2 From 7f3037a5ec0672e03f96d4b0b86169c4c48e479e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 1 Sep 2016 12:02:51 -0700 Subject: f2fs: check free_sections for defragmentation Fix wrong condition check for defragmentation of a file.
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8ffb480..357a423 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1293,7 +1293,7 @@ write: if (!wbc->for_reclaim) need_balance_fs = true; - else if (has_not_enough_free_secs(sbi, 0)) + else if (has_not_enough_free_secs(sbi, 0, 0)) goto redirty_out; err = -EAGAIN; @@ -1625,7 +1625,7 @@ repeat: if (err) goto fail; - if (need_balance && has_not_enough_free_secs(sbi, 0)) { + if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) { unlock_page(page); f2fs_balance_fs(sbi, true); lock_page(page); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3b62949..b8a521f 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1961,7 +1961,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, * avoid defragment running in SSR mode when free section are allocated * intensively */ - if (has_not_enough_free_secs(sbi, sec_num)) { + if (has_not_enough_free_secs(sbi, 0, sec_num)) { err = -EAGAIN; goto out; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index cdc44a6..24acbbb 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -439,7 +439,7 @@ next_step: struct node_info ni; /* stop BG_GC if there is not enough free sections. */ - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) return; if (check_valid_map(sbi, segno, off) == 0) @@ -715,7 +715,7 @@ next_step: nid_t nid = le32_to_cpu(entry->nid); /* stop BG_GC if there is not enough free sections. 
*/ - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0)) + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) return; if (check_valid_map(sbi, segno, off) == 0) @@ -916,7 +916,7 @@ gc_more: goto stop; } - if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) { + if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) { gc_type = FG_GC; /* * If there is no victim and no prefree segment but still not @@ -927,7 +927,7 @@ gc_more: prefree_segments(sbi)) { write_checkpoint(sbi, &cpc); segno = NULL_SEGNO; - } else if (has_not_enough_free_secs(sbi, 0)) { + } else if (has_not_enough_free_secs(sbi, 0, 0)) { write_checkpoint(sbi, &cpc); } } @@ -944,7 +944,7 @@ gc_more: sbi->cur_victim_sec = NULL_SEGNO; if (!sync) { - if (has_not_enough_free_secs(sbi, sec_freed)) + if (has_not_enough_free_secs(sbi, sec_freed, 0)) goto gc_more; if (gc_type == FG_GC) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 3ff4621..101b58f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -356,7 +356,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) * We should do GC or end up with checkpoint, if there are so many dirty * dir/node pages without enough free segments. 
*/ - if (has_not_enough_free_secs(sbi, 0)) { + if (has_not_enough_free_secs(sbi, 0, 0)) { mutex_lock(&sbi->gc_mutex); f2fs_gc(sbi, false); } @@ -1278,7 +1278,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; - if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0)) + if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0, 0)) return v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, SSR); @@ -1477,7 +1477,7 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, /* direct_io'ed data is aligned to the segment for better performance */ if (direct_io && curseg->next_blkoff && - !has_not_enough_free_secs(sbi, 0)) + !has_not_enough_free_secs(sbi, 0, 0)) __allocate_new_segments(sbi, type); *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 87156c7..fecb856 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -479,7 +479,8 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi) reserved_sections(sbi) + 1); } -static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, + int freed, int needed) { int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES); int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS); @@ -489,8 +490,8 @@ static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, int freed) if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) return false; - return (free_sections(sbi) + freed) <= (node_secs + 2 * dent_secs + - reserved_sections(sbi)); + return (free_sections(sbi) + freed) <= + (node_secs + 2 * dent_secs + reserved_sections(sbi) + needed); } static inline bool excess_prefree_segs(struct f2fs_sb_info *sbi) -- cgit v0.10.2 From f4702d61eb53466251eeb677f9784e047e1caf0c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 9 Sep 2016 
16:48:15 -0700 Subject: f2fs: add common iget in add_fsync_inode There is no functional change. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ba0fc2e..12692777 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -68,14 +68,20 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, return NULL; } -static struct fsync_inode_entry *add_fsync_inode(struct list_head *head, - struct inode *inode) +static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, + struct list_head *head, nid_t ino) { + struct inode *inode = f2fs_iget(sbi->sb, ino); struct fsync_inode_entry *entry; + if (IS_ERR(inode)) + return ERR_CAST(inode); + entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); - if (!entry) - return NULL; + if (!entry) { + iput(inode); + return ERR_PTR(-ENOMEM); + } entry->inode = inode; list_add_tail(&entry->list, head); @@ -105,16 +111,10 @@ static int recover_dentry(struct inode *inode, struct page *ipage, entry = get_fsync_inode(dir_list, pino); if (!entry) { - dir = f2fs_iget(inode->i_sb, pino); - if (IS_ERR(dir)) { - err = PTR_ERR(dir); - goto out; - } - - entry = add_fsync_inode(dir_list, dir); - if (!entry) { - err = -ENOMEM; - iput(dir); + entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino); + if (IS_ERR(entry)) { + dir = ERR_CAST(entry); + err = PTR_ERR(entry); goto out; } } @@ -228,7 +228,6 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) { unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; - struct inode *inode; struct page *page = NULL; block_t blkaddr; int err = 0; @@ -266,23 +265,15 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) * CP | dnode(F) | inode(DF) * For this case, we should not give up now. 
*/ - inode = f2fs_iget(sbi->sb, ino_of_node(page)); - if (IS_ERR(inode)) { - err = PTR_ERR(inode); + entry = add_fsync_inode(sbi, head, ino_of_node(page)); + if (IS_ERR(entry)) { + err = PTR_ERR(entry); if (err == -ENOENT) { err = 0; goto next; } break; } - - /* add this fsync inode to the list */ - entry = add_fsync_inode(head, inode); - if (!entry) { - err = -ENOMEM; - iput(inode); - break; - } } entry->blkaddr = blkaddr; -- cgit v0.10.2 From e8ea9b3d7e278d2ef4b60e703f780ceee70cb331 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 9 Sep 2016 16:59:39 -0700 Subject: f2fs: avoid ENOMEM during roll-forward recovery This patch gives another chances during roll-forward recovery regarding to -ENOMEM. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2064dc3..5d2aa6a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1910,6 +1910,7 @@ long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); */ void f2fs_set_inode_flags(struct inode *); struct inode *f2fs_iget(struct super_block *, unsigned long); +struct inode *f2fs_iget_retry(struct super_block *, unsigned long); int try_to_free_nats(struct f2fs_sb_info *, int); int update_inode(struct inode *, struct page *); int update_inode_page(struct inode *); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 9ac5efc..ac4daa5 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "f2fs.h" @@ -234,6 +235,20 @@ bad_inode: return ERR_PTR(ret); } +struct inode *f2fs_iget_retry(struct super_block *sb, unsigned long ino) +{ + struct inode *inode; +retry: + inode = f2fs_iget(sb, ino); + if (IS_ERR(inode)) { + if (PTR_ERR(inode) == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + } + return inode; +} + int update_inode(struct inode *inode, struct page *node_page) { struct f2fs_inode *ri; diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 8a28800..2322a8e 100644 --- a/fs/f2fs/node.c +++ 
b/fs/f2fs/node.c @@ -2013,10 +2013,12 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) if (unlikely(old_ni.blk_addr != NULL_ADDR)) return -EINVAL; - +retry: ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false); - if (!ipage) - return -ENOMEM; + if (!ipage) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } /* Should not use this inode from free nid list */ remove_free_nid(NM_I(sbi), ino); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 12692777..ad748e5 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -71,18 +71,14 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi, struct list_head *head, nid_t ino) { - struct inode *inode = f2fs_iget(sbi->sb, ino); + struct inode *inode; struct fsync_inode_entry *entry; + inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) return ERR_CAST(inode); - entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); - if (!entry) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - + entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO); entry->inode = inode; list_add_tail(&entry->list, head); @@ -136,7 +132,7 @@ retry: goto out_unmap_put; if (de) { - einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); + einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino)); if (IS_ERR(einode)) { WARN_ON(1); err = PTR_ERR(einode); @@ -158,6 +154,8 @@ retry: err = __f2fs_do_add_link(dir, &fname, inode, inode->i_ino, inode->i_mode); } + if (err == -ENOMEM) + goto retry; goto out; out_unmap_put: @@ -357,7 +355,7 @@ got_it: if (ino != dn->inode->i_ino) { /* Deallocate previous index in the node page */ - inode = f2fs_iget(sbi->sb, ino); + inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) return PTR_ERR(inode); } else { @@ -425,10 +423,15 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, end = start + ADDRS_PER_PAGE(page, inode); set_new_dnode(&dn, 
inode, NULL, NULL, 0); - +retry_dn: err = get_dnode_of_data(&dn, start, ALLOC_NODE); - if (err) + if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry_dn; + } goto out; + } f2fs_wait_on_page_writeback(dn.node_page, NODE, true); @@ -479,11 +482,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, if (err) goto err; } - +retry_prev: /* Check the previous node page having this index */ err = check_index_in_prev_nodes(sbi, dest, &dn); - if (err) + if (err) { + if (err == -ENOMEM) { + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry_prev; + } goto err; + } /* write dummy data page */ f2fs_replace_block(sbi, &dn, src, dest, -- cgit v0.10.2 From 9512929608891830f4194e1959b667b43d86e619 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 9 Sep 2016 08:38:20 +0800 Subject: MAINTAINERS: update f2fs entry This patch includes below modifications: 1. change my maintainership from reviewer to maintainer. 2. remove maintainership of Changman Lee since he is not active about one and a half year. 3. change website of f2fs from wiki to kernel one. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/MAINTAINERS b/MAINTAINERS index 0bbe4b1..bd28973 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5060,10 +5060,9 @@ F: include/linux/fscrypto.h F2FS FILE SYSTEM M: Jaegeuk Kim -M: Changman Lee -R: Chao Yu +M: Chao Yu L: linux-f2fs-devel@lists.sourceforge.net -W: http://en.wikipedia.org/wiki/F2FS +W: https://f2fs.wiki.kernel.org/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git S: Maintained F: Documentation/filesystems/f2fs.txt -- cgit v0.10.2 From 61e4da1172d18f5277be847a40559eacd3169ce7 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Sat, 10 Sep 2016 11:19:37 +0800 Subject: f2fs: fix parameters of __exchange_data_block __exchange_data_block should take block indexes as parameters instead of offsets in bytes. 
Signed-off-by: Fan li Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index b8a521f..40fe72e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2143,8 +2143,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, f2fs_balance_fs(sbi, true); f2fs_lock_op(sbi); - ret = __exchange_data_block(src, dst, pos_in, - pos_out, len >> F2FS_BLKSIZE_BITS, false); + ret = __exchange_data_block(src, dst, pos_in >> F2FS_BLKSIZE_BITS, + pos_out >> F2FS_BLKSIZE_BITS, + len >> F2FS_BLKSIZE_BITS, false); if (!ret) { if (dst_max_i_size) -- cgit v0.10.2 From 649d7df29ca83b2c9e81a4a305a8de8ab02b5e9d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 6 Sep 2016 11:02:03 -0700 Subject: f2fs: fix to set PageUptodate in f2fs_write_end correctly Previously, f2fs_write_begin set PageUptodate all the time. But when the user tries to update the entire page (i.e., len == PAGE_SIZE), we need to consider that the page may be only partially copied afterwards. In such a case, we will lose the remaining region in the page. This patch fixes this by setting PageUptodate in f2fs_write_end according to the copied result. In the short copy case, it returns zero to let generic_perform_write retry copying user data again. As a result, f2fs_write_end() works as follows: PageUptodate len copied return retry 1. no 4096 4096 4096 false -> return 4096 2. no 4096 1024 0 true -> goto #1 case 3. yes 2048 2048 2048 false -> return 2048 4.
yes 2048 1024 1024 false -> return 1024 Suggested-by: Al Viro Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 357a423..528c3c0 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1642,13 +1642,12 @@ repeat: if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr); - if (len == PAGE_SIZE) - goto out_update; - if (PageUptodate(page)) - goto out_clear; + if (len == PAGE_SIZE || PageUptodate(page)) + return 0; if (blkaddr == NEW_ADDR) { zero_user_segment(page, 0, PAGE_SIZE); + SetPageUptodate(page); } else { struct bio *bio; @@ -1676,11 +1675,6 @@ repeat: goto fail; } } -out_update: - if (!PageUptodate(page)) - SetPageUptodate(page); -out_clear: - clear_cold_data(page); return 0; fail: @@ -1698,11 +1692,26 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); + /* + * This should be come from len == PAGE_SIZE, and we expect copied + * should be PAGE_SIZE. Otherwise, we treat it with zero copied and + * let generic_perform_write() try to copy data again through copied=0. + */ + if (!PageUptodate(page)) { + if (unlikely(copied != PAGE_SIZE)) + copied = 0; + else + SetPageUptodate(page); + } + if (!copied) + goto unlock_out; + set_page_dirty(page); + clear_cold_data(page); if (pos + copied > i_size_read(inode)) f2fs_i_size_write(inode, pos + copied); - +unlock_out: f2fs_put_page(page, 1); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; -- cgit v0.10.2 From d95fd91c1ac1f551909ccdc4fcc80159521d0ef5 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Tue, 13 Sep 2016 11:35:42 +0800 Subject: f2fs: exclude special cases for f2fs_move_file_range When src and dst is the same file, and the latter part of source region overlaps with the former part of destination region, current implement will overwrite data which hasn't been moved yet and truncate data in overlapped region. 
This patch returns -EINVAL when such cases occur and returns 0 when the source region and destination region are actually the same part of the same file. Signed-off-by: Fan li Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 40fe72e..d400380 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2092,6 +2092,13 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst)) return -EOPNOTSUPP; + if (src == dst) { + if (pos_in == pos_out) + return 0; + if (pos_out > pos_in && pos_out < pos_in + len) + return -EINVAL; + } + inode_lock(src); if (src != dst) { if (!inode_trylock(dst)) { -- cgit v0.10.2 From 49ed09dd85e58a758557087a0abb330591e983cc Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Wed, 14 Sep 2016 03:00:02 +0800 Subject: f2fs: remove dead code f2fs_check_acl The macro f2fs_check_acl has been defined but never used since the initial commit; this patch removes the code that has been dead for several years. Signed-off-by: Tiezhu Yang Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/acl.h b/fs/f2fs/acl.h index b2334d1..2c68518 100644 --- a/fs/f2fs/acl.h +++ b/fs/f2fs/acl.h @@ -41,7 +41,6 @@ extern int f2fs_set_acl(struct inode *, struct posix_acl *, int); extern int f2fs_init_acl(struct inode *, struct inode *, struct page *, struct page *); #else -#define f2fs_check_acl NULL #define f2fs_get_acl NULL #define f2fs_set_acl NULL -- cgit v0.10.2 From 5905f9afa27234f74423f7276d0833fed6a9a415 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 12 Sep 2016 15:08:37 -0700 Subject: f2fs: handle error in recover_orphan_inode This patch enhances the error path in recover_orphan_inode.
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b80dd37..df56a43 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -531,8 +531,9 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; + struct node_info ni; - inode = f2fs_iget(sbi->sb, ino); + inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) { /* * there should be a bug that we can't find the entry @@ -546,6 +547,22 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) /* truncate all the data during iput */ iput(inode); + + get_node_info(sbi, ino, &ni); + + /* ENOMEM was fully retried in f2fs_evict_inode. */ + if (ni.blk_addr != NULL_ADDR) { + int err = acquire_orphan_inode(sbi); + + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); + return err; + } + __add_ino_entry(sbi, ino, ORPHAN_INO); + } return 0; } -- cgit v0.10.2 From ebfa732217fd1dba7118aa5d37455fbf2f94c6b7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 18 Sep 2016 23:30:03 +0800 Subject: f2fs: make f2fs_filetype_table static There is no more user of f2fs_filetype_table outside of dir.c, make it static. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2fb20fc..39a850b 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -37,7 +37,7 @@ static unsigned int bucket_blocks(unsigned int level) return 4; } -unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { +static unsigned char f2fs_filetype_table[F2FS_FT_MAX] = { [F2FS_FT_UNKNOWN] = DT_UNKNOWN, [F2FS_FT_REG_FILE] = DT_REG, [F2FS_FT_DIR] = DT_DIR, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5d2aa6a..951b9b3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1926,7 +1926,6 @@ struct dentry *f2fs_get_parent(struct dentry *child); /* * dir.c */ -extern unsigned char f2fs_filetype_table[F2FS_FT_MAX]; void set_de_type(struct f2fs_dir_entry *, umode_t); unsigned char get_de_type(struct f2fs_dir_entry *); struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *, -- cgit v0.10.2 From 866969668aebe9626c083b9ababc8f88454ce049 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 18 Sep 2016 23:30:04 +0800 Subject: f2fs: fix to return error number of read_all_xattrs correctly We treat all error in read_all_xattrs as a no memory error, which covers the real reason of failure in it. Fix it by return correct errno in order to reflect the real cause. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index d39a792..1f74876 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -217,18 +217,20 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index, return entry; } -static void *read_all_xattrs(struct inode *inode, struct page *ipage) +static int read_all_xattrs(struct inode *inode, struct page *ipage, + void **base_addr) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_header *header; size_t size = PAGE_SIZE, inline_size = 0; void *txattr_addr; + int err; inline_size = inline_xattr_size(inode); txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO); if (!txattr_addr) - return NULL; + return -ENOMEM; /* read from inline xattr */ if (inline_size) { @@ -239,8 +241,10 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage) inline_addr = inline_xattr_addr(ipage); } else { page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) + if (IS_ERR(page)) { + err = PTR_ERR(page); goto fail; + } inline_addr = inline_xattr_addr(page); } memcpy(txattr_addr, inline_addr, inline_size); @@ -254,8 +258,10 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage) /* The inode already has an extended attribute block. 
*/ xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); - if (IS_ERR(xpage)) + if (IS_ERR(xpage)) { + err = PTR_ERR(xpage); goto fail; + } xattr_addr = page_address(xpage); memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); @@ -269,10 +275,11 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage) header->h_magic = cpu_to_le32(F2FS_XATTR_MAGIC); header->h_refcount = cpu_to_le32(1); } - return txattr_addr; + *base_addr = txattr_addr; + return 0; fail: kzfree(txattr_addr); - return NULL; + return err; } static inline int write_all_xattrs(struct inode *inode, __u32 hsize, @@ -366,9 +373,9 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name, if (len > F2FS_NAME_LEN) return -ERANGE; - base_addr = read_all_xattrs(inode, ipage); - if (!base_addr) - return -ENOMEM; + error = read_all_xattrs(inode, ipage, &base_addr); + if (error) + return error; entry = __find_xattr(base_addr, index, len, name); if (IS_XATTR_LAST_ENTRY(entry)) { @@ -402,9 +409,9 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) int error = 0; size_t rest = buffer_size; - base_addr = read_all_xattrs(inode, NULL); - if (!base_addr) - return -ENOMEM; + error = read_all_xattrs(inode, NULL, &base_addr); + if (error) + return error; list_for_each_xattr(entry, base_addr) { const struct xattr_handler *handler = @@ -463,9 +470,9 @@ static int __f2fs_setxattr(struct inode *inode, int index, if (size > MAX_VALUE_LEN(inode)) return -E2BIG; - base_addr = read_all_xattrs(inode, ipage); - if (!base_addr) - return -ENOMEM; + error = read_all_xattrs(inode, ipage, &base_addr); + if (error) + return error; /* find entry with wanted name. 
*/ here = __find_xattr(base_addr, index, len, name); -- cgit v0.10.2 From 8b038c70dfe4fd7b62573917a9e976f826ac6ad3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 18 Sep 2016 23:30:07 +0800 Subject: f2fs: support IO error injection This patch adds to support IO error injection for testing IO error tolerance of f2fs. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 528c3c0..25e3c30 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -34,6 +34,11 @@ static void f2fs_read_end_io(struct bio *bio) struct bio_vec *bvec; int i; +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(FAULT_IO)) + bio->bi_error = -EIO; +#endif + if (f2fs_bio_encrypted(bio)) { if (bio->bi_error) { fscrypt_release_ctx(bio->bi_private); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 951b9b3..f28b3f8 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -46,6 +46,7 @@ enum { FAULT_BLOCK, FAULT_DIR_DEPTH, FAULT_EVICT_INODE, + FAULT_IO, FAULT_MAX, }; @@ -77,6 +78,8 @@ static inline bool time_to_inject(int type) return false; else if (type == FAULT_EVICT_INODE && !IS_FAULT_SET(type)) return false; + else if (type == FAULT_IO && !IS_FAULT_SET(type)) + return false; atomic_inc(&f2fs_fault.inject_ops); if (atomic_read(&f2fs_fault.inject_ops) >= f2fs_fault.inject_rate) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 555217f..29e3cf4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -50,6 +50,7 @@ char *fault_name[FAULT_MAX] = { [FAULT_BLOCK] = "no more block", [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", + [FAULT_IO] = "IO error", }; static void f2fs_build_fault_attr(unsigned int rate) -- cgit v0.10.2 From 5bc994a043470c3ee544edaefbf93406d00f8de7 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sun, 18 Sep 2016 23:30:08 +0800 Subject: f2fs: show dirty inode number This patch enables showing dirty inode number in procfs. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ae13521..fb245bd 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -45,6 +45,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS); si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META); si->ndirty_data = get_pages(sbi, F2FS_DIRTY_DATA); + si->ndirty_imeta = get_pages(sbi, F2FS_DIRTY_IMETA); si->ndirty_dirs = sbi->ndirty_inode[DIR_INODE]; si->ndirty_files = sbi->ndirty_inode[FILE_INODE]; si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; @@ -319,6 +320,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_data, si->ndirty_files); seq_printf(s, " - meta: %4lld in %4d\n", si->ndirty_meta, si->meta_pages); + seq_printf(s, " - imeta: %4lld\n", + si->ndirty_imeta); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); seq_printf(s, " - free_nids: %9d\n", diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f28b3f8..b39fdcd 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2156,7 +2156,8 @@ struct f2fs_stat_info { unsigned long long hit_largest, hit_cached, hit_rbtree; unsigned long long hit_total, total_ext; int ext_tree, zombie_tree, ext_node; - s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, inmem_pages; + s64 ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; + s64 inmem_pages; unsigned int ndirty_dirs, ndirty_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits, fnids; int total_count, utilization; -- cgit v0.10.2 From 5d4c0af41fd4cc26cb75af4f3de7fb63c91209c1 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Sun, 18 Sep 2016 08:16:56 +0800 Subject: f2fs: preallocate blocks for encrypted file This patch allow preallocates data blocks for buffered aio writes in encrypted file. 
Signed-off-by: Yunlei He Reviewed-by: Chao Yu [Jaegeuk Kim: fix to avoid BUG_ON] Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 25e3c30..637b81d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -639,9 +639,6 @@ ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) map.m_next_pgofs = NULL; - if (f2fs_encrypted_inode(inode)) - return 0; - if (iocb->ki_flags & IOCB_DIRECT) { ret = f2fs_convert_inline_inode(inode); if (ret) @@ -1532,8 +1529,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, * we already allocated all the blocks, so we don't need to get * the block addresses when there is no need to fill the page. */ - if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) && - len == PAGE_SIZE) + if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE) return 0; if (f2fs_has_inline_data(inode) || diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 101b58f..c4d0472 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1675,11 +1675,9 @@ void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi, { struct page *cpage; - if (blkaddr == NEW_ADDR) + if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) return; - f2fs_bug_on(sbi, blkaddr == NULL_ADDR); - cpage = find_lock_page(META_MAPPING(sbi), blkaddr); if (cpage) { f2fs_wait_on_page_writeback(cpage, DATA, true); -- cgit v0.10.2 From a468f0ef516fda9c7d91bb550d458e853d76955e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 19 Sep 2016 17:55:10 -0700 Subject: f2fs: use crc and cp version to determine roll-forward recovery Previously, we used cp_version only to detect recoverable dnodes. In order to avoid same garbage cp_version, we needed to truncate the next dnode during checkpoint, resulting in additional discard or data write. If we can distinguish this by using crc in addition to cp_version, we can remove this overhead. There is backward compatibility concern where it changes node_footer layout. 
So, this patch introduces a new checkpoint flag, CP_CRC_RECOVERY_FLAG, to detect new layout. New layout will be activated only when this flag is set. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index df56a43..9c6439b 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -992,7 +992,6 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); - struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; nid_t last_nid = nm_i->next_scan_nid; @@ -1001,19 +1000,10 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) __u32 crc32 = 0; int i; int cp_payload_blks = __cp_payload(sbi); - block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg); - bool invalidate = false; struct super_block *sb = sbi->sb; struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE); u64 kbytes_written; - /* - * This avoids to conduct wrong roll-forward operations and uses - * metapages, so should be called prior to sync_meta_pages below. 
- */ - if (!test_opt(sbi, LFS) && discard_next_dnode(sbi, discard_blk)) - invalidate = true; - /* Flush all the NAT/SIT pages */ while (get_pages(sbi, F2FS_DIRTY_META)) { sync_meta_pages(sbi, META, LONG_MAX); @@ -1089,6 +1079,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) set_ckpt_flags(ckpt, CP_FSCK_FLAG); + /* set this flag to activate crc|cp_ver for recovery */ + set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); + /* update SIT/NAT bitmap */ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); @@ -1154,14 +1147,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* wait for previous submitted meta pages writeback */ wait_on_all_pages_writeback(sbi); - /* - * invalidate meta page which is used temporarily for zeroing out - * block at the end of warm node chain. - */ - if (invalidate) - invalidate_mapping_pages(META_MAPPING(sbi), discard_blk, - discard_blk); - release_ino_entry(sbi, false); if (unlikely(f2fs_cp_error(sbi))) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b39fdcd..cda8e6f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2045,7 +2045,6 @@ void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); void f2fs_wait_all_discard_bio(struct f2fs_sb_info *); void clear_prefree_segments(struct f2fs_sb_info *, struct cp_control *); void release_discard_addrs(struct f2fs_sb_info *); -bool discard_next_dnode(struct f2fs_sb_info *, block_t); int npages_for_summary_flush(struct f2fs_sb_info *, bool); void allocate_new_segments(struct f2fs_sb_info *); int f2fs_trim_fs(struct f2fs_sb_info *, struct fstrim_range *); diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index fc76845..e8ca64a 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -229,6 +229,37 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) f2fs_change_bit(block_off, nm_i->nat_bitmap); } +static inline nid_t 
ino_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.ino); +} + +static inline nid_t nid_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.nid); +} + +static inline unsigned int ofs_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + unsigned flag = le32_to_cpu(rn->footer.flag); + return flag >> OFFSET_BIT_SHIFT; +} + +static inline __u64 cpver_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le64_to_cpu(rn->footer.cp_ver); +} + +static inline block_t next_blkaddr_of_node(struct page *node_page) +{ + struct f2fs_node *rn = F2FS_NODE(node_page); + return le32_to_cpu(rn->footer.next_blkaddr); +} + static inline void fill_node_footer(struct page *page, nid_t nid, nid_t ino, unsigned int ofs, bool reset) { @@ -259,40 +290,30 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); struct f2fs_node *rn = F2FS_NODE(page); + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); + __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver); - rn->footer.cp_ver = ckpt->checkpoint_ver; + if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { + __u64 crc = le32_to_cpu(*((__le32 *) + ((unsigned char *)ckpt + crc_offset))); + cp_ver |= (crc << 32); + } + rn->footer.cp_ver = cpu_to_le64(cp_ver); rn->footer.next_blkaddr = cpu_to_le32(blkaddr); } -static inline nid_t ino_of_node(struct page *node_page) -{ - struct f2fs_node *rn = F2FS_NODE(node_page); - return le32_to_cpu(rn->footer.ino); -} - -static inline nid_t nid_of_node(struct page *node_page) +static inline bool is_recoverable_dnode(struct page *page) { - struct f2fs_node *rn = F2FS_NODE(node_page); - return le32_to_cpu(rn->footer.nid); -} - -static inline unsigned int ofs_of_node(struct page *node_page) -{ - struct f2fs_node *rn = 
F2FS_NODE(node_page); - unsigned flag = le32_to_cpu(rn->footer.flag); - return flag >> OFFSET_BIT_SHIFT; -} - -static inline unsigned long long cpver_of_node(struct page *node_page) -{ - struct f2fs_node *rn = F2FS_NODE(node_page); - return le64_to_cpu(rn->footer.cp_ver); -} + struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page)); + size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); + __u64 cp_ver = cur_cp_version(ckpt); -static inline block_t next_blkaddr_of_node(struct page *node_page) -{ - struct f2fs_node *rn = F2FS_NODE(node_page); - return le32_to_cpu(rn->footer.next_blkaddr); + if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { + __u64 crc = le32_to_cpu(*((__le32 *) + ((unsigned char *)ckpt + crc_offset))); + cp_ver |= (crc << 32); + } + return cpu_to_le64(cp_ver) == cpver_of_node(page); } /* diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index ad748e5..2b8a56d 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -224,7 +224,6 @@ static bool is_same_inode(struct inode *inode, struct page *ipage) static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) { - unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; struct page *page = NULL; block_t blkaddr; @@ -242,7 +241,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) page = get_tmp_page(sbi, blkaddr); - if (cp_ver != cpver_of_node(page)) + if (!is_recoverable_dnode(page)) break; if (!is_fsync_dnode(page)) @@ -516,7 +515,6 @@ out: static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, struct list_head *dir_list) { - unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi)); struct curseg_info *curseg; struct page *page = NULL; int err = 0; @@ -536,7 +534,7 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list, page = get_tmp_page(sbi, blkaddr); - if (cp_ver != cpver_of_node(page)) { + if (!is_recoverable_dnode(page)) { f2fs_put_page(page, 1); 
break; } @@ -628,37 +626,15 @@ out: } clear_sbi_flag(sbi, SBI_POR_DOING); - if (err) { - bool invalidate = false; - - if (test_opt(sbi, LFS)) { - update_meta_page(sbi, NULL, blkaddr); - invalidate = true; - } else if (discard_next_dnode(sbi, blkaddr)) { - invalidate = true; - } - - f2fs_wait_all_discard_bio(sbi); - - /* Flush all the NAT/SIT pages */ - while (get_pages(sbi, F2FS_DIRTY_META)) - sync_meta_pages(sbi, META, LONG_MAX); - - /* invalidate temporary meta page */ - if (invalidate) - invalidate_mapping_pages(META_MAPPING(sbi), - blkaddr, blkaddr); - + if (err) set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); - mutex_unlock(&sbi->cp_mutex); - } else if (need_writecp) { + mutex_unlock(&sbi->cp_mutex); + + if (!err && need_writecp) { struct cp_control cpc = { .reason = CP_RECOVERY, }; - mutex_unlock(&sbi->cp_mutex); err = write_checkpoint(sbi, &cpc); - } else { - mutex_unlock(&sbi->cp_mutex); } destroy_fsync_dnodes(&dir_list); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c4d0472..2d23d7b 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -669,28 +669,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, return __f2fs_issue_discard_async(sbi, start, len, GFP_NOFS, 0); } -bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr) -{ - int err = -EOPNOTSUPP; - - if (test_opt(sbi, DISCARD)) { - struct seg_entry *se = get_seg_entry(sbi, - GET_SEGNO(sbi, blkaddr)); - unsigned int offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); - - if (f2fs_test_bit(offset, se->discard_map)) - return false; - - err = f2fs_issue_discard(sbi, blkaddr, 1); - } - - if (err) { - update_meta_page(sbi, NULL, blkaddr); - return true; - } - return false; -} - static void __add_discard_entry(struct f2fs_sb_info *sbi, struct cp_control *cpc, struct seg_entry *se, unsigned int start, unsigned int end) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 29e3cf4..9649b79 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1827,6 +1827,9 @@ try_onemore: if (need_fsck) 
set_sbi_flag(sbi, SBI_NEED_FSCK); + if (!retry) + goto skip_recovery; + err = recover_fsync_data(sbi, false); if (err < 0) { need_fsck = true; @@ -1844,7 +1847,7 @@ try_onemore: goto free_kobj; } } - +skip_recovery: /* recover_fsync_data() cleared this already */ clear_sbi_flag(sbi, SBI_POR_DOING); diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 4c02c65..422630b 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -100,6 +100,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_CRC_RECOVERY_FLAG 0x00000040 #define CP_FASTBOOT_FLAG 0x00000020 #define CP_FSCK_FLAG 0x00000010 #define CP_ERROR_FLAG 0x00000008 -- cgit v0.10.2 From 9e1e6df412a28cdbbd2909de5c6189eda4a3383d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 19 Sep 2016 18:13:54 -0700 Subject: f2fs: put directory inodes before checkpoint in roll-forward recovery Before checkpoint, we'd better drop any inodes. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 2b8a56d..509273a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -630,6 +630,9 @@ out: set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); mutex_unlock(&sbi->cp_mutex); + /* let's drop all the directory inodes for clean checkpoint */ + destroy_fsync_dnodes(&dir_list); + if (!err && need_writecp) { struct cp_control cpc = { .reason = CP_RECOVERY, @@ -637,7 +640,6 @@ out: err = write_checkpoint(sbi, &cpc); } - destroy_fsync_dnodes(&dir_list); kmem_cache_destroy(fsync_entry_slab); return ret ? ret: err; } -- cgit v0.10.2 From fadb2fb8af5348c1bc59cab17c6f8bf515e50d55 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Sep 2016 10:29:47 +0800 Subject: f2fs: fix to avoid race condition when updating sbi flag Make updating of the sbi flag atomic by using {test,set,clear}_bit; otherwise, in a concurrent scenario, the flag could be updated incorrectly.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cda8e6f..04e9618 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -777,7 +777,7 @@ struct f2fs_sb_info { struct proc_dir_entry *s_proc; /* proc entry */ struct f2fs_super_block *raw_super; /* raw super block pointer */ int valid_super_block; /* valid super block no */ - int s_flag; /* flags for sbi */ + unsigned long s_flag; /* flags for sbi */ #ifdef CONFIG_F2FS_FS_ENCRYPTION u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE]; @@ -1046,17 +1046,17 @@ static inline struct address_space *NODE_MAPPING(struct f2fs_sb_info *sbi) static inline bool is_sbi_flag_set(struct f2fs_sb_info *sbi, unsigned int type) { - return sbi->s_flag & (0x01 << type); + return test_bit(type, &sbi->s_flag); } static inline void set_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) { - sbi->s_flag |= (0x01 << type); + set_bit(type, &sbi->s_flag); } static inline void clear_sbi_flag(struct f2fs_sb_info *sbi, unsigned int type) { - sbi->s_flag &= ~(0x01 << type); + clear_bit(type, &sbi->s_flag); } static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) -- cgit v0.10.2 From aaec2b1d18792a5f27b69ff37f34f43f89f5aa3b Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 20 Sep 2016 11:04:18 +0800 Subject: f2fs: introduce cp_lock to protect updating of ckpt_flags This patch introduces spinlock to protect updating process of ckpt_flags field in struct f2fs_checkpoint, it avoids incorrectly updating in race condition. 
Signed-off-by: Chao Yu [Jaegeuk Kim: add __is_set_ckpt_flags likewise __set_ckpt_flags] Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9c6439b..591db95 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -28,7 +28,7 @@ struct kmem_cache *inode_entry_slab; void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io) { - set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); + set_ckpt_flags(sbi, CP_ERROR_FLAG); sbi->sb->s_flags |= MS_RDONLY; if (!end_io) f2fs_flush_merged_bios(sbi); @@ -571,7 +571,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) block_t start_blk, orphan_blocks, i, j; int err; - if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG)) + if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG)) return 0; start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi); @@ -595,7 +595,7 @@ int recover_orphan_inodes(struct f2fs_sb_info *sbi) f2fs_put_page(page, 1); } /* clear Orphan Flag */ - clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG); + clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG); return 0; } @@ -1043,10 +1043,12 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi, false); + spin_lock(&sbi->cp_lock); if (data_sum_blocks < NR_CURSEG_DATA_TYPE) - set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + __set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); else - clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + __clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG); + spin_unlock(&sbi->cp_lock); orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num); ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks + @@ -1061,26 +1063,29 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) cp_payload_blks + data_sum_blocks + orphan_blocks); + spin_lock(&sbi->cp_lock); if (cpc->reason == CP_UMOUNT) - set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); else - 
clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); if (cpc->reason == CP_FASTBOOT) - set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); + __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); else - clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); + __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); if (orphan_num) - set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); else - clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) - set_ckpt_flags(ckpt, CP_FSCK_FLAG); + __set_ckpt_flags(ckpt, CP_FSCK_FLAG); /* set this flag to activate crc|cp_ver for recovery */ - set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); + __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); + + spin_unlock(&sbi->cp_lock); /* update SIT/NAT bitmap */ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 04e9618..2fb8c35 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -797,6 +797,7 @@ struct f2fs_sb_info { /* for checkpoint */ struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ + spinlock_t cp_lock; /* for flag in ckpt */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ @@ -1064,26 +1065,50 @@ static inline unsigned long long cur_cp_version(struct f2fs_checkpoint *cp) return le64_to_cpu(cp->checkpoint_ver); } -static inline bool is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +static inline bool __is_set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) { unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + return ckpt_flags & f; } -static inline void set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +static inline bool is_set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) { - unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + return 
__is_set_ckpt_flags(F2FS_CKPT(sbi), f); +} + +static inline void __set_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags; + + ckpt_flags = le32_to_cpu(cp->ckpt_flags); ckpt_flags |= f; cp->ckpt_flags = cpu_to_le32(ckpt_flags); } -static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +static inline void set_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) { - unsigned int ckpt_flags = le32_to_cpu(cp->ckpt_flags); + spin_lock(&sbi->cp_lock); + __set_ckpt_flags(F2FS_CKPT(sbi), f); + spin_unlock(&sbi->cp_lock); +} + +static inline void __clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) +{ + unsigned int ckpt_flags; + + ckpt_flags = le32_to_cpu(cp->ckpt_flags); ckpt_flags &= (~f); cp->ckpt_flags = cpu_to_le32(ckpt_flags); } +static inline void clear_ckpt_flags(struct f2fs_sb_info *sbi, unsigned int f) +{ + spin_lock(&sbi->cp_lock); + __clear_ckpt_flags(F2FS_CKPT(sbi), f); + spin_unlock(&sbi->cp_lock); +} + static inline bool f2fs_discard_en(struct f2fs_sb_info *sbi) { struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); @@ -1129,8 +1154,8 @@ static inline bool __remain_node_summaries(int reason) static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) { - return (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG) || - is_set_ckpt_flags(F2FS_CKPT(sbi), CP_FASTBOOT_FLAG)); + return (is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG) || + is_set_ckpt_flags(sbi, CP_FASTBOOT_FLAG)); } /* @@ -1832,7 +1857,7 @@ static inline int f2fs_readonly(struct super_block *sb) static inline bool f2fs_cp_error(struct f2fs_sb_info *sbi) { - return is_set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); + return is_set_ckpt_flags(sbi, CP_ERROR_FLAG); } static inline bool is_dot_dotdot(const struct qstr *str) diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index e8ca64a..868bec6 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -293,7 +293,7 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) 
size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); __u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver); - if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { + if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { __u64 crc = le32_to_cpu(*((__le32 *) ((unsigned char *)ckpt + crc_offset))); cp_ver |= (crc << 32); @@ -308,7 +308,7 @@ static inline bool is_recoverable_dnode(struct page *page) size_t crc_offset = le32_to_cpu(ckpt->checksum_offset); __u64 cp_ver = cur_cp_version(ckpt); - if (is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { + if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) { __u64 crc = le32_to_cpu(*((__le32 *) ((unsigned char *)ckpt + crc_offset))); cp_ver |= (crc << 32); diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 509273a..2fc84a9 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -627,7 +627,7 @@ out: clear_sbi_flag(sbi, SBI_POR_DOING); if (err) - set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); + set_ckpt_flags(sbi, CP_ERROR_FLAG); mutex_unlock(&sbi->cp_mutex); /* let's drop all the directory inodes for clean checkpoint */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2d23d7b..e78501c 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1801,7 +1801,7 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) int type = CURSEG_HOT_DATA; int err; - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { + if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) { int npages = npages_for_summary_flush(sbi, true); if (npages >= 2) @@ -1898,7 +1898,7 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi, void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk) { - if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) + if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) write_compacted_summaries(sbi, start_blk); else write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9649b79..95986a9 100644 --- a/fs/f2fs/super.c +++ 
b/fs/f2fs/super.c @@ -736,7 +736,7 @@ static void f2fs_put_super(struct super_block *sb) * clean checkpoint again. */ if (is_sbi_flag_set(sbi, SBI_IS_DIRTY) || - !is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG)) { + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { struct cp_control cpc = { .reason = CP_UMOUNT, }; @@ -1478,6 +1478,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) mutex_init(&sbi->umount_mutex); mutex_init(&sbi->wio_mutex[NODE]); mutex_init(&sbi->wio_mutex[DATA]); + spin_lock_init(&sbi->cp_lock); #ifdef CONFIG_F2FS_FS_ENCRYPTION memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX, @@ -1819,7 +1820,7 @@ try_onemore: * previous checkpoint was not done by clean system shutdown. */ if (bdev_read_only(sb->s_bdev) && - !is_set_ckpt_flags(sbi->ckpt, CP_UMOUNT_FLAG)) { + !is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) { err = -EROFS; goto free_kobj; } -- cgit v0.10.2 From 5b7a487cf32d3a266fea83d590d3226b5ad817a7 Mon Sep 17 00:00:00 2001 From: Weichao Guo Date: Tue, 20 Sep 2016 05:03:27 +0800 Subject: f2fs: add customized migrate_page callback This patch improves the migration of dirty pages and allows migrating atomic written pages that F2FS uses in Page Cache. Instead of the fallback releasing page path, it provides better performance for memory compaction, CMA and other users of memory page migrating. For dirty pages, there is no need to write back first when migrating. For an atomic written page before committing, we can migrate the page and update the related 'inmem_pages' list at the same time. 
Signed-off-by: Weichao Guo Reviewed-by: Chao Yu [Jaegeuk Kim: fix some coding style] Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 591db95..64e3892 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -388,6 +388,9 @@ const struct address_space_operations f2fs_meta_aops = { .set_page_dirty = f2fs_set_meta_page_dirty, .invalidatepage = f2fs_invalidate_page, .releasepage = f2fs_release_page, +#ifdef CONFIG_MIGRATION + .migratepage = f2fs_migrate_page, +#endif }; static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 637b81d..ed834cd 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1883,6 +1883,58 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) return generic_block_bmap(mapping, block, get_data_block_bmap); } +#ifdef CONFIG_MIGRATION +#include <linux/migrate.h> + +int f2fs_migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode) +{ + int rc, extra_count; + struct f2fs_inode_info *fi = F2FS_I(mapping->host); + bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page); + + BUG_ON(PageWriteback(page)); + + /* migrating an atomic written page is safe with the inmem_lock hold */ + if (atomic_written && !mutex_trylock(&fi->inmem_lock)) + return -EAGAIN; + + /* + * A reference is expected if PagePrivate set when move mapping, + * however F2FS breaks this for maintaining dirty page counts when + * truncating pages. So here adjusting the 'extra_count' make it work. + */ + extra_count = (atomic_written ? 
1 : 0) - page_has_private(page); + rc = migrate_page_move_mapping(mapping, newpage, + page, NULL, mode, extra_count); + if (rc != MIGRATEPAGE_SUCCESS) { + if (atomic_written) + mutex_unlock(&fi->inmem_lock); + return rc; + } + + if (atomic_written) { + struct inmem_pages *cur; + list_for_each_entry(cur, &fi->inmem_pages, list) + if (cur->page == page) { + cur->page = newpage; + break; + } + mutex_unlock(&fi->inmem_lock); + put_page(page); + get_page(newpage); + } + + if (PagePrivate(page)) + SetPagePrivate(newpage); + set_page_private(newpage, page_private(page)); + + migrate_page_copy(newpage, page); + + return MIGRATEPAGE_SUCCESS; +} +#endif + const struct address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, .readpages = f2fs_read_data_pages, @@ -1895,4 +1947,7 @@ const struct address_space_operations f2fs_dblock_aops = { .releasepage = f2fs_release_page, .direct_IO = f2fs_direct_IO, .bmap = f2fs_bmap, +#ifdef CONFIG_MIGRATION + .migratepage = f2fs_migrate_page, +#endif }; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 2fb8c35..0903191 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2152,6 +2152,10 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *, u64, u64); void f2fs_set_page_dirty_nobuffers(struct page *); void f2fs_invalidate_page(struct page *, unsigned int, unsigned int); int f2fs_release_page(struct page *, gfp_t); +#ifdef CONFIG_MIGRATION +int f2fs_migrate_page(struct address_space *, struct page *, struct page *, + enum migrate_mode); +#endif /* * gc.c diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 2322a8e..55c22a9 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1670,6 +1670,9 @@ const struct address_space_operations f2fs_node_aops = { .set_page_dirty = f2fs_set_node_page_dirty, .invalidatepage = f2fs_invalidate_page, .releasepage = f2fs_release_page, +#ifdef CONFIG_MIGRATION + .migratepage = f2fs_migrate_page, +#endif }; static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info 
*nm_i, -- cgit v0.10.2 From 2956e450fa08669ebf1541acb07843b5aa6acf96 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Sep 2016 09:28:06 -0700 Subject: f2fs: assign return value in f2fs_gc This patch adds a return value of write_checkpoint for f2fs_gc. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 24acbbb..400bc6d 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -925,10 +925,14 @@ gc_more: */ if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi)) { - write_checkpoint(sbi, &cpc); + ret = write_checkpoint(sbi, &cpc); + if (ret) + goto stop; segno = NULL_SEGNO; } else if (has_not_enough_free_secs(sbi, 0, 0)) { - write_checkpoint(sbi, &cpc); + ret = write_checkpoint(sbi, &cpc); + if (ret) + goto stop; } } @@ -948,7 +952,7 @@ gc_more: goto gc_more; if (gc_type == FG_GC) - write_checkpoint(sbi, &cpc); + ret = write_checkpoint(sbi, &cpc); } stop: mutex_unlock(&sbi->gc_mutex); -- cgit v0.10.2 From f6fe2be3c6d6f0127742ae1cc2e3ffe9ad31ea8b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Sep 2016 09:34:48 -0700 Subject: f2fs: should put_page for summary page We should call put_page for preloaded summary pages in do_garbage_collect. 
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 400bc6d..a9a3c9f 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -848,7 +848,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, for (segno = start_segno; segno < end_segno; segno++) { if (get_valid_blocks(sbi, segno, 1) == 0) - continue; + goto next; /* find segment summary of victim */ sum_page = find_get_page(META_MAPPING(sbi), @@ -874,7 +874,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, gc_type); stat_inc_seg_count(sbi, type, gc_type); - +next: f2fs_put_page(sum_page, 0); } -- cgit v0.10.2 From 646e759a4d09062df943eaf61cb8141a91204380 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Sep 2016 09:37:23 -0700 Subject: f2fs: avoid gc in cp_error case Otherwise, we can hit f2fs_bug_on(sbi, !PageUptodate(sum_page)); Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a9a3c9f..b9d6c42 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -847,7 +847,8 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, for (segno = start_segno; segno < end_segno; segno++) { - if (get_valid_blocks(sbi, segno, 1) == 0) + if (get_valid_blocks(sbi, segno, 1) == 0 || + unlikely(f2fs_cp_error(sbi))) goto next; /* find segment summary of victim */ -- cgit v0.10.2 From d41065e20484c53d8976742367bb78ac248fb0d5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 21 Sep 2016 11:39:42 -0700 Subject: f2fs: handle errors during recover_orphan_inodes This patch fixes to handle EIO during recover_orphan_inode() given the below panic. 
F2FS-fs : inject IO error in f2fs_read_end_io+0xe6/0x100 [f2fs] ------------[ cut here ]------------ RIP: 0010:[] [] f2fs_evict_inode+0x433/0x470 [f2fs] RSP: 0018:ffff92f8b7fb7c30 EFLAGS: 00010246 RAX: ffff92fb88a13500 RBX: ffff92f890566ea0 RCX: 00000000fd3c255c RDX: 0000000000000001 RSI: ffff92fb88a13d90 RDI: ffff92fb8ee127e8 RBP: ffff92f8b7fb7c58 R08: 0000000000000001 R09: ffff92fb88a13d58 R10: 000000005a6a9373 R11: 0000000000000001 R12: 00000000fffffffb R13: ffff92fb8ee12000 R14: 00000000000034ca R15: ffff92fb8ee12620 FS: 00007f1fefd8e880(0000) GS:ffff92fb95600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc211d34cdb CR3: 000000012d43a000 CR4: 00000000001406e0 Stack: ffff92f890566ea0 ffff92f890567078 ffffffffc0b5a0c0 ffff92f890566f28 ffff92fb888b2000 ffff92f8b7fb7c80 ffffffffbc27ff55 ffff92f890566ea0 ffff92fb8bf10000 ffffffffc0b5a0c0 ffff92f8b7fb7cb0 ffffffffbc28090d Call Trace: [] evict+0xc5/0x1a0 [] iput+0x1ad/0x2c0 [] recover_orphan_inodes+0x10c/0x2e0 [f2fs] [] f2fs_fill_super+0x884/0x1150 [f2fs] [] mount_bdev+0x18c/0x1c0 [] ? f2fs_commit_super+0x100/0x100 [f2fs] [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x39/0x170 [] vfs_kern_mount+0x6b/0x160 [] do_mount+0x1cf/0xd00 [] ? 
copy_mount_options+0xac/0x170 [] SyS_mount+0x83/0xd0 [] entry_SYSCALL_64_fastpath+0x23/0xc1 Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 64e3892..15c0006 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -535,6 +535,17 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) { struct inode *inode; struct node_info ni; + int err = acquire_orphan_inode(sbi); + + if (err) { + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); + return err; + } + + __add_ino_entry(sbi, ino, ORPHAN_INO); inode = f2fs_iget_retry(sbi->sb, ino); if (IS_ERR(inode)) { @@ -555,17 +566,13 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) /* ENOMEM was fully retried in f2fs_evict_inode. */ if (ni.blk_addr != NULL_ADDR) { - int err = acquire_orphan_inode(sbi); - - if (err) { - set_sbi_flag(sbi, SBI_NEED_FSCK); - f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", - __func__, ino); - return err; - } - __add_ino_entry(sbi, ino, ORPHAN_INO); + set_sbi_flag(sbi, SBI_NEED_FSCK); + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: orphan failed (ino=%x), run fsck to fix.", + __func__, ino); + return -EIO; } + __remove_ino_entry(sbi, ino, ORPHAN_INO); return 0; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 95986a9..e7bb153 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1893,6 +1893,7 @@ free_root_inode: sb->s_root = NULL; free_node_inode: mutex_lock(&sbi->umount_mutex); + release_ino_entry(sbi, true); f2fs_leave_shrinker(sbi); iput(sbi->node_inode); mutex_unlock(&sbi->umount_mutex); -- cgit v0.10.2 From ef68bf119788be4b468aa6d72362c8c5bbc3e073 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Thu, 22 Sep 2016 13:31:48 -0700 Subject: f2fs: do not unnecessarily null-terminate encrypted symlink data Null-terminating the fscrypt_symlink_data on read is unnecessary because 
it is not string data --- it contains binary ciphertext. Signed-off-by: Eric Biggers Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index bfcd9da..300aef8 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -1015,7 +1015,6 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, struct fscrypt_str cstr = FSTR_INIT(NULL, 0); struct fscrypt_str pstr = FSTR_INIT(NULL, 0); struct fscrypt_symlink_data *sd; - loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1); u32 max_size = inode->i_sb->s_blocksize; int res; @@ -1030,7 +1029,6 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry, if (IS_ERR(cpage)) return ERR_CAST(cpage); caddr = page_address(cpage); - caddr[size] = 0; /* Symlink is encrypted */ sd = (struct fscrypt_symlink_data *)caddr; -- cgit v0.10.2 From bb5dada7d24778c6a315ded220ade20068305050 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 23 Sep 2016 11:29:00 -0700 Subject: f2fs: remove dirty inode pages in error path When getting EIO while handling orphan inodes, we can get some dirty node pages. Then, f2fs_write_node_pages() called by iput(node_inode) will try to flush node pages. But in this case, we should prevent to do that, since we will try again from the start. Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index e7bb153..fbded38 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1892,6 +1892,7 @@ free_root_inode: dput(sb->s_root); sb->s_root = NULL; free_node_inode: + truncate_inode_pages_final(NODE_MAPPING(sbi)); mutex_lock(&sbi->umount_mutex); release_ino_entry(sbi, true); f2fs_leave_shrinker(sbi); -- cgit v0.10.2 From d32853de5080e7f0c2962de0058ea71d1e244f76 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 23 Sep 2016 13:24:56 +0800 Subject: f2fs: adjust display format of segment bit Just adjust segment bit info printed in procfs. 
Before: 1008 5|0 |0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1009 3|183|0 0 61 20 20 0 0 21 80 c0 2 e4 e 54 0 21 21 17 a 44 d0 28 e4 50 40 30 8 0 2d 32 0 5 b0 80 1 43 2 8e f8 7b 2 25 93 bf e0 73 8e 9a 19 44 60 ff e4 cc e6 8e bf f9 ff 5 3d 31 3d 13 1010 3|1 |0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 40 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 After: 1008 5|0 | 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 1009 4|434| ff 7d ff bf d9 3f ff e7 ff bf d7 bf ff bb be ff fb df f7 fb fa bf fb fe bb df dd ff fe ef ff fe ef e2 27 bf ab bf fb df fd bd bf fb db fc ff ff 3f ff ff bf ff 5f db 3f fb fb bf fb bf 4f ff ef 1010 4|422| ff bb fe ff ef d7 ee ff ff fc bf ef 7d eb ec fd fb 3f 97 7f ef ff af ff db ff ff 69 bf ff f6 e7 ff fb f7 7b fb df be ff ff ef f3 fe ff ff df fe f7 fa ff b7 77 be fe fb a9 7f 87 a2 ac c7 ff 75 Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fbded38..ebf41d5 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -954,7 +954,7 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "%d|%-3u|", se->type, get_valid_blocks(sbi, i, 1)); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) - seq_printf(seq, "%x ", se->cur_valid_map[j]); + seq_printf(seq, " %.2x", se->cur_valid_map[j]); seq_putc(seq, '\n'); } return 0; -- cgit v0.10.2 From 1ecc0c5c50ce8834f7e35b63be7480bf1aaa4155 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 23 Sep 2016 21:30:09 +0800 Subject: f2fs: support configuring fault injection per superblock Previously, we only support global fault injection configuration, so that when we configure type/rate of fault injection through sysfs, mount option, it will influence all f2fs partition which is being 
used. This does not make sense: it is inconvenient if a developer wants to test separate partitions with different fault injection rates/types simultaneously, and it is not possible to enable fault injection in one partition while disabling it in another. From now on, we move the global fault injection configuration from the module into the per-superblock info, hence injection testing can be more flexible. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 4dcc9e2..1e29630 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c @@ -109,14 +109,16 @@ fail: return ERR_PTR(-EINVAL); } -static void *f2fs_acl_to_disk(const struct posix_acl *acl, size_t *size) +static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi, + const struct posix_acl *acl, size_t *size) { struct f2fs_acl_header *f2fs_acl; struct f2fs_acl_entry *entry; int i; - f2fs_acl = f2fs_kmalloc(sizeof(struct f2fs_acl_header) + acl->a_count * - sizeof(struct f2fs_acl_entry), GFP_NOFS); + f2fs_acl = f2fs_kmalloc(sbi, sizeof(struct f2fs_acl_header) + + acl->a_count * sizeof(struct f2fs_acl_entry), + GFP_NOFS); if (!f2fs_acl) return ERR_PTR(-ENOMEM); @@ -175,7 +177,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type, retval = f2fs_getxattr(inode, name_index, "", NULL, 0, dpage); if (retval > 0) { - value = f2fs_kmalloc(retval, GFP_F2FS_ZERO); + value = f2fs_kmalloc(F2FS_I_SB(inode), retval, GFP_F2FS_ZERO); if (!value) return ERR_PTR(-ENOMEM); retval = f2fs_getxattr(inode, name_index, "", value, @@ -230,7 +232,7 @@ static int __f2fs_set_acl(struct inode *inode, int type, } if (acl) { - value = f2fs_acl_to_disk(acl, &size); + value = f2fs_acl_to_disk(F2FS_I_SB(inode), acl, &size); if (IS_ERR(value)) { clear_inode_flag(inode, FI_ACL_MODE); return (int)PTR_ERR(value); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 15c0006..5891093 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -494,7 +494,7 @@ int
acquire_orphan_inode(struct f2fs_sb_info *sbi) spin_lock(&im->ino_lock); #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(FAULT_ORPHAN)) { + if (time_to_inject(sbi, FAULT_ORPHAN)) { spin_unlock(&im->ino_lock); return -ENOSPC; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ed834cd..8b9a1dc 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -35,7 +35,7 @@ static void f2fs_read_end_io(struct bio *bio) int i; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(FAULT_IO)) + if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) bio->bi_error = -EIO; #endif diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 39a850b..cbf85f6 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -545,7 +545,7 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name, start: #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(FAULT_DIR_DEPTH)) + if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) return -ENOSPC; #endif if (unlikely(current_depth == MAX_DIR_HASH_DEPTH)) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0903191..896ad9f 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -56,42 +56,8 @@ struct f2fs_fault_info { unsigned int inject_type; }; -extern struct f2fs_fault_info f2fs_fault; extern char *fault_name[FAULT_MAX]; -#define IS_FAULT_SET(type) (f2fs_fault.inject_type & (1 << (type))) - -static inline bool time_to_inject(int type) -{ - if (!f2fs_fault.inject_rate) - return false; - if (type == FAULT_KMALLOC && !IS_FAULT_SET(type)) - return false; - else if (type == FAULT_PAGE_ALLOC && !IS_FAULT_SET(type)) - return false; - else if (type == FAULT_ALLOC_NID && !IS_FAULT_SET(type)) - return false; - else if (type == FAULT_ORPHAN && !IS_FAULT_SET(type)) - return false; - else if (type == FAULT_BLOCK && !IS_FAULT_SET(type)) - return false; - else if (type == FAULT_DIR_DEPTH && !IS_FAULT_SET(type)) - return false; - else if (type == FAULT_EVICT_INODE && !IS_FAULT_SET(type)) - return false; - else if (type == FAULT_IO && 
!IS_FAULT_SET(type)) - return false; - - atomic_inc(&f2fs_fault.inject_ops); - if (atomic_read(&f2fs_fault.inject_ops) >= f2fs_fault.inject_rate) { - atomic_set(&f2fs_fault.inject_ops, 0); - printk("%sF2FS-fs : inject %s in %pF\n", - KERN_INFO, - fault_name[type], - __builtin_return_address(0)); - return true; - } - return false; -} +#define IS_FAULT_SET(fi, type) (fi->inject_type & (1 << (type))) #endif /* @@ -905,8 +871,37 @@ struct f2fs_sb_info { /* Reference to checksum algorithm driver via cryptoapi */ struct crypto_shash *s_chksum_driver; + + /* For fault injection */ +#ifdef CONFIG_F2FS_FAULT_INJECTION + struct f2fs_fault_info fault_info; +#endif }; +#ifdef CONFIG_F2FS_FAULT_INJECTION +static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) +{ + struct f2fs_fault_info *ffi = &sbi->fault_info; + + if (!ffi->inject_rate) + return false; + + if (!IS_FAULT_SET(ffi, type)) + return false; + + atomic_inc(&ffi->inject_ops); + if (atomic_read(&ffi->inject_ops) >= ffi->inject_rate) { + atomic_set(&ffi->inject_ops, 0); + printk("%sF2FS-fs : inject %s in %pF\n", + KERN_INFO, + fault_name[type], + __builtin_return_address(0)); + return true; + } + return false; +} +#endif + /* For write statistics. Suppose sector size is 512 bytes, * and the return value is in kbytes. s is of struct f2fs_sb_info. 
*/ @@ -1195,7 +1190,7 @@ static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, blkcnt_t diff; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(FAULT_BLOCK)) + if (time_to_inject(sbi, FAULT_BLOCK)) return false; #endif /* @@ -1429,7 +1424,7 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, if (page) return page; - if (time_to_inject(FAULT_PAGE_ALLOC)) + if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_ALLOC)) return NULL; #endif if (!for_write) @@ -1880,10 +1875,11 @@ static inline bool f2fs_may_extent_tree(struct inode *inode) return S_ISREG(inode->i_mode); } -static inline void *f2fs_kmalloc(size_t size, gfp_t flags) +static inline void *f2fs_kmalloc(struct f2fs_sb_info *sbi, + size_t size, gfp_t flags) { #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(FAULT_KMALLOC)) + if (time_to_inject(sbi, FAULT_KMALLOC)) return NULL; #endif return kmalloc(size, flags); diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index b9d6c42..a5c4175 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -96,7 +96,7 @@ int start_gc_thread(struct f2fs_sb_info *sbi) dev_t dev = sbi->sb->s_bdev->bd_dev; int err = 0; - gc_th = f2fs_kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); + gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL); if (!gc_th) { err = -ENOMEM; goto out; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 4d526f3..34234d8 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -445,8 +445,8 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, struct f2fs_inline_dentry *backup_dentry; int err; - backup_dentry = f2fs_kmalloc(sizeof(struct f2fs_inline_dentry), - GFP_F2FS_ZERO); + backup_dentry = f2fs_kmalloc(F2FS_I_SB(dir), + sizeof(struct f2fs_inline_dentry), GFP_F2FS_ZERO); if (!backup_dentry) { f2fs_put_page(ipage, 1); return -ENOMEM; diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ac4daa5..d736989 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -369,7 +369,7 @@ 
void f2fs_evict_inode(struct inode *inode) goto no_delete; #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(FAULT_EVICT_INODE)) + if (time_to_inject(sbi, FAULT_EVICT_INODE)) goto no_delete; #endif diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 55c22a9..9faddcd 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1839,7 +1839,7 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; retry: #ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(FAULT_ALLOC_NID)) + if (time_to_inject(sbi, FAULT_ALLOC_NID)) return false; #endif if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index ebf41d5..a06eee4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -40,7 +40,6 @@ static struct kmem_cache *f2fs_inode_cachep; static struct kset *f2fs_kset; #ifdef CONFIG_F2FS_FAULT_INJECTION -struct f2fs_fault_info f2fs_fault; char *fault_name[FAULT_MAX] = { [FAULT_KMALLOC] = "kmalloc", @@ -53,14 +52,17 @@ char *fault_name[FAULT_MAX] = { [FAULT_IO] = "IO error", }; -static void f2fs_build_fault_attr(unsigned int rate) +static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, + unsigned int rate) { + struct f2fs_fault_info *ffi = &sbi->fault_info; + if (rate) { - atomic_set(&f2fs_fault.inject_ops, 0); - f2fs_fault.inject_rate = rate; - f2fs_fault.inject_type = (1 << FAULT_MAX) - 1; + atomic_set(&ffi->inject_ops, 0); + ffi->inject_rate = rate; + ffi->inject_type = (1 << FAULT_MAX) - 1; } else { - memset(&f2fs_fault, 0, sizeof(struct f2fs_fault_info)); + memset(ffi, 0, sizeof(struct f2fs_fault_info)); } } #endif @@ -170,7 +172,7 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) #ifdef CONFIG_F2FS_FAULT_INJECTION else if (struct_type == FAULT_INFO_RATE || struct_type == FAULT_INFO_TYPE) - return (unsigned char *)&f2fs_fault; + return (unsigned char *)&sbi->fault_info; #endif return NULL; } @@ -315,6 +317,10 @@ static struct attribute *f2fs_attrs[] = { 
ATTR_LIST(dirty_nats_ratio), ATTR_LIST(cp_interval), ATTR_LIST(idle_interval), +#ifdef CONFIG_F2FS_FAULT_INJECTION + ATTR_LIST(inject_rate), + ATTR_LIST(inject_type), +#endif ATTR_LIST(lifetime_write_kbytes), NULL, }; @@ -330,22 +336,6 @@ static struct kobj_type f2fs_ktype = { .release = f2fs_sb_release, }; -#ifdef CONFIG_F2FS_FAULT_INJECTION -/* sysfs for f2fs fault injection */ -static struct kobject f2fs_fault_inject; - -static struct attribute *f2fs_fault_attrs[] = { - ATTR_LIST(inject_rate), - ATTR_LIST(inject_type), - NULL -}; - -static struct kobj_type f2fs_fault_ktype = { - .default_attrs = f2fs_fault_attrs, - .sysfs_ops = &f2fs_attr_ops, -}; -#endif - void f2fs_msg(struct super_block *sb, const char *level, const char *fmt, ...) { struct va_format vaf; @@ -374,7 +364,7 @@ static int parse_options(struct super_block *sb, char *options) int arg = 0; #ifdef CONFIG_F2FS_FAULT_INJECTION - f2fs_build_fault_attr(0); + f2fs_build_fault_attr(sbi, 0); #endif if (!options) @@ -539,7 +529,7 @@ static int parse_options(struct super_block *sb, char *options) if (args->from && match_int(args, &arg)) return -EINVAL; #ifdef CONFIG_F2FS_FAULT_INJECTION - f2fs_build_fault_attr(arg); + f2fs_build_fault_attr(sbi, arg); #else f2fs_msg(sb, KERN_INFO, "FAULT_INJECTION was not selected"); @@ -1993,16 +1983,6 @@ static int __init init_f2fs_fs(void) err = -ENOMEM; goto free_extent_cache; } -#ifdef CONFIG_F2FS_FAULT_INJECTION - f2fs_fault_inject.kset = f2fs_kset; - f2fs_build_fault_attr(0); - err = kobject_init_and_add(&f2fs_fault_inject, &f2fs_fault_ktype, - NULL, "fault_injection"); - if (err) { - f2fs_fault_inject.kset = NULL; - goto free_kset; - } -#endif err = register_shrinker(&f2fs_shrinker_info); if (err) goto free_kset; @@ -2021,10 +2001,6 @@ free_filesystem: free_shrinker: unregister_shrinker(&f2fs_shrinker_info); free_kset: -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (f2fs_fault_inject.kset) - kobject_put(&f2fs_fault_inject); -#endif kset_unregister(f2fs_kset); 
free_extent_cache: destroy_extent_cache(); @@ -2046,9 +2022,6 @@ static void __exit exit_f2fs_fs(void) f2fs_destroy_root_stats(); unregister_filesystem(&f2fs_fs_type); unregister_shrinker(&f2fs_shrinker_info); -#ifdef CONFIG_F2FS_FAULT_INJECTION - kobject_put(&f2fs_fault_inject); -#endif kset_unregister(f2fs_kset); destroy_extent_cache(); destroy_checkpoint_caches(); -- cgit v0.10.2 From 9c094040c58ea2f6ae81621c6d9057dc0c94f8f8 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Sat, 24 Sep 2016 12:29:18 +0800 Subject: f2fs: remove redundant value definition This patch remove redundant value definition in build_sit_entries Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e78501c..fbcc172 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -2305,6 +2305,8 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) struct sit_info *sit_i = SIT_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA); struct f2fs_journal *journal = curseg->journal; + struct seg_entry *se; + struct f2fs_sit_entry sit; int sit_blk_cnt = SIT_BLK_CNT(sbi); unsigned int i, start, end; unsigned int readed, start_blk = 0; @@ -2317,11 +2319,10 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) end = (start_blk + readed) * sit_i->sents_per_block; for (; start < end && start < MAIN_SEGS(sbi); start++) { - struct seg_entry *se = &sit_i->sentries[start]; struct f2fs_sit_block *sit_blk; - struct f2fs_sit_entry sit; struct page *page; + se = &sit_i->sentries[start]; page = get_current_sit_page(sbi, start); sit_blk = (struct f2fs_sit_block *)page_address(page); sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)]; @@ -2347,8 +2348,6 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) down_read(&curseg->journal_rwsem); for (i = 0; i < sits_in_cursum(journal); i++) { - struct f2fs_sit_entry sit; - struct seg_entry *se; unsigned int old_valid_blocks; start = 
le32_to_cpu(segno_in_journal(journal, i)); -- cgit v0.10.2 From 36dbd3287fefdfceeb6a82997241c26df29882c8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Sep 2016 19:45:05 +0800 Subject: f2fs: do fault injection initialization in default_options Do fault injection initialization in default_options to stay consistent with how the other default options are configured. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a06eee4..785d0fd 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -363,10 +363,6 @@ static int parse_options(struct super_block *sb, char *options) char *p, *name; int arg = 0; -#ifdef CONFIG_F2FS_FAULT_INJECTION - f2fs_build_fault_attr(sbi, 0); -#endif - if (!options) return 0; @@ -990,6 +986,10 @@ static void default_options(struct f2fs_sb_info *sbi) #ifdef CONFIG_F2FS_FS_POSIX_ACL set_opt(sbi, POSIX_ACL); #endif + +#ifdef CONFIG_F2FS_FAULT_INJECTION + f2fs_build_fault_attr(sbi, 0); +#endif } static int f2fs_remount(struct super_block *sb, int *flags, char *data) -- cgit v0.10.2 From 2443b8b363917468e2fb163b34c8f14fe1546251 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Sep 2016 19:45:06 +0800 Subject: f2fs: fix to recover old fault injection config in ->remount_fs In ->remount_fs, we didn't restore the original fault injection config when we encountered an error; fix it.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 785d0fd..40aecd7 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1000,6 +1000,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) bool need_restart_gc = false; bool need_stop_gc = false; bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE); +#ifdef CONFIG_F2FS_FAULT_INJECTION + struct f2fs_fault_info ffi = sbi->fault_info; +#endif /* * Save the old mount options in case we @@ -1095,6 +1098,9 @@ restore_gc: restore_opts: sbi->mount_opt = org_mount_opt; sbi->active_logs = active_logs; +#ifdef CONFIG_F2FS_FAULT_INJECTION + sbi->fault_info = ffi; +#endif return err; } -- cgit v0.10.2 From 0f34802858e74e708c6d42209811f6d264892c8f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 26 Sep 2016 19:45:55 +0800 Subject: f2fs: support checkpoint error injection This patch adds to support checkpoint error injection in f2fs for testing fatal error tolerance, it will be useful that it can simulate abnormal power off by f2fs itself instead of calling godown ioctl by running apps. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 896ad9f..9e8de18 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -47,6 +47,7 @@ enum { FAULT_DIR_DEPTH, FAULT_EVICT_INODE, FAULT_IO, + FAULT_CHECKPOINT, FAULT_MAX, }; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index a5c4175..c9b8a67 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -47,6 +47,11 @@ static int gc_thread_func(void *data) continue; } +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_CHECKPOINT)) + f2fs_stop_checkpoint(sbi, false); +#endif + /* * [GC triggering condition] * 0. GC is not conducted currently. 
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index fbcc172..fc886f0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -345,6 +345,11 @@ int commit_inmem_pages(struct inode *inode) */ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) { +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_CHECKPOINT)) + f2fs_stop_checkpoint(sbi, false); +#endif + if (!need) return; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 40aecd7..6132b4c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -50,6 +50,7 @@ char *fault_name[FAULT_MAX] = { [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", [FAULT_IO] = "IO error", + [FAULT_CHECKPOINT] = "checkpoint error", }; static void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, -- cgit v0.10.2 From 7fd748df451d57cc40b66f6cd8091c0707497443 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 27 Sep 2016 00:09:53 +0800 Subject: f2fs: remove redundant io plug Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 5891093..2a7b9f5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -267,7 +267,6 @@ static int f2fs_write_meta_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); - struct blk_plug plug; long diff, written; /* collect a number of dirty meta pages and write together */ @@ -280,9 +279,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping, /* if mounting is failed, skip writing node pages */ mutex_lock(&sbi->cp_mutex); diff = nr_pages_to_write(sbi, META, wbc); - blk_start_plug(&plug); written = sync_meta_pages(sbi, META, wbc->nr_to_write); - blk_finish_plug(&plug); mutex_unlock(&sbi->cp_mutex); wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); return 0; -- cgit v0.10.2 From 3fa565039e3338f60d7e7a8e818835dabdea764b Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Thu, 29 Sep 2016 18:37:31 +0800 
Subject: f2fs: remove dead variable Signed-off-by: Sheng Yong Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c9b8a67..93985c6 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -275,7 +275,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); struct victim_sel_policy p; - unsigned int secno, max_cost, last_victim; + unsigned int secno, last_victim; unsigned int last_segment = MAIN_SEGS(sbi); unsigned int nsearched = 0; @@ -285,7 +285,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, select_policy(sbi, gc_type, type, &p); p.min_segno = NULL_SEGNO; - p.min_cost = max_cost = get_max_cost(sbi, &p); + p.min_cost = get_max_cost(sbi, &p); if (p.max_search == 0) goto out; -- cgit v0.10.2 From fc0065adb202518e25fb929cda7d5887a456f774 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Fri, 30 Sep 2016 08:24:53 +0800 Subject: f2fs: introduce get_checkpoint_version for cleanup There exists almost same codes when get the value of pre_version and cur_version in function validate_checkpoint, this patch adds get_checkpoint_version to clean up redundant codes. 
Signed-off-by: Tiezhu Yang Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 2a7b9f5..7e26328 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -663,45 +663,55 @@ static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk) } } -static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, - block_t cp_addr, unsigned long long *version) +static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, + struct f2fs_checkpoint **cp_block, struct page **cp_page, + unsigned long long *version) { - struct page *cp_page_1, *cp_page_2 = NULL; unsigned long blk_size = sbi->blocksize; - struct f2fs_checkpoint *cp_block; - unsigned long long cur_version = 0, pre_version = 0; - size_t crc_offset; + size_t crc_offset = 0; __u32 crc = 0; - /* Read the 1st cp block in this CP pack */ - cp_page_1 = get_meta_page(sbi, cp_addr); + *cp_page = get_meta_page(sbi, cp_addr); + *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); - /* get the version number */ - cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1); - crc_offset = le32_to_cpu(cp_block->checksum_offset); - if (crc_offset >= blk_size) - goto invalid_cp1; + crc_offset = le32_to_cpu((*cp_block)->checksum_offset); + if (crc_offset >= blk_size) { + f2fs_msg(sbi->sb, KERN_WARNING, + "invalid crc_offset: %zu", crc_offset); + return -EINVAL; + } - crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); - if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset)) - goto invalid_cp1; + crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block + + crc_offset))); + if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) { + f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value"); + return -EINVAL; + } - pre_version = cur_cp_version(cp_block); + *version = cur_cp_version(*cp_block); + return 0; +} - /* Read the 2nd cp block in this CP pack */ - cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 
1; cp_page_2 = get_meta_page(sbi, cp_addr); +static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, + block_t cp_addr, unsigned long long *version) +{ + struct page *cp_page_1 = NULL, *cp_page_2 = NULL; + struct f2fs_checkpoint *cp_block = NULL; + unsigned long long cur_version = 0, pre_version = 0; + int err; - cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2); - crc_offset = le32_to_cpu(cp_block->checksum_offset); - if (crc_offset >= blk_size) - goto invalid_cp2; + err = get_checkpoint_version(sbi, cp_addr, &cp_block, + &cp_page_1, version); + if (err) + goto invalid_cp1; + pre_version = *version; - crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset))); - if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset)) + cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1; + err = get_checkpoint_version(sbi, cp_addr, &cp_block, + &cp_page_2, version); + if (err) goto invalid_cp2; - - cur_version = cur_cp_version(cp_block); + cur_version = *version; if (cur_version == pre_version) { *version = cur_version; -- cgit v0.10.2 From 3f5f4959b144d9ba6657ccc0ab9edcc78fcc1d8a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Sep 2016 18:50:10 +0800 Subject: f2fs: fix to commit bio cache after flushing node pages In sync_node_pages, we won't check and commit the last merged pages in the private bio cache of f2fs. As these pages were tagged as writeback, someone who is waiting for writeback of the page will be blocked until the cache is committed by someone else. We need to commit the node type bio cache to avoid a potential deadlock or a long delay waiting for writeback.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9faddcd..8831035 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1312,6 +1312,7 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode, struct page *last_page = NULL; bool marked = false; nid_t ino = inode->i_ino; + int nwritten = 0; if (atomic) { last_page = last_fsync_dnode(sbi, ino); @@ -1385,7 +1386,10 @@ continue_unlock: unlock_page(page); f2fs_put_page(last_page, 0); break; + } else { + nwritten++; } + if (page == last_page) { f2fs_put_page(page, 0); marked = true; @@ -1407,6 +1411,9 @@ continue_unlock: unlock_page(last_page); goto retry; } + + if (nwritten) + f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); return ret ? -EIO: 0; } @@ -1416,6 +1423,7 @@ int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc) struct pagevec pvec; int step = 0; int nwritten = 0; + int ret = 0; pagevec_init(&pvec, 0); @@ -1436,7 +1444,8 @@ next_step: if (unlikely(f2fs_cp_error(sbi))) { pagevec_release(&pvec); - return -EIO; + ret = -EIO; + goto out; } /* @@ -1487,6 +1496,8 @@ continue_unlock: if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc)) unlock_page(page); + else + nwritten++; if (--wbc->nr_to_write == 0) break; @@ -1504,7 +1515,10 @@ continue_unlock: step++; goto next_step; } - return nwritten; +out: + if (nwritten) + f2fs_submit_merged_bio(sbi, NODE, WRITE); + return ret; } int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) -- cgit v0.10.2 From 6ca56ca429aa94399534ec00598f7f9847c4cae2 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 29 Sep 2016 18:50:11 +0800 Subject: f2fs: don't submit irrelevant page While we call ->writepages, there are two cases: a. we didn't writeout any dirty pages, since they are writebacked by other thread concurrently. b. we writeout dirty pages, and have already submitted bio to block layer. 
In these cases, we don't need to do additional bio flushing unnecessarily, it may split bio in cache into smaller one. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8b9a1dc..0d0177c 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1355,6 +1355,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping, int cycled; int range_whole = 0; int tag; + int nwritten = 0; pagevec_init(&pvec, 0); @@ -1429,6 +1430,8 @@ continue_unlock: done_index = page->index + 1; done = 1; break; + } else { + nwritten++; } if (--wbc->nr_to_write <= 0 && @@ -1450,6 +1453,10 @@ continue_unlock: if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) mapping->writeback_index = done_index; + if (nwritten) + f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host, + NULL, 0, DATA, WRITE); + return ret; } @@ -1491,7 +1498,6 @@ static int f2fs_write_data_pages(struct address_space *mapping, * if some pages were truncated, we cannot guarantee its mapping->host * to detect pending bios. */ - f2fs_submit_merged_bio(sbi, DATA, WRITE); remove_dirty_inode(inode); return ret; -- cgit v0.10.2 From e4c5d8489a41209534699220021dab409e4d4f55 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 30 Sep 2016 17:37:43 -0700 Subject: f2fs: introduce update_ckpt_flags to clean up This patch add update_ckpt_flags() to clean up the flow. 
Signed-off-by: Jaegeuk Kim diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7e26328..7e9b504 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1006,6 +1006,37 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi) finish_wait(&sbi->cp_wait, &wait); } +static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) +{ + unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + spin_lock(&sbi->cp_lock); + + if (cpc->reason == CP_UMOUNT) + __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + else + __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); + + if (cpc->reason == CP_FASTBOOT) + __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); + else + __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); + + if (orphan_num) + __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + else + __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) + __set_ckpt_flags(ckpt, CP_FSCK_FLAG); + + /* set this flag to activate crc|cp_ver for recovery */ + __set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG); + + spin_unlock(&sbi->cp_lock); +} + static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) { struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); @@ -1080,29 +1111,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) cp_payload_blks + data_sum_blocks + orphan_blocks); - spin_lock(&sbi->cp_lock); - if (cpc->reason == CP_UMOUNT) - __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - else - __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - - if (cpc->reason == CP_FASTBOOT) - __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); - else - __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); - - if (orphan_num) - __set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - else - __clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); - - if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) - __set_ckpt_flags(ckpt, CP_FSCK_FLAG); - - /* set this flag to activate crc|cp_ver for recovery */ - __set_ckpt_flags(ckpt, 
CP_CRC_RECOVERY_FLAG); - - spin_unlock(&sbi->cp_lock); + /* update ckpt flag for checkpoint */ + update_ckpt_flags(sbi, cpc); /* update SIT/NAT bitmap */ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); -- cgit v0.10.2