From c63179556af1335585e53cd8a23da40bf69cb2e2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 14:08:11 +0100 Subject: btrfs: make set_extent_bit helpers static inline The funcions just wrap the set_extent_bit API and generate function calls. This increases stack consumption and may negatively affect performance due to icache misses. We can simply make the helpers static inline and keep the type checking and API untouched. The code slightly increases: text data bss dec hex filename 938427 43670 23144 1005241 f56b9 fs/btrfs/btrfs.ko.before 938667 43670 23144 1005481 f57a9 fs/btrfs/btrfs.ko Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9abe187..11dc81a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1285,20 +1285,6 @@ search_again: } /* wrappers around set/clear extent bit */ -int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, - NULL, mask); -} - -int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, gfp_t mask) -{ - return set_extent_bit(tree, start, end, bits, NULL, - NULL, mask); -} - int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, gfp_t mask, struct extent_changeset *changeset) @@ -1348,22 +1334,6 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, changeset); } -int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask) -{ - return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_UPTODATE, - NULL, cached_state, mask); -} - -int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask) -{ - return set_extent_bit(tree, start, end, - EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG, - NULL, cached_state, mask); -} - int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) { @@ -1372,20 +1342,6 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); } -int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, - NULL, mask); -} - -int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask) -{ - return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL, - cached_state, mask); -} - int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask) { diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f4c1ae1..4b89ee5 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -229,31 +229,65 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int wake, int delete, struct extent_state **cached, gfp_t mask); -int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, gfp_t mask); + int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, gfp_t mask, struct extent_changeset *changeset); int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, u64 *failed_start, struct extent_state **cached_state, gfp_t mask); -int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask); + +static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, + u64 end, unsigned bits, gfp_t mask) +{ + return set_extent_bit(tree, start, end, bits, NULL, NULL, mask); +} + int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached_state, gfp_t mask); -int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask); -int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask); + +static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, + NULL, mask); +} + int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, unsigned clear_bits, struct extent_state **cached_state, gfp_t mask); -int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask); -int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask); + +static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start, + u64 end, struct extent_state **cached_state, gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_UPTODATE, + NULL, cached_state, mask); +} + +static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start, + u64 end, struct extent_state **cached_state, gfp_t mask) +{ + return set_extent_bit(tree, start, end, + EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG, + NULL, cached_state, mask); +} + +static inline int set_extent_new(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, NULL, mask); +} + +static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start, + u64 end, struct extent_state **cached_state, gfp_t mask) +{ + return set_extent_bit(tree, start, end, EXTENT_UPTODATE, NULL, + cached_state, mask); +} + int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, unsigned bits, struct extent_state **cached_state); -- cgit v0.10.2 From e83b1d91f872a4cf7bf0d3528044fc9e43260e2b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 14:08:11 +0100 Subject: btrfs: make clear_extent_bit helpers static inline The funcions just wrap the clear_extent_bit API and generate function calls. This increases stack consumption and may negatively affect performance due to icache misses. We can simply make the helpers static inline and keep the type checking and API untouched. The code slightly decreases: text data bss dec hex filename 938667 43670 23144 1005481 f57a9 fs/btrfs/btrfs.ko.before 939651 43670 23144 1006465 f5b81 fs/btrfs/btrfs.ko.after Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 11dc81a..3d9935e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1309,17 +1309,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, cached, mask, NULL); } -int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, gfp_t mask) -{ - int wake = 0; - - if (bits & EXTENT_LOCKED) - wake = 1; - - return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask); -} - int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, gfp_t mask, struct extent_changeset *changeset) @@ -1334,21 +1323,6 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, changeset); } -int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask) -{ - return clear_extent_bit(tree, start, end, - EXTENT_DIRTY | EXTENT_DELALLOC | - EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); -} - -int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, - cached_state, mask); -} - /* * either insert or lock state struct between start and end use mask to tell * us if waiting is desired. @@ -1394,19 +1368,6 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end) return 1; } -int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached, gfp_t mask) -{ - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, - mask); -} - -int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) -{ - return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, - GFP_NOFS); -} - int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) { unsigned long index = start >> PAGE_CACHE_SHIFT; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4b89ee5..07cae0c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -202,9 +202,6 @@ int try_release_extent_buffer(struct page *page); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, struct extent_state **cached); -int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end); -int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached, gfp_t mask); int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num); @@ -221,8 +218,6 @@ void free_extent_state(struct extent_state *state); int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int filled, struct extent_state *cached_state); -int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, gfp_t mask); int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, gfp_t mask, struct extent_changeset *changeset); @@ -230,6 +225,30 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, int wake, int delete, struct extent_state **cached, gfp_t mask); +static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL, + GFP_NOFS); +} + +static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start, + u64 end, struct extent_state **cached, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached, + mask); +} + +static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start, + u64 end, unsigned bits, gfp_t mask) +{ + int wake = 0; + + if (bits & EXTENT_LOCKED) + wake = 1; + + return clear_extent_bit(tree, start, end, bits, wake, 0, NULL, mask); +} + int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, gfp_t mask, struct extent_changeset *changeset); @@ -243,8 +262,12 @@ static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, return set_extent_bit(tree, start, end, bits, NULL, NULL, mask); } -int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, - struct extent_state **cached_state, gfp_t mask); +static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, + u64 end, struct extent_state **cached_state, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, + cached_state, mask); +} static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask) @@ -253,8 +276,14 @@ static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start, NULL, mask); } -int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, - gfp_t mask); +static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start, + u64 end, gfp_t mask) +{ + return clear_extent_bit(tree, start, end, + EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); +} + int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, unsigned clear_bits, struct extent_state **cached_state, gfp_t mask); -- cgit v0.10.2 From ff13db41f184f8222aca0cb653347ccdd48a057a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 14:30:40 +0100 Subject: btrfs: drop unused parameter from lock_extent_bits We've always passed 0. Stack usage will slightly decrease. Signed-off-by: David Sterba diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 974be09..68ceed7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -362,7 +362,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, } lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, - 0, &cached_state); + &cached_state); if (extent_buffer_uptodate(eb) && btrfs_header_generation(eb) == parent_transid) { ret = 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3d9935e..bf98499 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1328,13 +1328,13 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, * us if waiting is desired. */ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, struct extent_state **cached_state) + struct extent_state **cached_state) { int err; u64 failed_start; while (1) { - err = __set_extent_bit(tree, start, end, EXTENT_LOCKED | bits, + err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, EXTENT_LOCKED, &failed_start, cached_state, GFP_NOFS, NULL); if (err == -EEXIST) { @@ -1349,7 +1349,7 @@ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int lock_extent(struct extent_io_tree *tree, u64 start, u64 end) { - return lock_extent_bits(tree, start, end, 0, NULL); + return lock_extent_bits(tree, start, end, NULL); } int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end) @@ -1717,7 +1717,7 @@ again: BUG_ON(ret); /* Only valid values are 0 and -EAGAIN */ /* step three, lock the state bits for the whole range */ - lock_extent_bits(tree, delalloc_start, delalloc_end, 0, &cached_state); + lock_extent_bits(tree, delalloc_start, delalloc_end, &cached_state); /* then test to make sure it is all still delalloc */ ret = test_range_bit(tree, delalloc_start, delalloc_end, @@ -4243,7 +4243,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, if (start > end) return 0; - lock_extent_bits(tree, start, end, 0, &cached_state); + lock_extent_bits(tree, start, end, &cached_state); wait_on_page_writeback(page); clear_extent_bit(tree, start, end, EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | @@ -4453,7 +4453,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, last_for_get_extent = isize; } - lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, 0, + lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len - 1, &cached_state); em = get_extent_skip_holes(inode, start, last_for_get_extent, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 07cae0c..e5726a3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -201,7 +201,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, int try_release_extent_buffer(struct page *page); int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, - unsigned bits, struct extent_state **cached); + struct extent_state **cached); int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 977e715..8a00298 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1384,7 +1384,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, if (start_pos < inode->i_size) { struct btrfs_ordered_extent *ordered; lock_extent_bits(&BTRFS_I(inode)->io_tree, - start_pos, last_pos, 0, cached_state); + start_pos, last_pos, cached_state); ordered = btrfs_lookup_ordered_range(inode, start_pos, last_pos - start_pos + 1); if (ordered && @@ -2384,7 +2384,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) truncate_pagecache_range(inode, lockstart, lockend); lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, - 0, &cached_state); + &cached_state); ordered = btrfs_lookup_first_ordered_extent(inode, lockend); /* @@ -2691,7 +2691,7 @@ static long btrfs_fallocate(struct file *file, int mode, * transaction */ lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start, - locked_end, 0, &cached_state); + locked_end, &cached_state); ordered = btrfs_lookup_first_ordered_extent(inode, alloc_end - 1); if (ordered && @@ -2838,7 +2838,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) lockend--; len = lockend - lockstart + 1; - lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0, + lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state); while (start < inode->i_size) { diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 85a1f86..b688378 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1261,7 +1261,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, goto out; lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, - 0, &cached_state); + &cached_state); io_ctl_set_generation(io_ctl, trans->transid); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 994490d..827a726 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1989,7 +1989,7 @@ again: page_start = page_offset(page); page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; - lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0, + lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, &cached_state); /* already ordered? We're done */ @@ -2482,7 +2482,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path, lock_start = backref->file_pos; lock_end = backref->file_pos + backref->num_bytes - 1; lock_extent_bits(&BTRFS_I(inode)->io_tree, lock_start, lock_end, - 0, &cached); + &cached); ordered = btrfs_lookup_first_ordered_extent(inode, lock_end); if (ordered) { @@ -2874,7 +2874,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) lock_extent_bits(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, - 0, &cached_state); + &cached_state); ret = test_range_bit(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, @@ -4690,7 +4690,7 @@ again: } wait_on_page_writeback(page); - lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state); + lock_extent_bits(io_tree, page_start, page_end, &cached_state); set_page_extent_mapped(page); ordered = btrfs_lookup_ordered_extent(inode, page_start); @@ -4821,7 +4821,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) while (1) { struct btrfs_ordered_extent *ordered; - lock_extent_bits(io_tree, hole_start, block_end - 1, 0, + lock_extent_bits(io_tree, hole_start, block_end - 1, &cached_state); ordered = btrfs_lookup_ordered_range(inode, hole_start, block_end - hole_start); @@ -5133,7 +5133,7 @@ static void evict_inode_truncate_pages(struct inode *inode) end = state->end; spin_unlock(&io_tree->lock); - lock_extent_bits(io_tree, start, end, 0, &cached_state); + lock_extent_bits(io_tree, start, end, &cached_state); /* * If still has DELALLOC flag, the extent didn't reach disk, @@ -7402,7 +7402,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, while (1) { lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, - 0, cached_state); + cached_state); /* * We're concerned with the entire range that we're going to be * doing DIO to, so we need to make sure theres no ordered @@ -8636,7 +8636,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, } if (!inode_evicting) - lock_extent_bits(tree, page_start, page_end, 0, &cached_state); + lock_extent_bits(tree, page_start, page_end, &cached_state); ordered = btrfs_lookup_ordered_extent(inode, page_start); if (ordered) { /* @@ -8674,7 +8674,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, btrfs_put_ordered_extent(ordered); if (!inode_evicting) { cached_state = NULL; - lock_extent_bits(tree, page_start, page_end, 0, + lock_extent_bits(tree, page_start, page_end, &cached_state); } } @@ -8772,7 +8772,7 @@ again: } wait_on_page_writeback(page); - lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state); + lock_extent_bits(io_tree, page_start, page_end, &cached_state); set_page_extent_mapped(page); /* diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index da94138..1643206 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -992,7 +992,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) u64 end = start + len - 1; /* get the big lock and read metadata off disk */ - lock_extent_bits(io_tree, start, end, 0, &cached); + lock_extent_bits(io_tree, start, end, &cached); em = btrfs_get_extent(inode, NULL, 0, start, len, 0); unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); @@ -1140,7 +1140,7 @@ again: page_end = page_start + PAGE_CACHE_SIZE - 1; while (1) { lock_extent_bits(tree, page_start, page_end, - 0, &cached_state); + &cached_state); ordered = btrfs_lookup_ordered_extent(inode, page_start); unlock_extent_cached(tree, page_start, page_end, @@ -1200,7 +1200,7 @@ again: page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; lock_extent_bits(&BTRFS_I(inode)->io_tree, - page_start, page_end - 1, 0, &cached_state); + page_start, page_end - 1, &cached_state); clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2907a77..23f396a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -4161,7 +4161,7 @@ static int check_extent_to_block(struct inode *inode, u64 start, u64 len, io_tree = &BTRFS_I(inode)->io_tree; - lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state); + lock_extent_bits(io_tree, lockstart, lockend, &cached_state); ordered = btrfs_lookup_ordered_range(inode, lockstart, len); if (ordered) { btrfs_put_ordered_extent(ordered); -- cgit v0.10.2 From cd716d8fea125e5531003e66aaf7ca7323277f83 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 14:41:30 +0100 Subject: btrfs: make lock_extent static inline One call less reduces stack usage, code slightly reduced as well. Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index bf98499..044ffa3 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1347,11 +1347,6 @@ int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, return err; } -int lock_extent(struct extent_io_tree *tree, u64 start, u64 end) -{ - return lock_extent_bits(tree, start, end, NULL); -} - int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end) { int err; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e5726a3..7cb9fa5 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -199,9 +199,14 @@ int try_release_extent_mapping(struct extent_map_tree *map, struct extent_io_tree *tree, struct page *page, gfp_t mask); int try_release_extent_buffer(struct page *page); -int lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state **cached); + +static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end) +{ + return lock_extent_bits(tree, start, end, NULL); +} + int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end); int extent_read_full_page(struct extent_io_tree *tree, struct page *page, get_extent_t *get_extent, int mirror_num); -- cgit v0.10.2 From 87ad58c5f07444ab2fa083c62b154f38ef0a3035 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 27 Nov 2015 18:49:39 +0100 Subject: btrfs: make btrfs_close_one_device static Signed-off-by: David Sterba diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a6df8fd..e335938 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -125,6 +125,7 @@ static int btrfs_relocate_sys_chunks(struct btrfs_root *root); static void __btrfs_reset_dev_stats(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev); static void btrfs_dev_stat_print_on_load(struct btrfs_device *device); +static void btrfs_close_one_device(struct btrfs_device *device); DEFINE_MUTEX(uuid_mutex); static LIST_HEAD(fs_uuids); @@ -6951,7 +6952,7 @@ void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info) } } -void btrfs_close_one_device(struct btrfs_device *device) +static void btrfs_close_one_device(struct btrfs_device *device) { struct btrfs_fs_devices *fs_devices = device->fs_devices; struct btrfs_device *new_device; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ec57123..2dc59eb 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -566,6 +566,5 @@ static inline void unlock_chunks(struct btrfs_root *root) struct list_head *btrfs_get_fs_uuids(void); void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); -void btrfs_close_one_device(struct btrfs_device *device); #endif -- cgit v0.10.2 From 651d494a678e48712f784e8f26450c34b5c61015 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 27 Nov 2015 19:24:16 +0100 Subject: btrfs: sink parameter wait to btrfs_alloc_delalloc_work There's only one caller and single value, we can propagate it down to the callee and remove the unused parameter. Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8c58191..cb54025 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3910,7 +3910,7 @@ struct btrfs_delalloc_work { }; struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, - int wait, int delay_iput); + int delay_iput); void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 994490d..9898b0d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9457,7 +9457,7 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work) } struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, - int wait, int delay_iput) + int delay_iput) { struct btrfs_delalloc_work *work; @@ -9468,7 +9468,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, init_completion(&work->completion); INIT_LIST_HEAD(&work->list); work->inode = inode; - work->wait = wait; + work->wait = 0; work->delay_iput = delay_iput; WARN_ON_ONCE(!inode); btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, @@ -9516,7 +9516,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, } spin_unlock(&root->delalloc_lock); - work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); + work = btrfs_alloc_delalloc_work(inode, delay_iput); if (!work) { if (delay_iput) btrfs_add_delayed_iput(inode); -- cgit v0.10.2 From 3042460136bee7bf48860f16a391e6d75f2d0d5c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 27 Nov 2015 19:27:11 +0100 Subject: btrfs: remove wait from struct btrfs_delalloc_work The value is 0 and never changes, we can propagate the value, remove wait and the implied dead code. Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index cb54025..a61c53b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3902,7 +3902,6 @@ void btrfs_extent_item_to_extent_map(struct inode *inode, /* inode.c */ struct btrfs_delalloc_work { struct inode *inode; - int wait; int delay_iput; struct completion completion; struct list_head list; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 9898b0d..15b29e8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9440,14 +9440,10 @@ static void btrfs_run_delalloc_work(struct btrfs_work *work) delalloc_work = container_of(work, struct btrfs_delalloc_work, work); inode = delalloc_work->inode; - if (delalloc_work->wait) { - btrfs_wait_ordered_range(inode, 0, (u64)-1); - } else { + filemap_flush(inode->i_mapping); + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) filemap_flush(inode->i_mapping); - if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, - &BTRFS_I(inode)->runtime_flags)) - filemap_flush(inode->i_mapping); - } if (delalloc_work->delay_iput) btrfs_add_delayed_iput(inode); @@ -9468,7 +9464,6 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, init_completion(&work->completion); INIT_LIST_HEAD(&work->list); work->inode = inode; - work->wait = 0; work->delay_iput = delay_iput; WARN_ON_ONCE(!inode); btrfs_init_work(&work->work, btrfs_flush_delalloc_helper, -- cgit v0.10.2 From 8d2db7855e7b65c9eecc8789fc54d818ee281ac6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Wed, 4 Nov 2015 15:38:29 +0100 Subject: btrfs: use GFP_KERNEL for allocations in ioctl handlers We don't have to use GFP_NOFS in the ioctl handlers because there's no risk of looping through the allocators back to the filesystem. This patch covers only allocations that are directly in the ioctl handlers. Signed-off-by: David Sterba diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index da94138..2f4a5c2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -4147,7 +4147,7 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) return -ENOMEM; space_args.total_spaces = 0; - dest = kmalloc(alloc_size, GFP_NOFS); + dest = kmalloc(alloc_size, GFP_KERNEL); if (!dest) return -ENOMEM; dest_orig = dest; @@ -4673,7 +4673,7 @@ locked: goto out_bargs; } - bctl = kzalloc(sizeof(*bctl), GFP_NOFS); + bctl = kzalloc(sizeof(*bctl), GFP_KERNEL); if (!bctl) { ret = -ENOMEM; goto out_bargs; @@ -4759,7 +4759,7 @@ static long btrfs_ioctl_balance_progress(struct btrfs_root *root, goto out; } - bargs = kzalloc(sizeof(*bargs), GFP_NOFS); + bargs = kzalloc(sizeof(*bargs), GFP_KERNEL); if (!bargs) { ret = -ENOMEM; goto out; @@ -5019,7 +5019,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - qsa = kzalloc(sizeof(*qsa), GFP_NOFS); + qsa = kzalloc(sizeof(*qsa), GFP_KERNEL); if (!qsa) return -ENOMEM; @@ -5149,7 +5149,7 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file, goto out; } - args64 = kmalloc(sizeof(*args64), GFP_NOFS); + args64 = kmalloc(sizeof(*args64), GFP_KERNEL); if (!args64) { ret = -ENOMEM; goto out; -- cgit v0.10.2 From 61dd5ae65be6dfaeadb0e841ea6639351f0e04ce Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 1 Dec 2015 18:04:30 +0100 Subject: btrfs: use GFP_KERNEL for allocations of workqueues We don't have to use GFP_NOFS to allocate workqueue structures, this is done from mount context or potentially scrub start context, safe to fail in both cases. Signed-off-by: David Sterba diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 3e36e4a..88d9af3 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -97,7 +97,7 @@ static struct __btrfs_workqueue * __btrfs_alloc_workqueue(const char *name, unsigned int flags, int limit_active, int thresh) { - struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); + struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; @@ -148,7 +148,7 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, int limit_active, int thresh) { - struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); + struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return NULL; -- cgit v0.10.2 From 39a27ec1004e886f1d949bdb8f2616896d02c5c2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 12:49:48 +0100 Subject: btrfs: use GFP_KERNEL for xattr and acl allocations We don't have to use GFP_NOFS in context of ACL or XATTR actions, not possible to loop through the allocator and it's safe to fail with ENOMEM. Signed-off-by: David Sterba diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 9a0124a..dbbb8ed5 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -48,7 +48,7 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type) size = __btrfs_getxattr(inode, name, "", 0); if (size > 0) { - value = kzalloc(size, GFP_NOFS); + value = kzalloc(size, GFP_KERNEL); if (!value) return ERR_PTR(-ENOMEM); size = __btrfs_getxattr(inode, name, value, size); @@ -102,7 +102,7 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans, if (acl) { size = posix_acl_xattr_size(acl->a_count); - value = kmalloc(size, GFP_NOFS); + value = kmalloc(size, GFP_KERNEL); if (!value) { ret = -ENOMEM; goto out; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 1fcd7b6..24e8ff7 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -494,7 +494,7 @@ static int btrfs_initxattrs(struct inode *inode, for (xattr = xattr_array; xattr->name != NULL; xattr++) { name = kmalloc(XATTR_SECURITY_PREFIX_LEN + - strlen(xattr->name) + 1, GFP_NOFS); + strlen(xattr->name) + 1, GFP_KERNEL); if (!name) { err = -ENOMEM; break; -- cgit v0.10.2 From 4db8c528cdccb44f8ca237ca3c695d6c164fa70f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 13:06:46 +0100 Subject: btrfs: remove a trivial helper btrfs_set_buffer_uptodate Signed-off-by: David Sterba diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 974be09..166ad08 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3902,11 +3902,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, return !ret; } -int btrfs_set_buffer_uptodate(struct extent_buffer *buf) -{ - return set_extent_buffer_uptodate(buf); -} - void btrfs_mark_buffer_dirty(struct extent_buffer *buf) { struct btrfs_root *root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index adeb318..7c52e29 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -116,7 +116,6 @@ static inline void btrfs_put_fs_root(struct btrfs_root *root) void btrfs_mark_buffer_dirty(struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); -int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(char *data, u32 seed, size_t len); void btrfs_csum_final(u32 crc, char *result); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index acf3ed1..9dfd60a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -7831,7 +7831,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, clear_bit(EXTENT_BUFFER_STALE, &buf->bflags); btrfs_set_lock_blocking(buf); - btrfs_set_buffer_uptodate(buf); + set_extent_buffer_uptodate(buf); if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { buf->log_index = root->log_transid % 2; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e335938..83bbca7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6468,11 +6468,11 @@ int btrfs_read_sys_array(struct btrfs_root *root) sb = btrfs_find_create_tree_block(root, BTRFS_SUPER_INFO_OFFSET); if (!sb) return -ENOMEM; - btrfs_set_buffer_uptodate(sb); + set_extent_buffer_uptodate(sb); btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); /* * The sb extent buffer is artifical and just used to read the system array. - * btrfs_set_buffer_uptodate() call does not properly mark all it's + * set_extent_buffer_uptodate() call does not properly mark all it's * pages up-to-date when the page is larger: extent does not cover the * whole page and consequently check_page_uptodate does not find all * the page's extents up-to-date (the hole beyond sb), -- cgit v0.10.2 From 09c25a8cda5baf6537234be0954173a18568423d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 13:08:59 +0100 Subject: btrfs: make set_extent_buffer_uptodate return void Does not return any errors, nor anything from the callgraph. Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9abe187..47db906 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5243,7 +5243,7 @@ int clear_extent_buffer_uptodate(struct extent_buffer *eb) return 0; } -int set_extent_buffer_uptodate(struct extent_buffer *eb) +void set_extent_buffer_uptodate(struct extent_buffer *eb) { unsigned long i; struct page *page; @@ -5255,7 +5255,6 @@ int set_extent_buffer_uptodate(struct extent_buffer *eb) page = eb->pages[i]; SetPageUptodate(page); } - return 0; } int extent_buffer_uptodate(struct extent_buffer *eb) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index f4c1ae1..013af05 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -330,7 +330,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, unsigned long start, unsigned long len); void clear_extent_buffer_dirty(struct extent_buffer *eb); int set_extent_buffer_dirty(struct extent_buffer *eb); -int set_extent_buffer_uptodate(struct extent_buffer *eb); +void set_extent_buffer_uptodate(struct extent_buffer *eb); int clear_extent_buffer_uptodate(struct extent_buffer *eb); int extent_buffer_uptodate(struct extent_buffer *eb); int extent_buffer_under_io(struct extent_buffer *eb); -- cgit v0.10.2 From 69ba39272c519a34a4a0169b839ca177bf0f0104 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 13:08:59 +0100 Subject: btrfs: make clear_extent_buffer_uptodate return void Does not return any errors, nor anything from the callgraph. Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 47db906..d5778e8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5227,7 +5227,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb) return was_dirty; } -int clear_extent_buffer_uptodate(struct extent_buffer *eb) +void clear_extent_buffer_uptodate(struct extent_buffer *eb) { unsigned long i; struct page *page; @@ -5240,7 +5240,6 @@ int clear_extent_buffer_uptodate(struct extent_buffer *eb) if (page) ClearPageUptodate(page); } - return 0; } void set_extent_buffer_uptodate(struct extent_buffer *eb) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 013af05..ad1d28c 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -331,7 +331,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, void clear_extent_buffer_dirty(struct extent_buffer *eb); int set_extent_buffer_dirty(struct extent_buffer *eb); void set_extent_buffer_uptodate(struct extent_buffer *eb); -int clear_extent_buffer_uptodate(struct extent_buffer *eb); +void clear_extent_buffer_uptodate(struct extent_buffer *eb); int extent_buffer_uptodate(struct extent_buffer *eb); int extent_buffer_under_io(struct extent_buffer *eb); int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, -- cgit v0.10.2 From a9d93e1778f3a92852d3816cd203cbec179b8813 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 13:08:59 +0100 Subject: btrfs: make extent_clear_unlock_delalloc return void Does not return any errors, nor anything from the callgraph. Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d5778e8..b7261ea 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1820,7 +1820,7 @@ out_failed: return found; } -int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, +void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, struct page *locked_page, unsigned clear_bits, unsigned long page_ops) @@ -1835,7 +1835,7 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); if (page_ops == 0) - return 0; + return; if ((page_ops & PAGE_SET_ERROR) && nr_pages > 0) mapping_set_error(inode->i_mapping, -EIO); @@ -1869,7 +1869,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, index += ret; cond_resched(); } - return 0; } /* diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index ad1d28c..2d57166 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -340,7 +340,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_len); int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); -int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, +void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, struct page *locked_page, unsigned bits_to_clear, unsigned long page_ops); -- cgit v0.10.2 From b5227c075b5c11d5cf653bac5c01b9f7f03f2a8f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 13:08:59 +0100 Subject: btrfs: make end_extent_writepage return void Does not return any errors, nor anything from the callgraph. The branch in end_bio_extent_writepage has been skipped since 5fd02043553b ("Btrfs: finish ordered extents in their own thread"). Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b7261ea..0ea3e99 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2515,7 +2515,7 @@ static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset, /* lots and lots of room for performance fixes in the end_bio funcs */ -int end_extent_writepage(struct page *page, int err, u64 start, u64 end) +void end_extent_writepage(struct page *page, int err, u64 start, u64 end) { int uptodate = (err == 0); struct extent_io_tree *tree; @@ -2536,7 +2536,6 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) ret = ret < 0 ? ret : -EIO; mapping_set_error(page->mapping, ret); } - return 0; } /* @@ -2578,9 +2577,7 @@ static void end_bio_extent_writepage(struct bio *bio) start = page_offset(page); end = start + bvec->bv_offset + bvec->bv_len - 1; - if (end_extent_writepage(page, bio->bi_error, start, end)) - continue; - + end_extent_writepage(page, bio->bi_error, start, end); end_page_writeback(page); } diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2d57166..81f84f1 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -357,7 +357,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical, int mirror_num); int clean_io_failure(struct inode *inode, u64 start, struct page *page, unsigned int pg_offset); -int end_extent_writepage(struct page *page, int err, u64 start, u64 end); +void end_extent_writepage(struct page *page, int err, u64 start, u64 end); int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, int mirror_num); -- cgit v0.10.2 From bd1fa4f0b0ddbed05ef98c28048d00db727c4b0e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 13:08:59 +0100 Subject: btrfs: make extent_range_clear_dirty_for_io return void Does not return any errors, nor anything from the callgraph. There's a BUG_ON but it's a sanity check and not an error condition we could recover from. Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 0ea3e99..e55a728 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1451,7 +1451,7 @@ int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end) GFP_NOFS); } -int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) +void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) { unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; @@ -1464,7 +1464,6 @@ int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) page_cache_release(page); index++; } - return 0; } int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 81f84f1..b0b2d20 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -338,7 +338,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long min_len, char **map, unsigned long *map_start, unsigned long *map_len); -int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); +void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, struct page *locked_page, -- cgit v0.10.2 From f6311572762a9ccaa533e60bf7929d63be914bde Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 13:08:59 +0100 Subject: btrfs: make extent_range_redirty_for_io return void Does not return any errors, nor anything from the callgraph. There's a BUG_ON but it's a sanity check and not an error condition we could recover from. Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e55a728..f06800e 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1466,7 +1466,7 @@ void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end) } } -int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) +void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) { unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; @@ -1480,7 +1480,6 @@ int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) page_cache_release(page); index++; } - return 0; } /* diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index b0b2d20..fbc6448 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -339,7 +339,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, unsigned long *map_start, unsigned long *map_len); void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); -int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); +void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end); void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end, struct page *locked_page, unsigned bits_to_clear, -- cgit v0.10.2 From 35de6db28f260e88e9acc47305f7b3f272d76cbf Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 3 Dec 2015 13:08:59 +0100 Subject: btrfs: make set_range_writeback return void Does not return any errors, nor anything from the callgraph. There's a BUG_ON but it's a sanity check and not an error condition we could recover from. Signed-off-by: David Sterba diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f06800e..efe6ac6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1485,7 +1485,7 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end) /* * helper function to set both pages and extents in the tree writeback */ -static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) +static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) { unsigned long index = start >> PAGE_CACHE_SHIFT; unsigned long end_index = end >> PAGE_CACHE_SHIFT; @@ -1498,7 +1498,6 @@ static int set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end) page_cache_release(page); index++; } - return 0; } /* find the first state struct with 'bits' set after 'start', and -- cgit v0.10.2 From 50460e37186a2b932eacea24dca804bd1bcd2012 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 20 Nov 2015 10:42:47 +0000 Subject: Btrfs: fix race when finishing dev replace leading to transaction abort During the final phase of a device replace operation, I ran into a transaction abort that resulted in the following trace: [23919.655368] WARNING: CPU: 10 PID: 30175 at fs/btrfs/extent-tree.c:9843 btrfs_create_pending_block_groups+0x15e/0x1ab [btrfs]() [23919.664742] BTRFS: Transaction aborted (error -2) [23919.665749] Modules linked in: btrfs crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix4 parport psmouse acpi_cpufreq processor i2c_core evdev microcode pcspkr button serio_raw ext4 crc16 jbd2 mbcache sd_mod sg sr_mod cdrom virtio_scsi ata_generic ata_piix virtio_pci floppy virtio_ring libata e1000 virtio scsi_mod [last unloaded: btrfs] [23919.679442] CPU: 10 PID: 30175 Comm: fsstress Not tainted 4.3.0-rc5-btrfs-next-17+ #1 [23919.682392] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [23919.689151] 0000000000000000 ffff8804020cbb50 ffffffff812566f4 ffff8804020cbb98 [23919.692604] ffff8804020cbb88 ffffffff8104d0a6 ffffffffa03eea69 ffff88041b678a48 [23919.694230] ffff88042ac38000 ffff88041b678930 00000000fffffffe ffff8804020cbbf0 [23919.696716] Call Trace: [23919.698669] [] dump_stack+0x4e/0x79 [23919.700597] [] warn_slowpath_common+0x9f/0xb8 [23919.701958] [] ? btrfs_create_pending_block_groups+0x15e/0x1ab [btrfs] [23919.703612] [] warn_slowpath_fmt+0x48/0x50 [23919.705047] [] btrfs_create_pending_block_groups+0x15e/0x1ab [btrfs] [23919.706967] [] __btrfs_end_transaction+0x84/0x2dd [btrfs] [23919.708611] [] btrfs_end_transaction+0x10/0x12 [btrfs] [23919.710099] [] btrfs_alloc_data_chunk_ondemand+0x121/0x28b [btrfs] [23919.711970] [] btrfs_fallocate+0x7d3/0xc6d [btrfs] [23919.713602] [] ? lock_acquire+0x10d/0x194 [23919.714756] [] ? percpu_down_read+0x51/0x78 [23919.716155] [] ? __sb_start_write+0x5f/0xb0 [23919.718918] [] ? __sb_start_write+0x5f/0xb0 [23919.724170] [] vfs_fallocate+0x170/0x1ff [23919.725482] [] ioctl_preallocate+0x89/0x9b [23919.726790] [] do_vfs_ioctl+0x406/0x4e6 [23919.728428] [] ? SYSC_newfstat+0x25/0x2e [23919.729642] [] ? __fget_light+0x4d/0x71 [23919.730782] [] SyS_ioctl+0x57/0x79 [23919.731847] [] entry_SYSCALL_64_fastpath+0x12/0x6f [23919.733330] ---[ end trace 166ef301a335832a ]--- This is due to a race between device replace and chunk allocation, which the following diagram illustrates: CPU 1 CPU 2 btrfs_dev_replace_finishing() at this point dev_replace->tgtdev->devid == BTRFS_DEV_REPLACE_DEVID (0ULL) ... btrfs_start_transaction() btrfs_commit_transaction() btrfs_fallocate() btrfs_alloc_data_chunk_ondemand() btrfs_join_transaction() --> starts a new transaction do_chunk_alloc() lock fs_info->chunk_mutex btrfs_alloc_chunk() --> creates extent map for the new chunk with em->bdev->map->stripes[i]->dev->devid == X (X > 0) --> extent map is added to fs_info->mapping_tree --> initial phase of bg A allocation completes unlock fs_info->chunk_mutex lock fs_info->chunk_mutex btrfs_dev_replace_update_device_in_mapping_tree() --> iterates fs_info->mapping_tree and replaces the device in every extent map's map->stripes[] with dev_replace->tgtdev, which still has an id of 0ULL (BTRFS_DEV_REPLACE_DEVID) btrfs_end_transaction() btrfs_create_pending_block_groups() --> starts final phase of bg A creation (update device, extent, and chunk trees, etc) btrfs_finish_chunk_alloc() btrfs_update_device() --> attempts to update a device item with ID == 0ULL (BTRFS_DEV_REPLACE_DEVID) which is the current ID of bg A's em->bdev->map->stripes[i]->dev->devid --> doesn't find such item returns -ENOENT --> the device id should have been X and not 0ULL got -ENOENT from btrfs_finish_chunk_alloc() and aborts current transaction finishes setting up the target device, namely it sets tgtdev->devid to the value of srcdev->devid, which is X (and X > 0) frees the srcdev unlock fs_info->chunk_mutex So fix this by taking the device list mutex when processing the chunk's extent map stripes to update the device items. This avoids getting the wrong device id and use-after-free problems if the task finishing a chunk allocation grabs the replaced device, which is freed while the dev replace task is holding the device list mutex. This happened while running fstest btrfs/071. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 86124cd..f5e5e20 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4824,20 +4824,32 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, goto out; } + /* + * Take the device list mutex to prevent races with the final phase of + * a device replace operation that replaces the device object associated + * with the map's stripes, because the device object's id can change + * at any time during that final phase of the device replace operation + * (dev-replace.c:btrfs_dev_replace_finishing()). + */ + mutex_lock(&chunk_root->fs_info->fs_devices->device_list_mutex); for (i = 0; i < map->num_stripes; i++) { device = map->stripes[i].dev; dev_offset = map->stripes[i].physical; ret = btrfs_update_device(trans, device); if (ret) - goto out; + break; ret = btrfs_alloc_dev_extent(trans, device, chunk_root->root_key.objectid, BTRFS_FIRST_CHUNK_TREE_OBJECTID, chunk_offset, dev_offset, stripe_size); if (ret) - goto out; + break; + } + if (ret) { + mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex); + goto out; } stripe = &chunk->stripe; @@ -4850,6 +4862,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE); stripe++; } + mutex_unlock(&chunk_root->fs_info->fs_devices->device_list_mutex); btrfs_set_stack_chunk_length(chunk, chunk_size); btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid); -- cgit v0.10.2 From 7785a663c4beebdafeb300caf2818e7e6474abd1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 27 Nov 2015 16:12:00 +0000 Subject: Btrfs: fix memory leaks after transaction is aborted When a transaction is aborted, or its commit fails before writing the new superblock and calling btrfs_finish_extent_commit(), we leak reference counts on the block groups attached to the transaction's delete_bgs list, because btrfs_finish_extent_commit() is never called for those two cases. Fix this by dropping their references at btrfs_put_transaction(), which is called when transactions are aborted (by making the transaction kthread commit the transaction) or if their commits fail. Signed-off-by: Filipe Manana diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index be8eae8..f85ccf6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -75,6 +75,23 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction) list_del_init(&em->list); free_extent_map(em); } + /* + * If any block groups are found in ->deleted_bgs then it's + * because the transaction was aborted and a commit did not + * happen (things failed before writing the new superblock + * and calling btrfs_finish_extent_commit()), so we can not + * discard the physical locations of the block groups. + */ + while (!list_empty(&transaction->deleted_bgs)) { + struct btrfs_block_group_cache *cache; + + cache = list_first_entry(&transaction->deleted_bgs, + struct btrfs_block_group_cache, + bg_list); + list_del_init(&cache->bg_list); + btrfs_put_block_group_trimming(cache); + btrfs_put_block_group(cache); + } kmem_cache_free(btrfs_transaction_cachep, transaction); } } -- cgit v0.10.2 From 14543774bd67a64f616431e5c9d1472f58979841 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 24 Nov 2015 16:23:54 +0000 Subject: Btrfs: fix error path when failing to submit bio for direct IO write Commit 61de718fceb6 ("Btrfs: fix memory corruption on failure to submit bio for direct IO") fixed problems with the error handling code after we fail to submit a bio for direct IO. However there were 2 problems that it did not address when the failure is due to memory allocation failures for direct IO writes: 1) We considered that there could be only one ordered extent for the whole IO range, which is not always true, as we can have multiple; 2) It did not set the bit BTRFS_ORDERED_IO_DONE in the ordered extent, which can make other tasks running btrfs_wait_logged_extents() hang forever, since they wait for that bit to be set. The general assumption is that regardless of an error, the BTRFS_ORDERED_IO_DONE is always set and it precedes setting the bit BTRFS_ORDERED_COMPLETE. Fix these issues by moving part of the btrfs_endio_direct_write() handler into a new helper function and having that new helper function called when we fail to allocate memory to submit the bio (and its private object) for a direct IO write. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f82d1f4..66106b4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7995,22 +7995,22 @@ static void btrfs_endio_direct_read(struct bio *bio) bio_put(bio); } -static void btrfs_endio_direct_write(struct bio *bio) +static void btrfs_endio_direct_write_update_ordered(struct inode *inode, + const u64 offset, + const u64 bytes, + const int uptodate) { - struct btrfs_dio_private *dip = bio->bi_private; - struct inode *inode = dip->inode; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered = NULL; - u64 ordered_offset = dip->logical_offset; - u64 ordered_bytes = dip->bytes; - struct bio *dio_bio; + u64 ordered_offset = offset; + u64 ordered_bytes = bytes; int ret; again: ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, ordered_bytes, - !bio->bi_error); + uptodate); if (!ret) goto out_test; @@ -8023,13 +8023,22 @@ out_test: * our bio might span multiple ordered extents. If we haven't * completed the accounting for the whole dio, go back and try again */ - if (ordered_offset < dip->logical_offset + dip->bytes) { - ordered_bytes = dip->logical_offset + dip->bytes - - ordered_offset; + if (ordered_offset < offset + bytes) { + ordered_bytes = offset + bytes - ordered_offset; ordered = NULL; goto again; } - dio_bio = dip->dio_bio; +} + +static void btrfs_endio_direct_write(struct bio *bio) +{ + struct btrfs_dio_private *dip = bio->bi_private; + struct bio *dio_bio = dip->dio_bio; + + btrfs_endio_direct_write_update_ordered(dip->inode, + dip->logical_offset, + dip->bytes, + !bio->bi_error); kfree(dip); @@ -8365,24 +8374,15 @@ free_ordered: dip = NULL; io_bio = NULL; } else { - if (write) { - struct btrfs_ordered_extent *ordered; - - ordered = btrfs_lookup_ordered_extent(inode, - file_offset); - set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); - /* - * Decrements our ref on the ordered extent and removes - * the ordered extent from the inode's ordered tree, - * doing all the proper resource cleanup such as for the - * reserved space and waking up any waiters for this - * ordered extent (through btrfs_remove_ordered_extent). - */ - btrfs_finish_ordered_io(ordered); - } else { + if (write) + btrfs_endio_direct_write_update_ordered(inode, + file_offset, + dio_bio->bi_iter.bi_size, + 0); + else unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, file_offset + dio_bio->bi_iter.bi_size - 1); - } + dio_bio->bi_error = -EIO; /* * Releases and cleans up our dio_bio, no need to bio_put() -- cgit v0.10.2 From b850ae14278dfc49c3a03b39357214fc79330db9 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 8 Dec 2015 16:23:16 +0000 Subject: Btrfs: fix deadlock between direct IO write and defrag/readpages If readpages() (triggered by defrag or buffered reads) is called while a direct IO write is in progress, we have a small time window where we can deadlock, resulting in traces like the following being generated: [84723.212993] INFO: task fio:2849 blocked for more than 120 seconds. [84723.214310] Tainted: G W 4.3.0-rc5-btrfs-next-17+ #1 [84723.215640] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84723.217313] fio D ffff88023ec75218 0 2849 2835 0x00000000 [84723.218778] ffff880122dfb6e8 0000000000000092 0000000000000000 ffff88023ec75200 [84723.220458] ffff88000e05d2c0 ffff880122dfc000 ffff88023ec75200 7fffffffffffffff [84723.230597] 0000000000000002 ffffffff8147891a ffff880122dfb700 ffffffff8147856a [84723.232085] Call Trace: [84723.232625] [] ? bit_wait+0x3c/0x3c [84723.233529] [] schedule+0x7d/0x95 [84723.234398] [] schedule_timeout+0x43/0x10b [84723.235384] [] ? time_hardirqs_on+0x15/0x28 [84723.236426] [] ? trace_hardirqs_on+0xd/0xf [84723.237502] [] ? read_seqcount_begin.constprop.20+0x57/0x6d [84723.238807] [] ? trace_hardirqs_on_caller+0x16/0x1ab [84723.242012] [] ? trace_hardirqs_on+0xd/0xf [84723.243064] [] ? timekeeping_get_ns+0xe/0x33 [84723.244116] [] ? ktime_get+0x41/0x52 [84723.245029] [] io_schedule_timeout+0xb7/0x12b [84723.245942] [] ? io_schedule_timeout+0xb7/0x12b [84723.246596] [] bit_wait_io+0x39/0x45 [84723.247503] [] __wait_on_bit_lock+0x49/0x8d [84723.248540] [] __lock_page+0x66/0x68 [84723.249558] [] ? autoremove_wake_function+0x3a/0x3a [84723.250844] [] lock_page+0x2c/0x2f [84723.251871] [] invalidate_inode_pages2_range+0xf5/0x2aa [84723.253274] [] ? filemap_fdatawait_range+0x12d/0x146 [84723.254757] [] ? filemap_fdatawrite_range+0x13/0x15 [84723.256378] [] btrfs_get_blocks_direct+0x1b0/0x664 [btrfs] [84723.258556] [] ? submit_page_section+0x7b/0x111 [84723.260064] [] do_blockdev_direct_IO+0x658/0xbdb [84723.261479] [] ? btrfs_page_exists_in_range+0x1a9/0x1a9 [btrfs] [84723.262961] [] ? btrfs_writepage_start_hook+0xce/0xce [btrfs] [84723.264449] [] __blockdev_direct_IO+0x31/0x33 [84723.265614] [] ? __blockdev_direct_IO+0x31/0x33 [84723.266769] [] ? btrfs_writepage_start_hook+0xce/0xce [btrfs] [84723.268264] [] btrfs_direct_IO+0x1b9/0x259 [btrfs] [84723.270954] [] ? btrfs_writepage_start_hook+0xce/0xce [btrfs] [84723.272465] [] generic_file_direct_write+0xb3/0x128 [84723.273734] [] btrfs_file_write_iter+0x228/0x404 [btrfs] [84723.275101] [] __vfs_write+0x7c/0xa5 [84723.276200] [] vfs_write+0xa0/0xe4 [84723.277298] [] SyS_write+0x50/0x7e [84723.278327] [] entry_SYSCALL_64_fastpath+0x12/0x6f [84723.279595] INFO: lockdep is turned off. [84723.379035] INFO: task btrfs:2923 blocked for more than 120 seconds. [84723.380323] Tainted: G W 4.3.0-rc5-btrfs-next-17+ #1 [84723.381608] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [84723.383003] btrfs D ffff88023ed75218 0 2923 2859 0x00000000 [84723.384277] ffff88001311f860 0000000000000082 ffff88001311f840 ffff88023ed75200 [84723.385748] ffff88012c6751c0 ffff880013120000 ffff88012042fe68 ffff88012042fe30 [84723.387152] ffff880221571c88 0000000000000001 ffff88001311f878 ffffffff8147856a [84723.388620] Call Trace: [84723.389105] [] schedule+0x7d/0x95 [84723.391882] [] btrfs_start_ordered_extent+0x161/0x1fa [btrfs] [84723.393718] [] ? signal_pending_state+0x31/0x31 [84723.395659] [] __do_contiguous_readpages.constprop.21+0x81/0xdc [btrfs] [84723.397383] [] ? btrfs_submit_direct+0x3f0/0x3f0 [btrfs] [84723.398852] [] __extent_readpages.constprop.20+0xed/0x100 [btrfs] [84723.400561] [] ? __lru_cache_add+0x5d/0x72 [84723.401787] [] extent_readpages+0x111/0x1a7 [btrfs] [84723.403121] [] ? btrfs_submit_direct+0x3f0/0x3f0 [btrfs] [84723.404583] [] btrfs_readpages+0x1f/0x21 [btrfs] [84723.406007] [] __do_page_cache_readahead+0x168/0x1f4 [84723.407502] [] ondemand_readahead+0x21d/0x22e [84723.408937] [] ? ondemand_readahead+0x21d/0x22e [84723.410487] [] page_cache_sync_readahead+0x3d/0x3f [84723.411710] [] btrfs_defrag_file+0x419/0xaaf [btrfs] [84723.413007] [] ? kzalloc+0xf/0x11 [btrfs] [84723.414085] [] btrfs_ioctl_defrag+0x125/0x14e [btrfs] [84723.415307] [] btrfs_ioctl+0x746/0x24c6 [btrfs] [84723.416532] [] ? arch_local_irq_save+0x9/0xc [84723.417731] [] ? __might_fault+0x4c/0xa7 [84723.418699] [] ? __might_fault+0x4c/0xa7 [84723.421532] [] ? __might_fault+0xa5/0xa7 [84723.422629] [] ? cp_new_stat+0x15d/0x174 [84723.423712] [] do_vfs_ioctl+0x427/0x4e6 [84723.424801] [] ? SYSC_newfstat+0x25/0x2e [84723.425968] [] ? __fget_light+0x4d/0x71 [84723.427063] [] SyS_ioctl+0x57/0x79 [84723.428138] [] entry_SYSCALL_64_fastpath+0x12/0x6f Consider the following logical and physical file layout: logical: ... [ prealloc extent A ] [ prealloc extent B ] [ extent C ] ... 4K 8K 16K physical: ... 12853248 12857344 1103101952 ... (= 12853248 + 4K) Extents A and B are physically adjacent. The following diagram shows a sequence of events that lead to the deadlock when we attempt to do a direct IO write against the file range [4K, 16K[ and a defrag is triggered simultaneously. CPU 1 CPU 2 btrfs_direct_IO() btrfs_get_blocks_direct() creates ordered extent A, covering the 4k prealloc extent A (range [4K, 8K[) btrfs_defrag_file() page_cache_sync_readahead([0K, 1M[) btrfs_readpages() extent_readpages() locks all pages in the file range [0K, 128K[ through calls to add_to_page_cache_lru() __do_contiguous_readpages() finds ordered extent A waits for it to complete btrfs_get_blocks_direct() called again lock_extent_direct(range [8K, 16K[) finds a page in range [8K, 16K[ through btrfs_page_exists_in_range() invalidate_inode_pages2_range([8K, 16K[) --> tries to lock pages that are already locked by the task at CPU 2 --> our task, running __blockdev_direct_IO(), hangs waiting to lock the pages and the submit bio callback, btrfs_submit_direct(), ends up never being called, resulting in the ordered extent A never completing (because a corresponding bio is never submitted) and CPU 2 will wait for it forever while holding the pages locked ---> deadlock! Fix this by removing the page invalidation approach when attempting to lock the range for IO from the callback btrfs_get_blocks_direct() and falling back buffered IO. This was a rare case anyway and well behaved applications do not mix concurrent direct IO writes with buffered reads anyway, being a concurrent defrag the only normal case that could lead to the deadlock. Signed-off-by: Filipe Manana diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 66106b4..269da82 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7411,25 +7411,21 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, btrfs_start_ordered_extent(inode, ordered, 1); btrfs_put_ordered_extent(ordered); } else { - /* Screw you mmap */ - ret = btrfs_fdatawrite_range(inode, lockstart, lockend); - if (ret) - break; - ret = filemap_fdatawait_range(inode->i_mapping, - lockstart, - lockend); - if (ret) - break; - /* - * If we found a page that couldn't be invalidated just - * fall back to buffered. + * We could trigger writeback for this range (and wait + * for it to complete) and then invalidate the pages for + * this range (through invalidate_inode_pages2_range()), + * but that can lead us to a deadlock with a concurrent + * call to readpages() (a buffered read or a defrag call + * triggered a readahead) on a page lock due to an + * ordered dio extent we created before but did not have + * yet a corresponding bio submitted (whence it can not + * complete), which makes readpages() wait for that + * ordered extent to complete while holding a lock on + * that page. */ - ret = invalidate_inode_pages2_range(inode->i_mapping, - lockstart >> PAGE_CACHE_SHIFT, - lockend >> PAGE_CACHE_SHIFT); - if (ret) - break; + ret = -ENOTBLK; + break; } cond_resched(); -- cgit v0.10.2 From f28a492878170f39002660a26c329201cf678d74 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 8 Dec 2015 19:23:20 +0000 Subject: Btrfs: fix leaking of ordered extents after direct IO write error When doing a direct IO write, __blockdev_direct_IO() can call the btrfs_get_blocks_direct() callback one or more times before it calls the btrfs_submit_direct() callback. However it can fail after calling the first callback and before calling the second callback, which is a problem because the first one creates ordered extents and the second one is the one that submits bios that cover the ordered extents created by the first one. That means the ordered extents will never complete nor have any of the flags BTRFS_ORDERED_IO_DONE / BTRFS_ORDERED_IOERR set, resulting in subsequent operations (such as other direct IO writes, buffered writes or hole punching) that lock the same IO range and lookup for ordered extents in the range to hang forever waiting for those ordered extents because they can not complete ever, since no bio was submitted. Fix this by tracking a range of created ordered extents that don't have yet corresponding bios submitted and completing the ordered extents in the range if __blockdev_direct_IO() fails with an error. Signed-off-by: Filipe Manana diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 269da82..1436799 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -66,6 +66,13 @@ struct btrfs_iget_args { struct btrfs_root *root; }; +struct btrfs_dio_data { + u64 outstanding_extents; + u64 reserve; + u64 unsubmitted_oe_range_start; + u64 unsubmitted_oe_range_end; +}; + static const struct inode_operations btrfs_dir_inode_operations; static const struct inode_operations btrfs_symlink_inode_operations; static const struct inode_operations btrfs_dir_ro_inode_operations; @@ -7481,11 +7488,6 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, return em; } -struct btrfs_dio_data { - u64 outstanding_extents; - u64 reserve; -}; - static void adjust_dio_outstanding_extents(struct inode *inode, struct btrfs_dio_data *dio_data, const u64 len) @@ -7669,6 +7671,7 @@ unlock: btrfs_free_reserved_data_space(inode, start, len); WARN_ON(dio_data->reserve < len); dio_data->reserve -= len; + dio_data->unsubmitted_oe_range_end = start + len; current->journal_info = dio_data; } @@ -8342,6 +8345,21 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, dip->subio_endio = btrfs_subio_endio_read; } + /* + * Reset the range for unsubmitted ordered extents (to a 0 length range) + * even if we fail to submit a bio, because in such case we do the + * corresponding error handling below and it must not be done a second + * time by btrfs_direct_IO(). + */ + if (write) { + struct btrfs_dio_data *dio_data = current->journal_info; + + dio_data->unsubmitted_oe_range_end = dip->logical_offset + + dip->bytes; + dio_data->unsubmitted_oe_range_start = + dio_data->unsubmitted_oe_range_end; + } + ret = btrfs_submit_direct_hook(rw, dip, skip_sum); if (!ret) return; @@ -8478,6 +8496,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, * originally calculated. Abuse current->journal_info for this. */ dio_data.reserve = round_up(count, root->sectorsize); + dio_data.unsubmitted_oe_range_start = (u64)offset; + dio_data.unsubmitted_oe_range_end = (u64)offset; current->journal_info = &dio_data; } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags)) { @@ -8496,6 +8516,19 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (dio_data.reserve) btrfs_delalloc_release_space(inode, offset, dio_data.reserve); + /* + * On error we might have left some ordered extents + * without submitting corresponding bios for them, so + * cleanup them up to avoid other tasks getting them + * and waiting for them to complete forever. + */ + if (dio_data.unsubmitted_oe_range_start < + dio_data.unsubmitted_oe_range_end) + btrfs_endio_direct_write_update_ordered(inode, + dio_data.unsubmitted_oe_range_start, + dio_data.unsubmitted_oe_range_end - + dio_data.unsubmitted_oe_range_start, + 0); } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, offset, count - (size_t)ret); -- cgit v0.10.2 From 3e1e8bb770dba29645b302c5499ffcb8e3906712 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 29 Sep 2015 20:50:30 -0700 Subject: Btrfs: add extent buffer bitmap operations These are going to be used for the free space tree bitmap items. Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3915c94..324d382 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -5527,6 +5527,155 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, } } +/* + * The extent buffer bitmap operations are done with byte granularity because + * bitmap items are not guaranteed to be aligned to a word and therefore a + * single word in a bitmap may straddle two pages in the extent buffer. + */ +#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE) +#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1) +#define BITMAP_FIRST_BYTE_MASK(start) \ + ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK) +#define BITMAP_LAST_BYTE_MASK(nbits) \ + (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1))) + +/* + * eb_bitmap_offset() - calculate the page and offset of the byte containing the + * given bit number + * @eb: the extent buffer + * @start: offset of the bitmap item in the extent buffer + * @nr: bit number + * @page_index: return index of the page in the extent buffer that contains the + * given bit number + * @page_offset: return offset into the page given by page_index + * + * This helper hides the ugliness of finding the byte in an extent buffer which + * contains a given bit. + */ +static inline void eb_bitmap_offset(struct extent_buffer *eb, + unsigned long start, unsigned long nr, + unsigned long *page_index, + size_t *page_offset) +{ + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + size_t byte_offset = BIT_BYTE(nr); + size_t offset; + + /* + * The byte we want is the offset of the extent buffer + the offset of + * the bitmap item in the extent buffer + the offset of the byte in the + * bitmap item. + */ + offset = start_offset + start + byte_offset; + + *page_index = offset >> PAGE_CACHE_SHIFT; + *page_offset = offset & (PAGE_CACHE_SIZE - 1); +} + +/** + * extent_buffer_test_bit - determine whether a bit in a bitmap item is set + * @eb: the extent buffer + * @start: offset of the bitmap item in the extent buffer + * @nr: bit number to test + */ +int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, + unsigned long nr) +{ + char *kaddr; + struct page *page; + unsigned long i; + size_t offset; + + eb_bitmap_offset(eb, start, nr, &i, &offset); + page = eb->pages[i]; + WARN_ON(!PageUptodate(page)); + kaddr = page_address(page); + return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1))); +} + +/** + * extent_buffer_bitmap_set - set an area of a bitmap + * @eb: the extent buffer + * @start: offset of the bitmap item in the extent buffer + * @pos: bit number of the first bit + * @len: number of bits to set + */ +void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, + unsigned long pos, unsigned long len) +{ + char *kaddr; + struct page *page; + unsigned long i; + size_t offset; + const unsigned int size = pos + len; + int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE); + unsigned int mask_to_set = BITMAP_FIRST_BYTE_MASK(pos); + + eb_bitmap_offset(eb, start, pos, &i, &offset); + page = eb->pages[i]; + WARN_ON(!PageUptodate(page)); + kaddr = page_address(page); + + while (len >= bits_to_set) { + kaddr[offset] |= mask_to_set; + len -= bits_to_set; + bits_to_set = BITS_PER_BYTE; + mask_to_set = ~0U; + if (++offset >= PAGE_CACHE_SIZE && len > 0) { + offset = 0; + page = eb->pages[++i]; + WARN_ON(!PageUptodate(page)); + kaddr = page_address(page); + } + } + if (len) { + mask_to_set &= BITMAP_LAST_BYTE_MASK(size); + kaddr[offset] |= mask_to_set; + } +} + + +/** + * extent_buffer_bitmap_clear - clear an area of a bitmap + * @eb: the extent buffer + * @start: offset of the bitmap item in the extent buffer + * @pos: bit number of the first bit + * @len: number of bits to clear + */ +void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, + unsigned long pos, unsigned long len) +{ + char *kaddr; + struct page *page; + unsigned long i; + size_t offset; + const unsigned int size = pos + len; + int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE); + unsigned int mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos); + + eb_bitmap_offset(eb, start, pos, &i, &offset); + page = eb->pages[i]; + WARN_ON(!PageUptodate(page)); + kaddr = page_address(page); + + while (len >= bits_to_clear) { + kaddr[offset] &= ~mask_to_clear; + len -= bits_to_clear; + bits_to_clear = BITS_PER_BYTE; + mask_to_clear = ~0U; + if (++offset >= PAGE_CACHE_SIZE && len > 0) { + offset = 0; + page = eb->pages[++i]; + WARN_ON(!PageUptodate(page)); + kaddr = page_address(page); + } + } + if (len) { + mask_to_clear &= BITMAP_LAST_BYTE_MASK(size); + kaddr[offset] &= ~mask_to_clear; + } +} + static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) { unsigned long distance = (src > dst) ? src - dst : dst - src; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c668f36..9185a20 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -309,6 +309,12 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, unsigned long src_offset, unsigned long len); void memset_extent_buffer(struct extent_buffer *eb, char c, unsigned long start, unsigned long len); +int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start, + unsigned long pos); +void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start, + unsigned long pos, unsigned long len); +void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start, + unsigned long pos, unsigned long len); void clear_extent_buffer_dirty(struct extent_buffer *eb); int set_extent_buffer_dirty(struct extent_buffer *eb); int set_extent_buffer_uptodate(struct extent_buffer *eb); -- cgit v0.10.2 From 0f3312295d3ce1d82392244236a52b3b663480ef Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 29 Sep 2015 20:50:31 -0700 Subject: Btrfs: add extent buffer bitmap sanity tests Sanity test the extent buffer bitmap operations (test, set, and clear) against the equivalent standard kernel operations. Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 324d382..a6eec2d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4730,24 +4730,14 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) return new; } -struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start) +struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, + u64 start, unsigned long len) { struct extent_buffer *eb; - unsigned long len; unsigned long num_pages; unsigned long i; - if (!fs_info) { - /* - * Called only from tests that don't always have a fs_info - * available, but we know that nodesize is 4096 - */ - len = 4096; - } else { - len = fs_info->tree_root->nodesize; - } - num_pages = num_extent_pages(0, len); + num_pages = num_extent_pages(start, len); eb = __alloc_extent_buffer(fs_info, start, len); if (!eb) @@ -4770,6 +4760,24 @@ err: return NULL; } +struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, + u64 start) +{ + unsigned long len; + + if (!fs_info) { + /* + * Called only from tests that don't always have a fs_info + * available, but we know that nodesize is 4096 + */ + len = 4096; + } else { + len = fs_info->tree_root->nodesize; + } + + return __alloc_dummy_extent_buffer(fs_info, start, len); +} + static void check_buffer_tree_ref(struct extent_buffer *eb) { int refs; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 9185a20..9f8d7d1 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -263,8 +263,10 @@ void set_page_extent_mapped(struct page *page); struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); +struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, + u64 start, unsigned long len); struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info, - u64 start); + u64 start); struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src); struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, u64 start); diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 9e9f236..71ab575 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -18,6 +18,7 @@ #include #include +#include #include "btrfs-tests.h" #include "../extent_io.h" @@ -76,6 +77,8 @@ static int test_find_delalloc(void) u64 found; int ret = -EINVAL; + test_msg("Running find delalloc tests\n"); + inode = btrfs_new_test_inode(); if (!inode) { test_msg("Failed to allocate test inode\n"); @@ -268,8 +271,139 @@ out: return ret; } +static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb, + unsigned long len) +{ + unsigned long i, x; + + memset(bitmap, 0, len); + memset_extent_buffer(eb, 0, 0, len); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Bitmap was not zeroed\n"); + return -EINVAL; + } + + bitmap_set(bitmap, 0, len * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Setting all bits failed\n"); + return -EINVAL; + } + + bitmap_clear(bitmap, 0, len * BITS_PER_BYTE); + extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Clearing all bits failed\n"); + return -EINVAL; + } + + bitmap_set(bitmap, (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Setting straddling pages failed\n"); + return -EINVAL; + } + + bitmap_set(bitmap, 0, len * BITS_PER_BYTE); + bitmap_clear(bitmap, + (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE, + sizeof(long) * BITS_PER_BYTE); + extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE); + extent_buffer_bitmap_clear(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0, + sizeof(long) * BITS_PER_BYTE); + if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) { + test_msg("Clearing straddling pages failed\n"); + return -EINVAL; + } + + /* + * Generate a wonky pseudo-random bit pattern for the sake of not using + * something repetitive that could miss some hypothetical off-by-n bug. + */ + x = 0; + for (i = 0; i < len / sizeof(long); i++) { + x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffUL; + bitmap[i] = x; + } + write_extent_buffer(eb, bitmap, 0, len); + + for (i = 0; i < len * BITS_PER_BYTE; i++) { + int bit, bit1; + + bit = !!test_bit(i, bitmap); + bit1 = !!extent_buffer_test_bit(eb, 0, i); + if (bit1 != bit) { + test_msg("Testing bit pattern failed\n"); + return -EINVAL; + } + + bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE, + i % BITS_PER_BYTE); + if (bit1 != bit) { + test_msg("Testing bit pattern with offset failed\n"); + return -EINVAL; + } + } + + return 0; +} + +static int test_eb_bitmaps(void) +{ + unsigned long len = PAGE_CACHE_SIZE * 4; + unsigned long *bitmap; + struct extent_buffer *eb; + int ret; + + test_msg("Running extent buffer bitmap tests\n"); + + bitmap = kmalloc(len, GFP_NOFS); + if (!bitmap) { + test_msg("Couldn't allocate test bitmap\n"); + return -ENOMEM; + } + + eb = __alloc_dummy_extent_buffer(NULL, 0, len); + if (!eb) { + test_msg("Couldn't allocate test extent buffer\n"); + kfree(bitmap); + return -ENOMEM; + } + + ret = __test_eb_bitmaps(bitmap, eb, len); + if (ret) + goto out; + + /* Do it over again with an extent buffer which isn't page-aligned. */ + free_extent_buffer(eb); + eb = __alloc_dummy_extent_buffer(NULL, PAGE_CACHE_SIZE / 2, len); + if (!eb) { + test_msg("Couldn't allocate test extent buffer\n"); + kfree(bitmap); + return -ENOMEM; + } + + ret = __test_eb_bitmaps(bitmap, eb, len); +out: + free_extent_buffer(eb); + kfree(bitmap); + return ret; +} + int btrfs_test_extent_io(void) { - test_msg("Running find delalloc tests\n"); - return test_find_delalloc(); + int ret; + + test_msg("Running extent I/O tests\n"); + + ret = test_find_delalloc(); + if (ret) + goto out; + + ret = test_eb_bitmaps(); +out: + test_msg("Extent I/O tests finished\n"); + return ret; } -- cgit v0.10.2 From 1abfbcdf56d9485f050149bc4968c1609f9a0773 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 29 Sep 2015 20:50:32 -0700 Subject: Btrfs: add helpers for read-only compat bits We're finally going to add one of these for the free space tree, so let's add the same nice helpers that we have for the incompat bits. While we're add it, also add helpers to clear the bits. Reviewed-by: Josef Bacik Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 938efe3..42c41dc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4100,6 +4100,30 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, } } +#define btrfs_clear_fs_incompat(__fs_info, opt) \ + __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt) + +static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, + u64 flag) +{ + struct btrfs_super_block *disk_super; + u64 features; + + disk_super = fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); + if (features & flag) { + spin_lock(&fs_info->super_lock); + features = btrfs_super_incompat_flags(disk_super); + if (features & flag) { + features &= ~flag; + btrfs_set_super_incompat_flags(disk_super, features); + btrfs_info(fs_info, "clearing %llu feature flag", + flag); + } + spin_unlock(&fs_info->super_lock); + } +} + #define btrfs_fs_incompat(fs_info, opt) \ __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) @@ -4110,6 +4134,64 @@ static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) return !!(btrfs_super_incompat_flags(disk_super) & flag); } +#define btrfs_set_fs_compat_ro(__fs_info, opt) \ + __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) + +static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, + u64 flag) +{ + struct btrfs_super_block *disk_super; + u64 features; + + disk_super = fs_info->super_copy; + features = btrfs_super_compat_ro_flags(disk_super); + if (!(features & flag)) { + spin_lock(&fs_info->super_lock); + features = btrfs_super_compat_ro_flags(disk_super); + if (!(features & flag)) { + features |= flag; + btrfs_set_super_compat_ro_flags(disk_super, features); + btrfs_info(fs_info, "setting %llu ro feature flag", + flag); + } + spin_unlock(&fs_info->super_lock); + } +} + +#define btrfs_clear_fs_compat_ro(__fs_info, opt) \ + __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) + +static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, + u64 flag) +{ + struct btrfs_super_block *disk_super; + u64 features; + + disk_super = fs_info->super_copy; + features = btrfs_super_compat_ro_flags(disk_super); + if (features & flag) { + spin_lock(&fs_info->super_lock); + features = btrfs_super_compat_ro_flags(disk_super); + if (features & flag) { + features &= ~flag; + btrfs_set_super_compat_ro_flags(disk_super, features); + btrfs_info(fs_info, "clearing %llu ro feature flag", + flag); + } + spin_unlock(&fs_info->super_lock); + } +} + +#define btrfs_fs_compat_ro(fs_info, opt) \ + __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) + +static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) +{ + struct btrfs_super_block *disk_super; + disk_super = fs_info->super_copy; + return !!(btrfs_super_compat_ro_flags(disk_super) & flag); +} + /* * Call btrfs_abort_transaction as early as possible when an error condition is * detected, that way the exact line number is reported. -- cgit v0.10.2 From 73fa48b674e819098c3bafc47618d0e2868191e5 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 29 Sep 2015 20:50:33 -0700 Subject: Btrfs: refactor caching_thread() We're also going to load the free space tree from caching_thread(), so we should refactor some of the common code. Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 42c41dc..60df67e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1262,6 +1262,9 @@ struct btrfs_caching_control { atomic_t count; }; +/* Once caching_thread() finds this much free space, it will wake up waiters. */ +#define CACHING_CTL_WAKE_UP (1024 * 1024 * 2) + struct btrfs_io_ctl { void *cur, *orig; struct page *page; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 601d7d4..af8ffb5 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -375,11 +375,10 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, return total_added; } -static noinline void caching_thread(struct btrfs_work *work) +static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl) { struct btrfs_block_group_cache *block_group; struct btrfs_fs_info *fs_info; - struct btrfs_caching_control *caching_ctl; struct btrfs_root *extent_root; struct btrfs_path *path; struct extent_buffer *leaf; @@ -387,16 +386,15 @@ static noinline void caching_thread(struct btrfs_work *work) u64 total_found = 0; u64 last = 0; u32 nritems; - int ret = -ENOMEM; + int ret; - caching_ctl = container_of(work, struct btrfs_caching_control, work); block_group = caching_ctl->block_group; fs_info = block_group->fs_info; extent_root = fs_info->extent_root; path = btrfs_alloc_path(); if (!path) - goto out; + return -ENOMEM; last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); @@ -413,15 +411,11 @@ static noinline void caching_thread(struct btrfs_work *work) key.objectid = last; key.offset = 0; key.type = BTRFS_EXTENT_ITEM_KEY; -again: - mutex_lock(&caching_ctl->mutex); - /* need to make sure the commit_root doesn't disappear */ - down_read(&fs_info->commit_root_sem); next: ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); if (ret < 0) - goto err; + goto out; leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -446,12 +440,14 @@ next: up_read(&fs_info->commit_root_sem); mutex_unlock(&caching_ctl->mutex); cond_resched(); - goto again; + mutex_lock(&caching_ctl->mutex); + down_read(&fs_info->commit_root_sem); + goto next; } ret = btrfs_next_leaf(extent_root, path); if (ret < 0) - goto err; + goto out; if (ret) break; leaf = path->nodes[0]; @@ -489,7 +485,7 @@ next: else last = key.objectid + key.offset; - if (total_found > (1024 * 1024 * 2)) { + if (total_found > CACHING_CTL_WAKE_UP) { total_found = 0; wake_up(&caching_ctl->wait); } @@ -503,25 +499,36 @@ next: block_group->key.offset); caching_ctl->progress = (u64)-1; +out: + btrfs_free_path(path); + return ret; +} + +static noinline void caching_thread(struct btrfs_work *work) +{ + struct btrfs_block_group_cache *block_group; + struct btrfs_fs_info *fs_info; + struct btrfs_caching_control *caching_ctl; + int ret; + + caching_ctl = container_of(work, struct btrfs_caching_control, work); + block_group = caching_ctl->block_group; + fs_info = block_group->fs_info; + + mutex_lock(&caching_ctl->mutex); + down_read(&fs_info->commit_root_sem); + + ret = load_extent_tree_free(caching_ctl); + spin_lock(&block_group->lock); block_group->caching_ctl = NULL; - block_group->cached = BTRFS_CACHE_FINISHED; + block_group->cached = ret ? BTRFS_CACHE_ERROR : BTRFS_CACHE_FINISHED; spin_unlock(&block_group->lock); -err: - btrfs_free_path(path); up_read(&fs_info->commit_root_sem); - - free_excluded_extents(extent_root, block_group); - + free_excluded_extents(fs_info->extent_root, block_group); mutex_unlock(&caching_ctl->mutex); -out: - if (ret) { - spin_lock(&block_group->lock); - block_group->caching_ctl = NULL; - block_group->cached = BTRFS_CACHE_ERROR; - spin_unlock(&block_group->lock); - } + wake_up(&caching_ctl->wait); put_caching_control(caching_ctl); -- cgit v0.10.2 From 208acb8c72d7ace6b672b105502dca0bcb050162 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 29 Sep 2015 20:50:34 -0700 Subject: Btrfs: introduce the free space B-tree on-disk format The on-disk format for the free space tree is straightforward. Each block group is represented in the free space tree by a free space info item that stores accounting information: whether the free space for this block group is stored as bitmaps or extents and how many extents of free space exist for this block group (regardless of which format is being used in the tree). Extents are (start, FREE_SPACE_EXTENT, length) keys with no corresponding item, and bitmaps instead have the FREE_SPACE_BITMAP type and have a bitmap item attached, which is just an array of bytes. Reviewed-by: Josef Bacik Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 60df67e..0e40d32 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -96,6 +96,9 @@ struct btrfs_ordered_sum; /* for storing items that use the BTRFS_UUID_KEY* types */ #define BTRFS_UUID_TREE_OBJECTID 9ULL +/* tracks free space in block groups. */ +#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL + /* for storing balance parameters in the root tree */ #define BTRFS_BALANCE_OBJECTID -4ULL @@ -500,6 +503,8 @@ struct btrfs_super_block { * Compat flags that we support. If any incompat flags are set other than the * ones specified below then we will fail to mount */ +#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0) + #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) @@ -1061,6 +1066,13 @@ struct btrfs_block_group_item { __le64 flags; } __attribute__ ((__packed__)); +struct btrfs_free_space_info { + __le32 extent_count; + __le32 flags; +} __attribute__ ((__packed__)); + +#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0) + #define BTRFS_QGROUP_LEVEL_SHIFT 48 static inline u64 btrfs_qgroup_level(u64 qgroupid) { @@ -2058,6 +2070,27 @@ struct btrfs_ioctl_defrag_range_args { */ #define BTRFS_BLOCK_GROUP_ITEM_KEY 192 +/* + * Every block group is represented in the free space tree by a free space info + * item, which stores some accounting information. It is keyed on + * (block_group_start, FREE_SPACE_INFO, block_group_length). + */ +#define BTRFS_FREE_SPACE_INFO_KEY 198 + +/* + * A free space extent tracks an extent of space that is free in a block group. + * It is keyed on (start, FREE_SPACE_EXTENT, length). + */ +#define BTRFS_FREE_SPACE_EXTENT_KEY 199 + +/* + * When a block group becomes very fragmented, we convert it to use bitmaps + * instead of extents. A free space bitmap is keyed on + * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with + * (length / sectorsize) bits. + */ +#define BTRFS_FREE_SPACE_BITMAP_KEY 200 + #define BTRFS_DEV_EXTENT_KEY 204 #define BTRFS_DEV_ITEM_KEY 216 #define BTRFS_CHUNK_ITEM_KEY 228 @@ -2458,6 +2491,11 @@ BTRFS_SETGET_FUNCS(disk_block_group_flags, BTRFS_SETGET_STACK_FUNCS(block_group_flags, struct btrfs_block_group_item, flags, 64); +/* struct btrfs_free_space_info */ +BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info, + extent_count, 32); +BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32); + /* struct btrfs_inode_ref */ BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 0b73af9..e6289e6 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -45,7 +45,8 @@ struct btrfs_qgroup_operation; { BTRFS_TREE_LOG_OBJECTID, "TREE_LOG" }, \ { BTRFS_QUOTA_TREE_OBJECTID, "QUOTA_TREE" }, \ { BTRFS_TREE_RELOC_OBJECTID, "TREE_RELOC" }, \ - { BTRFS_UUID_TREE_OBJECTID, "UUID_RELOC" }, \ + { BTRFS_UUID_TREE_OBJECTID, "UUID_TREE" }, \ + { BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" }, \ { BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" }) #define show_root_type(obj) \ -- cgit v0.10.2 From a5ed91828518ab076209266c2bc510adabd078df Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 29 Sep 2015 20:50:35 -0700 Subject: Btrfs: implement the free space B-tree The free space cache has turned out to be a scalability bottleneck on large, busy filesystems. When the cache for a lot of block groups needs to be written out, we can get extremely long commit times; if this happens in the critical section, things are especially bad because we block new transactions from happening. The main problem with the free space cache is that it has to be written out in its entirety and is managed in an ad hoc fashion. Using a B-tree to store free space fixes this: updates can be done as needed and we get all of the benefits of using a B-tree: checksumming, RAID handling, well-understood behavior. With the free space tree, we get commit times that are about the same as the no cache case with load times slower than the free space cache case but still much faster than the no cache case. Free space is represented with extents until it becomes more space-efficient to use bitmaps, giving us similar space overhead to the free space cache. The operations on the free space tree are: adding and removing free space, handling the creation and deletion of block groups, and loading the free space for a block group. We can also create the free space tree by walking the extent tree and clear the free space tree. Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 6d1d0b9..7661697 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -9,7 +9,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ export.o tree-log.o free-space-cache.o zlib.o lzo.o \ compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ - uuid-tree.o props.o hash.o + uuid-tree.o props.o hash.o free-space-tree.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0e40d32..0888c3e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1302,8 +1302,20 @@ struct btrfs_block_group_cache { u64 delalloc_bytes; u64 bytes_super; u64 flags; - u64 sectorsize; u64 cache_generation; + u32 sectorsize; + + /* + * If the free space extent count exceeds this number, convert the block + * group to bitmaps. + */ + u32 bitmap_high_thresh; + + /* + * If the free space extent count drops below this number, convert the + * block group back to extents. + */ + u32 bitmap_low_thresh; /* * It is just used for the delayed data space allocation because @@ -1359,6 +1371,15 @@ struct btrfs_block_group_cache { struct list_head io_list; struct btrfs_io_ctl io_ctl; + + /* Lock for free space tree operations. */ + struct mutex free_space_lock; + + /* + * Does the block group need to be added to the free space tree? + * Protected by free_space_lock. + */ + int needs_free_space; }; /* delayed seq elem */ @@ -1410,6 +1431,7 @@ struct btrfs_fs_info { struct btrfs_root *csum_root; struct btrfs_root *quota_root; struct btrfs_root *uuid_root; + struct btrfs_root *free_space_root; /* the log root tree is a directory of all the other log roots */ struct btrfs_root *log_root_tree; @@ -3555,6 +3577,9 @@ void btrfs_end_write_no_snapshoting(struct btrfs_root *root); void check_system_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *root, const u64 type); +u64 add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end); + /* ctree.c */ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, int level, int *slot); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index af8ffb5..e07280c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -337,8 +337,8 @@ static void put_caching_control(struct btrfs_caching_control *ctl) * we need to check the pinned_extents for any extents that can't be used yet * since their free space will be released as soon as the transaction commits. */ -static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, - struct btrfs_fs_info *info, u64 start, u64 end) +u64 add_new_free_space(struct btrfs_block_group_cache *block_group, + struct btrfs_fs_info *info, u64 start, u64 end) { u64 extent_start, extent_end, size, total_added = 0; int ret; @@ -9381,6 +9381,7 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) INIT_LIST_HEAD(&cache->io_list); btrfs_init_free_space_ctl(cache); atomic_set(&cache->trimming, 0); + mutex_init(&cache->free_space_lock); return cache; } diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c new file mode 100644 index 0000000..cbe36dd --- /dev/null +++ b/fs/btrfs/free-space-tree.c @@ -0,0 +1,1584 @@ +/* + * Copyright (C) 2015 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include +#include +#include "ctree.h" +#include "disk-io.h" +#include "locking.h" +#include "free-space-tree.h" +#include "transaction.h" + +static int __add_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path); + +void set_free_space_tree_thresholds(struct btrfs_block_group_cache *cache) +{ + u32 bitmap_range; + size_t bitmap_size; + u64 num_bitmaps, total_bitmap_size; + + /* + * We convert to bitmaps when the disk space required for using extents + * exceeds that required for using bitmaps. + */ + bitmap_range = cache->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; + num_bitmaps = div_u64(cache->key.offset + bitmap_range - 1, + bitmap_range); + bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE; + total_bitmap_size = num_bitmaps * bitmap_size; + cache->bitmap_high_thresh = div_u64(total_bitmap_size, + sizeof(struct btrfs_item)); + + /* + * We allow for a small buffer between the high threshold and low + * threshold to avoid thrashing back and forth between the two formats. + */ + if (cache->bitmap_high_thresh > 100) + cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100; + else + cache->bitmap_low_thresh = 0; +} + +static int add_new_free_space_info(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path) +{ + struct btrfs_root *root = fs_info->free_space_root; + struct btrfs_free_space_info *info; + struct btrfs_key key; + struct extent_buffer *leaf; + int ret; + + key.objectid = block_group->key.objectid; + key.type = BTRFS_FREE_SPACE_INFO_KEY; + key.offset = block_group->key.offset; + + ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info)); + if (ret) + goto out; + + leaf = path->nodes[0]; + info = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_free_space_info); + btrfs_set_free_space_extent_count(leaf, info, 0); + btrfs_set_free_space_flags(leaf, info, 0); + btrfs_mark_buffer_dirty(leaf); + + ret = 0; +out: + btrfs_release_path(path); + return ret; +} + +struct btrfs_free_space_info * +search_free_space_info(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, int cow) +{ + struct btrfs_root *root = fs_info->free_space_root; + struct btrfs_key key; + int ret; + + key.objectid = block_group->key.objectid; + key.type = BTRFS_FREE_SPACE_INFO_KEY; + key.offset = block_group->key.offset; + + ret = btrfs_search_slot(trans, root, &key, path, 0, cow); + if (ret < 0) + return ERR_PTR(ret); + if (ret != 0) { + btrfs_warn(fs_info, "missing free space info for %llu\n", + block_group->key.objectid); + ASSERT(0); + return ERR_PTR(-ENOENT); + } + + return btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_free_space_info); +} + +/* + * btrfs_search_slot() but we're looking for the greatest key less than the + * passed key. + */ +static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *key, struct btrfs_path *p, + int ins_len, int cow) +{ + int ret; + + ret = btrfs_search_slot(trans, root, key, p, ins_len, cow); + if (ret < 0) + return ret; + + if (ret == 0) { + ASSERT(0); + return -EIO; + } + + if (p->slots[0] == 0) { + ASSERT(0); + return -EIO; + } + p->slots[0]--; + + return 0; +} + +static inline u32 free_space_bitmap_size(u64 size, u32 sectorsize) +{ + return DIV_ROUND_UP((u32)div_u64(size, sectorsize), BITS_PER_BYTE); +} + +static unsigned long *alloc_bitmap(u32 bitmap_size) +{ + return __vmalloc(bitmap_size, GFP_NOFS | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL); +} + +int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path) +{ + struct btrfs_root *root = fs_info->free_space_root; + struct btrfs_free_space_info *info; + struct btrfs_key key, found_key; + struct extent_buffer *leaf; + unsigned long *bitmap; + char *bitmap_cursor; + u64 start, end; + u64 bitmap_range, i; + u32 bitmap_size, flags, expected_extent_count; + u32 extent_count = 0; + int done = 0, nr; + int ret; + + bitmap_size = free_space_bitmap_size(block_group->key.offset, + block_group->sectorsize); + bitmap = alloc_bitmap(bitmap_size); + if (!bitmap) { + ret = -ENOMEM; + goto out; + } + + start = block_group->key.objectid; + end = block_group->key.objectid + block_group->key.offset; + + key.objectid = end - 1; + key.type = (u8)-1; + key.offset = (u64)-1; + + while (!done) { + ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); + if (ret) + goto out; + + leaf = path->nodes[0]; + nr = 0; + path->slots[0]++; + while (path->slots[0] > 0) { + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); + + if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { + ASSERT(found_key.objectid == block_group->key.objectid); + ASSERT(found_key.offset == block_group->key.offset); + done = 1; + break; + } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) { + u64 first, last; + + ASSERT(found_key.objectid >= start); + ASSERT(found_key.objectid < end); + ASSERT(found_key.objectid + found_key.offset <= end); + + first = div_u64(found_key.objectid - start, + block_group->sectorsize); + last = div_u64(found_key.objectid + found_key.offset - start, + block_group->sectorsize); + bitmap_set(bitmap, first, last - first); + + extent_count++; + nr++; + path->slots[0]--; + } else { + ASSERT(0); + } + } + + ret = btrfs_del_items(trans, root, path, path->slots[0], nr); + if (ret) + goto out; + btrfs_release_path(path); + } + + info = search_free_space_info(trans, fs_info, block_group, path, 1); + if (IS_ERR(info)) { + ret = PTR_ERR(info); + goto out; + } + leaf = path->nodes[0]; + flags = btrfs_free_space_flags(leaf, info); + flags |= BTRFS_FREE_SPACE_USING_BITMAPS; + btrfs_set_free_space_flags(leaf, info, flags); + expected_extent_count = btrfs_free_space_extent_count(leaf, info); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(path); + + if (extent_count != expected_extent_count) { + btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", + block_group->key.objectid, extent_count, + expected_extent_count); + ASSERT(0); + ret = -EIO; + goto out; + } + + bitmap_cursor = (char *)bitmap; + bitmap_range = block_group->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; + i = start; + while (i < end) { + unsigned long ptr; + u64 extent_size; + u32 data_size; + + extent_size = min(end - i, bitmap_range); + data_size = free_space_bitmap_size(extent_size, + block_group->sectorsize); + + key.objectid = i; + key.type = BTRFS_FREE_SPACE_BITMAP_KEY; + key.offset = extent_size; + + ret = btrfs_insert_empty_item(trans, root, path, &key, + data_size); + if (ret) + goto out; + + leaf = path->nodes[0]; + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + write_extent_buffer(leaf, bitmap_cursor, ptr, + data_size); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(path); + + i += extent_size; + bitmap_cursor += data_size; + } + + ret = 0; +out: + vfree(bitmap); + if (ret) + btrfs_abort_transaction(trans, root, ret); + return ret; +} + +int convert_free_space_to_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path) +{ + struct btrfs_root *root = fs_info->free_space_root; + struct btrfs_free_space_info *info; + struct btrfs_key key, found_key; + struct extent_buffer *leaf; + unsigned long *bitmap; + u64 start, end; + /* Initialize to silence GCC. */ + u64 extent_start = 0; + u64 offset; + u32 bitmap_size, flags, expected_extent_count; + int prev_bit = 0, bit, bitnr; + u32 extent_count = 0; + int done = 0, nr; + int ret; + + bitmap_size = free_space_bitmap_size(block_group->key.offset, + block_group->sectorsize); + bitmap = alloc_bitmap(bitmap_size); + if (!bitmap) { + ret = -ENOMEM; + goto out; + } + + start = block_group->key.objectid; + end = block_group->key.objectid + block_group->key.offset; + + key.objectid = end - 1; + key.type = (u8)-1; + key.offset = (u64)-1; + + while (!done) { + ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); + if (ret) + goto out; + + leaf = path->nodes[0]; + nr = 0; + path->slots[0]++; + while (path->slots[0] > 0) { + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); + + if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { + ASSERT(found_key.objectid == block_group->key.objectid); + ASSERT(found_key.offset == block_group->key.offset); + done = 1; + break; + } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { + unsigned long ptr; + char *bitmap_cursor; + u32 bitmap_pos, data_size; + + ASSERT(found_key.objectid >= start); + ASSERT(found_key.objectid < end); + ASSERT(found_key.objectid + found_key.offset <= end); + + bitmap_pos = div_u64(found_key.objectid - start, + block_group->sectorsize * + BITS_PER_BYTE); + bitmap_cursor = ((char *)bitmap) + bitmap_pos; + data_size = free_space_bitmap_size(found_key.offset, + block_group->sectorsize); + + ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1); + read_extent_buffer(leaf, bitmap_cursor, ptr, + data_size); + + nr++; + path->slots[0]--; + } else { + ASSERT(0); + } + } + + ret = btrfs_del_items(trans, root, path, path->slots[0], nr); + if (ret) + goto out; + btrfs_release_path(path); + } + + info = search_free_space_info(trans, fs_info, block_group, path, 1); + if (IS_ERR(info)) { + ret = PTR_ERR(info); + goto out; + } + leaf = path->nodes[0]; + flags = btrfs_free_space_flags(leaf, info); + flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; + btrfs_set_free_space_flags(leaf, info, flags); + expected_extent_count = btrfs_free_space_extent_count(leaf, info); + btrfs_mark_buffer_dirty(leaf); + btrfs_release_path(path); + + offset = start; + bitnr = 0; + while (offset < end) { + bit = !!test_bit(bitnr, bitmap); + if (prev_bit == 0 && bit == 1) { + extent_start = offset; + } else if (prev_bit == 1 && bit == 0) { + key.objectid = extent_start; + key.type = BTRFS_FREE_SPACE_EXTENT_KEY; + key.offset = offset - extent_start; + + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + if (ret) + goto out; + btrfs_release_path(path); + + extent_count++; + } + prev_bit = bit; + offset += block_group->sectorsize; + bitnr++; + } + if (prev_bit == 1) { + key.objectid = extent_start; + key.type = BTRFS_FREE_SPACE_EXTENT_KEY; + key.offset = end - extent_start; + + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + if (ret) + goto out; + btrfs_release_path(path); + + extent_count++; + } + + if (extent_count != expected_extent_count) { + btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", + block_group->key.objectid, extent_count, + expected_extent_count); + ASSERT(0); + ret = -EIO; + goto out; + } + + ret = 0; +out: + vfree(bitmap); + if (ret) + btrfs_abort_transaction(trans, root, ret); + return ret; +} + +static int update_free_space_extent_count(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, + int new_extents) +{ + struct btrfs_free_space_info *info; + u32 flags; + u32 extent_count; + int ret = 0; + + if (new_extents == 0) + return 0; + + info = search_free_space_info(trans, fs_info, block_group, path, 1); + if (IS_ERR(info)) { + ret = PTR_ERR(info); + goto out; + } + flags = btrfs_free_space_flags(path->nodes[0], info); + extent_count = btrfs_free_space_extent_count(path->nodes[0], info); + + extent_count += new_extents; + btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_release_path(path); + + if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) && + extent_count > block_group->bitmap_high_thresh) { + ret = convert_free_space_to_bitmaps(trans, fs_info, block_group, + path); + } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) && + extent_count < block_group->bitmap_low_thresh) { + ret = convert_free_space_to_extents(trans, fs_info, block_group, + path); + } + +out: + return ret; +} + +int free_space_test_bit(struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, u64 offset) +{ + struct extent_buffer *leaf; + struct btrfs_key key; + u64 found_start, found_end; + unsigned long ptr, i; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); + + found_start = key.objectid; + found_end = key.objectid + key.offset; + ASSERT(offset >= found_start && offset < found_end); + + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + i = div_u64(offset - found_start, block_group->sectorsize); + return !!extent_buffer_test_bit(leaf, ptr, i); +} + +static void free_space_set_bits(struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, u64 *start, u64 *size, + int bit) +{ + struct extent_buffer *leaf; + struct btrfs_key key; + u64 end = *start + *size; + u64 found_start, found_end; + unsigned long ptr, first, last; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); + + found_start = key.objectid; + found_end = key.objectid + key.offset; + ASSERT(*start >= found_start && *start < found_end); + ASSERT(end > found_start); + + if (end > found_end) + end = found_end; + + ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); + first = div_u64(*start - found_start, block_group->sectorsize); + last = div_u64(end - found_start, block_group->sectorsize); + if (bit) + extent_buffer_bitmap_set(leaf, ptr, first, last - first); + else + extent_buffer_bitmap_clear(leaf, ptr, first, last - first); + btrfs_mark_buffer_dirty(leaf); + + *size -= end - *start; + *start = end; +} + +/* + * We can't use btrfs_next_item() in modify_free_space_bitmap() because + * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy + * tree walking in btrfs_next_leaf() anyways because we know exactly what we're + * looking for. + */ +static int free_space_next_bitmap(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct btrfs_path *p) +{ + struct btrfs_key key; + + if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) { + p->slots[0]++; + return 0; + } + + btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]); + btrfs_release_path(p); + + key.objectid += key.offset; + key.type = (u8)-1; + key.offset = (u64)-1; + + return btrfs_search_prev_slot(trans, root, &key, p, 0, 1); +} + +/* + * If remove is 1, then we are removing free space, thus clearing bits in the + * bitmap. If remove is 0, then we are adding free space, thus setting bits in + * the bitmap. + */ +static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, + u64 start, u64 size, int remove) +{ + struct btrfs_root *root = fs_info->free_space_root; + struct btrfs_key key; + u64 end = start + size; + u64 cur_start, cur_size; + int prev_bit, next_bit; + int new_extents; + int ret; + + /* + * Read the bit for the block immediately before the extent of space if + * that block is within the block group. + */ + if (start > block_group->key.objectid) { + u64 prev_block = start - block_group->sectorsize; + + key.objectid = prev_block; + key.type = (u8)-1; + key.offset = (u64)-1; + + ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); + if (ret) + goto out; + + prev_bit = free_space_test_bit(block_group, path, prev_block); + + /* The previous block may have been in the previous bitmap. */ + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (start >= key.objectid + key.offset) { + ret = free_space_next_bitmap(trans, root, path); + if (ret) + goto out; + } + } else { + key.objectid = start; + key.type = (u8)-1; + key.offset = (u64)-1; + + ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); + if (ret) + goto out; + + prev_bit = -1; + } + + /* + * Iterate over all of the bitmaps overlapped by the extent of space, + * clearing/setting bits as required. + */ + cur_start = start; + cur_size = size; + while (1) { + free_space_set_bits(block_group, path, &cur_start, &cur_size, + !remove); + if (cur_size == 0) + break; + ret = free_space_next_bitmap(trans, root, path); + if (ret) + goto out; + } + + /* + * Read the bit for the block immediately after the extent of space if + * that block is within the block group. + */ + if (end < block_group->key.objectid + block_group->key.offset) { + /* The next block may be in the next bitmap. */ + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (end >= key.objectid + key.offset) { + ret = free_space_next_bitmap(trans, root, path); + if (ret) + goto out; + } + + next_bit = free_space_test_bit(block_group, path, end); + } else { + next_bit = -1; + } + + if (remove) { + new_extents = -1; + if (prev_bit == 1) { + /* Leftover on the left. */ + new_extents++; + } + if (next_bit == 1) { + /* Leftover on the right. */ + new_extents++; + } + } else { + new_extents = 1; + if (prev_bit == 1) { + /* Merging with neighbor on the left. */ + new_extents--; + } + if (next_bit == 1) { + /* Merging with neighbor on the right. */ + new_extents--; + } + } + + btrfs_release_path(path); + ret = update_free_space_extent_count(trans, fs_info, block_group, path, + new_extents); + +out: + return ret; +} + +static int remove_free_space_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, + u64 start, u64 size) +{ + struct btrfs_root *root = fs_info->free_space_root; + struct btrfs_key key; + u64 found_start, found_end; + u64 end = start + size; + int new_extents = -1; + int ret; + + key.objectid = start; + key.type = (u8)-1; + key.offset = (u64)-1; + + ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); + if (ret) + goto out; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); + + found_start = key.objectid; + found_end = key.objectid + key.offset; + ASSERT(start >= found_start && end <= found_end); + + /* + * Okay, now that we've found the free space extent which contains the + * free space that we are removing, there are four cases: + * + * 1. We're using the whole extent: delete the key we found and + * decrement the free space extent count. + * 2. We are using part of the extent starting at the beginning: delete + * the key we found and insert a new key representing the leftover at + * the end. There is no net change in the number of extents. + * 3. We are using part of the extent ending at the end: delete the key + * we found and insert a new key representing the leftover at the + * beginning. There is no net change in the number of extents. + * 4. We are using part of the extent in the middle: delete the key we + * found and insert two new keys representing the leftovers on each + * side. Where we used to have one extent, we now have two, so increment + * the extent count. We may need to convert the block group to bitmaps + * as a result. + */ + + /* Delete the existing key (cases 1-4). */ + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + + /* Add a key for leftovers at the beginning (cases 3 and 4). */ + if (start > found_start) { + key.objectid = found_start; + key.type = BTRFS_FREE_SPACE_EXTENT_KEY; + key.offset = start - found_start; + + btrfs_release_path(path); + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + if (ret) + goto out; + new_extents++; + } + + /* Add a key for leftovers at the end (cases 2 and 4). */ + if (end < found_end) { + key.objectid = end; + key.type = BTRFS_FREE_SPACE_EXTENT_KEY; + key.offset = found_end - end; + + btrfs_release_path(path); + ret = btrfs_insert_empty_item(trans, root, path, &key, 0); + if (ret) + goto out; + new_extents++; + } + + btrfs_release_path(path); + ret = update_free_space_extent_count(trans, fs_info, block_group, path, + new_extents); + +out: + return ret; +} + +int __remove_from_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, u64 start, u64 size) +{ + struct btrfs_free_space_info *info; + u32 flags; + int ret; + + if (block_group->needs_free_space) { + ret = __add_block_group_free_space(trans, fs_info, block_group, + path); + if (ret) + return ret; + } + + info = search_free_space_info(NULL, fs_info, block_group, path, 0); + if (IS_ERR(info)) + return PTR_ERR(info); + flags = btrfs_free_space_flags(path->nodes[0], info); + btrfs_release_path(path); + + if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) { + return modify_free_space_bitmap(trans, fs_info, block_group, + path, start, size, 1); + } else { + return remove_free_space_extent(trans, fs_info, block_group, + path, start, size); + } +} + +int remove_from_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 start, u64 size) +{ + struct btrfs_block_group_cache *block_group; + struct btrfs_path *path; + int ret; + + if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + return 0; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + block_group = btrfs_lookup_block_group(fs_info, start); + if (!block_group) { + ASSERT(0); + ret = -ENOENT; + goto out; + } + + mutex_lock(&block_group->free_space_lock); + ret = __remove_from_free_space_tree(trans, fs_info, block_group, path, + start, size); + mutex_unlock(&block_group->free_space_lock); + + btrfs_put_block_group(block_group); +out: + btrfs_free_path(path); + if (ret) + btrfs_abort_transaction(trans, fs_info->free_space_root, ret); + return ret; +} + +static int add_free_space_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, + u64 start, u64 size) +{ + struct btrfs_root *root = fs_info->free_space_root; + struct btrfs_key key, new_key; + u64 found_start, found_end; + u64 end = start + size; + int new_extents = 1; + int ret; + + /* + * We are adding a new extent of free space, but we need to merge + * extents. There are four cases here: + * + * 1. The new extent does not have any immediate neighbors to merge + * with: add the new key and increment the free space extent count. We + * may need to convert the block group to bitmaps as a result. + * 2. The new extent has an immediate neighbor before it: remove the + * previous key and insert a new key combining both of them. There is no + * net change in the number of extents. + * 3. The new extent has an immediate neighbor after it: remove the next + * key and insert a new key combining both of them. There is no net + * change in the number of extents. + * 4. The new extent has immediate neighbors on both sides: remove both + * of the keys and insert a new key combining all of them. Where we used + * to have two extents, we now have one, so decrement the extent count. + */ + + new_key.objectid = start; + new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY; + new_key.offset = size; + + /* Search for a neighbor on the left. */ + if (start == block_group->key.objectid) + goto right; + key.objectid = start - 1; + key.type = (u8)-1; + key.offset = (u64)-1; + + ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); + if (ret) + goto out; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { + ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); + btrfs_release_path(path); + goto right; + } + + found_start = key.objectid; + found_end = key.objectid + key.offset; + ASSERT(found_start >= block_group->key.objectid && + found_end > block_group->key.objectid); + ASSERT(found_start < start && found_end <= start); + + /* + * Delete the neighbor on the left and absorb it into the new key (cases + * 2 and 4). + */ + if (found_end == start) { + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + new_key.objectid = found_start; + new_key.offset += key.offset; + new_extents--; + } + btrfs_release_path(path); + +right: + /* Search for a neighbor on the right. */ + if (end == block_group->key.objectid + block_group->key.offset) + goto insert; + key.objectid = end; + key.type = (u8)-1; + key.offset = (u64)-1; + + ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); + if (ret) + goto out; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { + ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); + btrfs_release_path(path); + goto insert; + } + + found_start = key.objectid; + found_end = key.objectid + key.offset; + ASSERT(found_start >= block_group->key.objectid && + found_end > block_group->key.objectid); + ASSERT((found_start < start && found_end <= start) || + (found_start >= end && found_end > end)); + + /* + * Delete the neighbor on the right and absorb it into the new key + * (cases 3 and 4). + */ + if (found_start == end) { + ret = btrfs_del_item(trans, root, path); + if (ret) + goto out; + new_key.offset += key.offset; + new_extents--; + } + btrfs_release_path(path); + +insert: + /* Insert the new key (cases 1-4). */ + ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0); + if (ret) + goto out; + + btrfs_release_path(path); + ret = update_free_space_extent_count(trans, fs_info, block_group, path, + new_extents); + +out: + return ret; +} + +int __add_to_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, u64 start, u64 size) +{ + struct btrfs_free_space_info *info; + u32 flags; + int ret; + + if (block_group->needs_free_space) { + ret = __add_block_group_free_space(trans, fs_info, block_group, + path); + if (ret) + return ret; + } + + info = search_free_space_info(NULL, fs_info, block_group, path, 0); + if (IS_ERR(info)) + return PTR_ERR(info); + flags = btrfs_free_space_flags(path->nodes[0], info); + btrfs_release_path(path); + + if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) { + return modify_free_space_bitmap(trans, fs_info, block_group, + path, start, size, 0); + } else { + return add_free_space_extent(trans, fs_info, block_group, path, + start, size); + } +} + +int add_to_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 start, u64 size) +{ + struct btrfs_block_group_cache *block_group; + struct btrfs_path *path; + int ret; + + if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + return 0; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + block_group = btrfs_lookup_block_group(fs_info, start); + if (!block_group) { + ASSERT(0); + ret = -ENOENT; + goto out; + } + + mutex_lock(&block_group->free_space_lock); + ret = __add_to_free_space_tree(trans, fs_info, block_group, path, start, + size); + mutex_unlock(&block_group->free_space_lock); + + btrfs_put_block_group(block_group); +out: + btrfs_free_path(path); + if (ret) + btrfs_abort_transaction(trans, fs_info->free_space_root, ret); + return ret; +} + +/* + * Populate the free space tree by walking the extent tree. Operations on the + * extent tree that happen as a result of writes to the free space tree will go + * through the normal add/remove hooks. + */ +static int populate_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_path *path, *path2; + struct btrfs_key key; + u64 start, end; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = 1; + + path2 = btrfs_alloc_path(); + if (!path2) { + btrfs_free_path(path); + return -ENOMEM; + } + + ret = add_new_free_space_info(trans, fs_info, block_group, path2); + if (ret) + goto out; + + /* + * Iterate through all of the extent and metadata items in this block + * group, adding the free space between them and the free space at the + * end. Note that EXTENT_ITEM and METADATA_ITEM are less than + * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's + * contained in. + */ + key.objectid = block_group->key.objectid; + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); + if (ret < 0) + goto out; + ASSERT(ret == 0); + + start = block_group->key.objectid; + end = block_group->key.objectid + block_group->key.offset; + while (1) { + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY) { + if (key.objectid >= end) + break; + + if (start < key.objectid) { + ret = __add_to_free_space_tree(trans, fs_info, + block_group, + path2, start, + key.objectid - + start); + if (ret) + goto out; + } + start = key.objectid; + if (key.type == BTRFS_METADATA_ITEM_KEY) + start += fs_info->tree_root->nodesize; + else + start += key.offset; + } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { + if (key.objectid != block_group->key.objectid) + break; + } + + ret = btrfs_next_item(extent_root, path); + if (ret < 0) + goto out; + if (ret) + break; + } + if (start < end) { + ret = __add_to_free_space_tree(trans, fs_info, block_group, + path2, start, end - start); + if (ret) + goto out; + } + + ret = 0; +out: + btrfs_free_path(path2); + btrfs_free_path(path); + return ret; +} + +int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *free_space_root; + struct btrfs_block_group_cache *block_group; + struct rb_node *node; + int ret; + + trans = btrfs_start_transaction(tree_root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + free_space_root = btrfs_create_tree(trans, fs_info, + BTRFS_FREE_SPACE_TREE_OBJECTID); + if (IS_ERR(free_space_root)) { + ret = PTR_ERR(free_space_root); + goto abort; + } + fs_info->free_space_root = free_space_root; + + node = rb_first(&fs_info->block_group_cache_tree); + while (node) { + block_group = rb_entry(node, struct btrfs_block_group_cache, + cache_node); + ret = populate_free_space_tree(trans, fs_info, block_group); + if (ret) + goto abort; + node = rb_next(node); + } + + btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); + + ret = btrfs_commit_transaction(trans, tree_root); + if (ret) + return ret; + + return 0; + +abort: + btrfs_abort_transaction(trans, tree_root, ret); + btrfs_end_transaction(trans, tree_root); + return ret; +} + +static int clear_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_path *path; + struct btrfs_key key; + int nr; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + path->leave_spinning = 1; + + key.objectid = 0; + key.type = 0; + key.offset = 0; + + while (1) { + ret = btrfs_search_slot(trans, root, &key, path, -1, 1); + if (ret < 0) + goto out; + + nr = btrfs_header_nritems(path->nodes[0]); + if (!nr) + break; + + path->slots[0] = 0; + ret = btrfs_del_items(trans, root, path, 0, nr); + if (ret) + goto out; + + btrfs_release_path(path); + } + + ret = 0; +out: + btrfs_free_path(path); + return ret; +} + +int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *free_space_root = fs_info->free_space_root; + int ret; + + trans = btrfs_start_transaction(tree_root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE); + fs_info->free_space_root = NULL; + + ret = clear_free_space_tree(trans, free_space_root); + if (ret) + goto abort; + + ret = btrfs_del_root(trans, tree_root, &free_space_root->root_key); + if (ret) + goto abort; + + list_del(&free_space_root->dirty_list); + + btrfs_tree_lock(free_space_root->node); + clean_tree_block(trans, tree_root->fs_info, free_space_root->node); + btrfs_tree_unlock(free_space_root->node); + btrfs_free_tree_block(trans, free_space_root, free_space_root->node, + 0, 1); + + free_extent_buffer(free_space_root->node); + free_extent_buffer(free_space_root->commit_root); + kfree(free_space_root); + + ret = btrfs_commit_transaction(trans, tree_root); + if (ret) + return ret; + + return 0; + +abort: + btrfs_abort_transaction(trans, tree_root, ret); + btrfs_end_transaction(trans, tree_root); + return ret; +} + +static int __add_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path) +{ + u64 start, end; + int ret; + + start = block_group->key.objectid; + end = block_group->key.objectid + block_group->key.offset; + + block_group->needs_free_space = 0; + + ret = add_new_free_space_info(trans, fs_info, block_group, path); + if (ret) + return ret; + + return __add_to_free_space_tree(trans, fs_info, block_group, path, + block_group->key.objectid, + block_group->key.offset); +} + +int add_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_path *path = NULL; + int ret = 0; + + if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + return 0; + + mutex_lock(&block_group->free_space_lock); + if (!block_group->needs_free_space) + goto out; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + ret = __add_block_group_free_space(trans, fs_info, block_group, path); + +out: + btrfs_free_path(path); + mutex_unlock(&block_group->free_space_lock); + if (ret) + btrfs_abort_transaction(trans, fs_info->free_space_root, ret); + return ret; +} + +int remove_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group) +{ + struct btrfs_root *root = fs_info->free_space_root; + struct btrfs_path *path; + struct btrfs_key key, found_key; + struct extent_buffer *leaf; + u64 start, end; + int done = 0, nr; + int ret; + + if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + return 0; + + if (block_group->needs_free_space) { + /* We never added this block group to the free space tree. */ + return 0; + } + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + start = block_group->key.objectid; + end = block_group->key.objectid + block_group->key.offset; + + key.objectid = end - 1; + key.type = (u8)-1; + key.offset = (u64)-1; + + while (!done) { + ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); + if (ret) + goto out; + + leaf = path->nodes[0]; + nr = 0; + path->slots[0]++; + while (path->slots[0] > 0) { + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); + + if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { + ASSERT(found_key.objectid == block_group->key.objectid); + ASSERT(found_key.offset == block_group->key.offset); + done = 1; + nr++; + path->slots[0]--; + break; + } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY || + found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { + ASSERT(found_key.objectid >= start); + ASSERT(found_key.objectid < end); + ASSERT(found_key.objectid + found_key.offset <= end); + nr++; + path->slots[0]--; + } else { + ASSERT(0); + } + } + + ret = btrfs_del_items(trans, root, path, path->slots[0], nr); + if (ret) + goto out; + btrfs_release_path(path); + } + + ret = 0; +out: + btrfs_free_path(path); + if (ret) + btrfs_abort_transaction(trans, root, ret); + return ret; +} + +static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, + struct btrfs_path *path, + u32 expected_extent_count) +{ + struct btrfs_block_group_cache *block_group; + struct btrfs_fs_info *fs_info; + struct btrfs_root *root; + struct btrfs_key key; + int prev_bit = 0, bit; + /* Initialize to silence GCC. */ + u64 extent_start = 0; + u64 end, offset; + u64 total_found = 0; + u32 extent_count = 0; + int ret; + + block_group = caching_ctl->block_group; + fs_info = block_group->fs_info; + root = fs_info->free_space_root; + + end = block_group->key.objectid + block_group->key.offset; + + while (1) { + ret = btrfs_next_item(root, path); + if (ret < 0) + goto out; + if (ret) + break; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type == BTRFS_FREE_SPACE_INFO_KEY) + break; + + ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); + ASSERT(key.objectid < end && key.objectid + key.offset <= end); + + caching_ctl->progress = key.objectid; + + offset = key.objectid; + while (offset < key.objectid + key.offset) { + bit = free_space_test_bit(block_group, path, offset); + if (prev_bit == 0 && bit == 1) { + extent_start = offset; + } else if (prev_bit == 1 && bit == 0) { + total_found += add_new_free_space(block_group, + fs_info, + extent_start, + offset); + if (total_found > CACHING_CTL_WAKE_UP) { + total_found = 0; + wake_up(&caching_ctl->wait); + } + extent_count++; + } + prev_bit = bit; + offset += block_group->sectorsize; + } + } + if (prev_bit == 1) { + total_found += add_new_free_space(block_group, fs_info, + extent_start, end); + extent_count++; + } + + if (extent_count != expected_extent_count) { + btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", + block_group->key.objectid, extent_count, + expected_extent_count); + ASSERT(0); + ret = -EIO; + goto out; + } + + caching_ctl->progress = (u64)-1; + + ret = 0; +out: + return ret; +} + +static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, + struct btrfs_path *path, + u32 expected_extent_count) +{ + struct btrfs_block_group_cache *block_group; + struct btrfs_fs_info *fs_info; + struct btrfs_root *root; + struct btrfs_key key; + u64 end; + u64 total_found = 0; + u32 extent_count = 0; + int ret; + + block_group = caching_ctl->block_group; + fs_info = block_group->fs_info; + root = fs_info->free_space_root; + + end = block_group->key.objectid + block_group->key.offset; + + while (1) { + ret = btrfs_next_item(root, path); + if (ret < 0) + goto out; + if (ret) + break; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + + if (key.type == BTRFS_FREE_SPACE_INFO_KEY) + break; + + ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); + ASSERT(key.objectid < end && key.objectid + key.offset <= end); + + caching_ctl->progress = key.objectid; + + total_found += add_new_free_space(block_group, fs_info, + key.objectid, + key.objectid + key.offset); + if (total_found > CACHING_CTL_WAKE_UP) { + total_found = 0; + wake_up(&caching_ctl->wait); + } + extent_count++; + } + + if (extent_count != expected_extent_count) { + btrfs_err(fs_info, "incorrect extent count for %llu; counted %u, expected %u", + block_group->key.objectid, extent_count, + expected_extent_count); + ASSERT(0); + ret = -EIO; + goto out; + } + + caching_ctl->progress = (u64)-1; + + ret = 0; +out: + return ret; +} + +int load_free_space_tree(struct btrfs_caching_control *caching_ctl) +{ + struct btrfs_block_group_cache *block_group; + struct btrfs_fs_info *fs_info; + struct btrfs_free_space_info *info; + struct btrfs_path *path; + u32 extent_count, flags; + int ret; + + block_group = caching_ctl->block_group; + fs_info = block_group->fs_info; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + /* + * Just like caching_thread() doesn't want to deadlock on the extent + * tree, we don't want to deadlock on the free space tree. + */ + path->skip_locking = 1; + path->search_commit_root = 1; + path->reada = 1; + + info = search_free_space_info(NULL, fs_info, block_group, path, 0); + if (IS_ERR(info)) { + ret = PTR_ERR(info); + goto out; + } + extent_count = btrfs_free_space_extent_count(path->nodes[0], info); + flags = btrfs_free_space_flags(path->nodes[0], info); + + /* + * We left path pointing to the free space info item, so now + * load_free_space_foo can just iterate through the free space tree from + * there. + */ + if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) + ret = load_free_space_bitmaps(caching_ctl, path, extent_count); + else + ret = load_free_space_extents(caching_ctl, path, extent_count); + +out: + btrfs_free_path(path); + return ret; +} diff --git a/fs/btrfs/free-space-tree.h b/fs/btrfs/free-space-tree.h new file mode 100644 index 0000000..54ffced --- /dev/null +++ b/fs/btrfs/free-space-tree.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2015 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#ifndef __BTRFS_FREE_SPACE_TREE +#define __BTRFS_FREE_SPACE_TREE + +/* + * The default size for new free space bitmap items. The last bitmap in a block + * group may be truncated, and none of the free space tree code assumes that + * existing bitmaps are this size. + */ +#define BTRFS_FREE_SPACE_BITMAP_SIZE 256 +#define BTRFS_FREE_SPACE_BITMAP_BITS (BTRFS_FREE_SPACE_BITMAP_SIZE * BITS_PER_BYTE) + +void set_free_space_tree_thresholds(struct btrfs_block_group_cache *block_group); +int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info); +int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info); +int load_free_space_tree(struct btrfs_caching_control *caching_ctl); +int add_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group); +int remove_block_group_free_space(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group); +int add_to_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 start, u64 size); +int remove_from_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + u64 start, u64 size); + +/* Exposed for testing. */ +struct btrfs_free_space_info * +search_free_space_info(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, int cow); +int __add_to_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, u64 start, u64 size); +int __remove_from_free_space_tree(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, u64 start, u64 size); +int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path); +int convert_free_space_to_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *block_group, + struct btrfs_path *path); +int free_space_test_bit(struct btrfs_block_group_cache *block_group, + struct btrfs_path *path, u64 offset); + +#endif -- cgit v0.10.2 From 7c55ee0c4afba4434d973117234577ae6ff77a1c Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 29 Sep 2015 20:50:36 -0700 Subject: Btrfs: add free space tree sanity tests This tests the operations on the free space tree trying to excercise all of the main cases for both formats. Between this and xfstests, the free space tree should have pretty good coverage. Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 7661697..128ce17 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -16,4 +16,5 @@ btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \ tests/extent-buffer-tests.o tests/btrfs-tests.o \ - tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o + tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \ + tests/free-space-tree-tests.o diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 11d1eab..442bf43 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2212,6 +2212,9 @@ static int btrfs_run_sanity_tests(void) if (ret) goto out; ret = btrfs_test_qgroups(); + if (ret) + goto out; + ret = btrfs_test_free_space_tree(); out: btrfs_destroy_test_fs(); return ret; diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index 9626252..ba28cef 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -21,6 +21,9 @@ #include #include "btrfs-tests.h" #include "../ctree.h" +#include "../free-space-cache.h" +#include "../free-space-tree.h" +#include "../transaction.h" #include "../volumes.h" #include "../disk-io.h" #include "../qgroup.h" @@ -122,6 +125,9 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void) INIT_LIST_HEAD(&fs_info->tree_mod_seq_list); INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); + extent_io_tree_init(&fs_info->freed_extents[0], NULL); + extent_io_tree_init(&fs_info->freed_extents[1], NULL); + fs_info->pinned_extents = &fs_info->freed_extents[0]; return fs_info; } @@ -169,3 +175,49 @@ void btrfs_free_dummy_root(struct btrfs_root *root) kfree(root); } +struct btrfs_block_group_cache * +btrfs_alloc_dummy_block_group(unsigned long length) +{ + struct btrfs_block_group_cache *cache; + + cache = kzalloc(sizeof(*cache), GFP_NOFS); + if (!cache) + return NULL; + cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), + GFP_NOFS); + if (!cache->free_space_ctl) { + kfree(cache); + return NULL; + } + + cache->key.objectid = 0; + cache->key.offset = length; + cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; + cache->sectorsize = 4096; + cache->full_stripe_len = 4096; + + INIT_LIST_HEAD(&cache->list); + INIT_LIST_HEAD(&cache->cluster_list); + INIT_LIST_HEAD(&cache->bg_list); + btrfs_init_free_space_ctl(cache); + mutex_init(&cache->free_space_lock); + + return cache; +} + +void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache) +{ + if (!cache) + return; + __btrfs_remove_free_space_cache(cache->free_space_ctl); + kfree(cache->free_space_ctl); + kfree(cache); +} + +void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans) +{ + memset(trans, 0, sizeof(*trans)); + trans->transid = 1; + INIT_LIST_HEAD(&trans->qgroup_ref_list); + trans->type = __TRANS_DUMMY; +} diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h index fd39542..054b8c7 100644 --- a/fs/btrfs/tests/btrfs-tests.h +++ b/fs/btrfs/tests/btrfs-tests.h @@ -24,17 +24,23 @@ #define test_msg(fmt, ...) pr_info("BTRFS: selftest: " fmt, ##__VA_ARGS__) struct btrfs_root; +struct btrfs_trans_handle; int btrfs_test_free_space_cache(void); int btrfs_test_extent_buffer_operations(void); int btrfs_test_extent_io(void); int btrfs_test_inodes(void); int btrfs_test_qgroups(void); +int btrfs_test_free_space_tree(void); int btrfs_init_test_fs(void); void btrfs_destroy_test_fs(void); struct inode *btrfs_new_test_inode(void); struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(void); void btrfs_free_dummy_root(struct btrfs_root *root); +struct btrfs_block_group_cache * +btrfs_alloc_dummy_block_group(unsigned long length); +void btrfs_free_dummy_block_group(struct btrfs_block_group_cache *cache); +void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans); #else static inline int btrfs_test_free_space_cache(void) { @@ -63,6 +69,10 @@ static inline int btrfs_test_qgroups(void) { return 0; } +static inline int btrfs_test_free_space_tree(void) +{ + return 0; +} #endif #endif diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 2299bfd..bae6c59 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -22,35 +22,6 @@ #include "../free-space-cache.h" #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) -static struct btrfs_block_group_cache *init_test_block_group(void) -{ - struct btrfs_block_group_cache *cache; - - cache = kzalloc(sizeof(*cache), GFP_NOFS); - if (!cache) - return NULL; - cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), - GFP_NOFS); - if (!cache->free_space_ctl) { - kfree(cache); - return NULL; - } - - cache->key.objectid = 0; - cache->key.offset = 1024 * 1024 * 1024; - cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; - cache->sectorsize = 4096; - cache->full_stripe_len = 4096; - - spin_lock_init(&cache->lock); - INIT_LIST_HEAD(&cache->list); - INIT_LIST_HEAD(&cache->cluster_list); - INIT_LIST_HEAD(&cache->bg_list); - - btrfs_init_free_space_ctl(cache); - - return cache; -} /* * This test just does basic sanity checking, making sure we can add an exten @@ -883,7 +854,7 @@ int btrfs_test_free_space_cache(void) test_msg("Running btrfs free space cache tests\n"); - cache = init_test_block_group(); + cache = btrfs_alloc_dummy_block_group(1024 * 1024 * 1024); if (!cache) { test_msg("Couldn't run the tests\n"); return 0; @@ -901,9 +872,7 @@ int btrfs_test_free_space_cache(void) ret = test_steal_space_from_bitmap_to_extent(cache); out: - __btrfs_remove_free_space_cache(cache->free_space_ctl); - kfree(cache->free_space_ctl); - kfree(cache); + btrfs_free_dummy_block_group(cache); test_msg("Free space cache tests finished\n"); return ret; } diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c new file mode 100644 index 0000000..d05fe1a --- /dev/null +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -0,0 +1,571 @@ +/* + * Copyright (C) 2015 Facebook. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include "btrfs-tests.h" +#include "../ctree.h" +#include "../disk-io.h" +#include "../free-space-tree.h" +#include "../transaction.h" + +struct free_space_extent { + u64 start, length; +}; + +/* + * The test cases align their operations to this in order to hit some of the + * edge cases in the bitmap code. + */ +#define BITMAP_RANGE (BTRFS_FREE_SPACE_BITMAP_BITS * 4096) + +static int __check_free_space_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path, + struct free_space_extent *extents, + unsigned int num_extents) +{ + struct btrfs_free_space_info *info; + struct btrfs_key key; + int prev_bit = 0, bit; + u64 extent_start = 0, offset, end; + u32 flags, extent_count; + unsigned int i; + int ret; + + info = search_free_space_info(trans, fs_info, cache, path, 0); + if (IS_ERR(info)) { + test_msg("Could not find free space info\n"); + ret = PTR_ERR(info); + goto out; + } + flags = btrfs_free_space_flags(path->nodes[0], info); + extent_count = btrfs_free_space_extent_count(path->nodes[0], info); + + if (extent_count != num_extents) { + test_msg("Extent count is wrong\n"); + ret = -EINVAL; + goto out; + } + if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) { + if (path->slots[0] != 0) + goto invalid; + end = cache->key.objectid + cache->key.offset; + i = 0; + while (++path->slots[0] < btrfs_header_nritems(path->nodes[0])) { + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.type != BTRFS_FREE_SPACE_BITMAP_KEY) + goto invalid; + offset = key.objectid; + while (offset < key.objectid + key.offset) { + bit = free_space_test_bit(cache, path, offset); + if (prev_bit == 0 && bit == 1) { + extent_start = offset; + } else if (prev_bit == 1 && bit == 0) { + if (i >= num_extents) + goto invalid; + if (i >= num_extents || + extent_start != extents[i].start || + offset - extent_start != extents[i].length) + goto invalid; + i++; + } + prev_bit = bit; + offset += cache->sectorsize; + } + } + if (prev_bit == 1) { + if (i >= num_extents || + extent_start != extents[i].start || + end - extent_start != extents[i].length) + goto invalid; + i++; + } + if (i != num_extents) + goto invalid; + } else { + if (btrfs_header_nritems(path->nodes[0]) != num_extents + 1 || + path->slots[0] != 0) + goto invalid; + for (i = 0; i < num_extents; i++) { + path->slots[0]++; + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY || + key.objectid != extents[i].start || + key.offset != extents[i].length) + goto invalid; + } + } + + ret = 0; +out: + btrfs_release_path(path); + return ret; +invalid: + test_msg("Free space tree is invalid\n"); + ret = -EINVAL; + goto out; +} + +static int check_free_space_extents(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path, + struct free_space_extent *extents, + unsigned int num_extents) +{ + struct btrfs_free_space_info *info; + u32 flags; + int ret; + + info = search_free_space_info(trans, fs_info, cache, path, 0); + if (IS_ERR(info)) { + test_msg("Could not find free space info\n"); + btrfs_release_path(path); + return PTR_ERR(info); + } + flags = btrfs_free_space_flags(path->nodes[0], info); + btrfs_release_path(path); + + ret = __check_free_space_extents(trans, fs_info, cache, path, extents, + num_extents); + if (ret) + return ret; + + /* Flip it to the other format and check that for good measure. */ + if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) { + ret = convert_free_space_to_extents(trans, fs_info, cache, path); + if (ret) { + test_msg("Could not convert to extents\n"); + return ret; + } + } else { + ret = convert_free_space_to_bitmaps(trans, fs_info, cache, path); + if (ret) { + test_msg("Could not convert to bitmaps\n"); + return ret; + } + } + return __check_free_space_extents(trans, fs_info, cache, path, extents, + num_extents); +} + +static int test_empty_block_group(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path) +{ + struct free_space_extent extents[] = { + {cache->key.objectid, cache->key.offset}, + }; + + return check_free_space_extents(trans, fs_info, cache, path, + extents, ARRAY_SIZE(extents)); +} + +static int test_remove_all(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path) +{ + struct free_space_extent extents[] = {}; + int ret; + + ret = __remove_from_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid, + cache->key.offset); + if (ret) { + test_msg("Could not remove free space\n"); + return ret; + } + + return check_free_space_extents(trans, fs_info, cache, path, + extents, ARRAY_SIZE(extents)); +} + +static int test_remove_beginning(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path) +{ + struct free_space_extent extents[] = { + {cache->key.objectid + BITMAP_RANGE, + cache->key.offset - BITMAP_RANGE}, + }; + int ret; + + ret = __remove_from_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid, BITMAP_RANGE); + if (ret) { + test_msg("Could not remove free space\n"); + return ret; + } + + return check_free_space_extents(trans, fs_info, cache, path, + extents, ARRAY_SIZE(extents)); + +} + +static int test_remove_end(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path) +{ + struct free_space_extent extents[] = { + {cache->key.objectid, cache->key.offset - BITMAP_RANGE}, + }; + int ret; + + ret = __remove_from_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid + + cache->key.offset - BITMAP_RANGE, + BITMAP_RANGE); + if (ret) { + test_msg("Could not remove free space\n"); + return ret; + } + + return check_free_space_extents(trans, fs_info, cache, path, + extents, ARRAY_SIZE(extents)); +} + +static int test_remove_middle(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path) +{ + struct free_space_extent extents[] = { + {cache->key.objectid, BITMAP_RANGE}, + {cache->key.objectid + 2 * BITMAP_RANGE, + cache->key.offset - 2 * BITMAP_RANGE}, + }; + int ret; + + ret = __remove_from_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid + BITMAP_RANGE, + BITMAP_RANGE); + if (ret) { + test_msg("Could not remove free space\n"); + return ret; + } + + return check_free_space_extents(trans, fs_info, cache, path, + extents, ARRAY_SIZE(extents)); +} + +static int test_merge_left(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path) +{ + struct free_space_extent extents[] = { + {cache->key.objectid, 2 * BITMAP_RANGE}, + }; + int ret; + + ret = __remove_from_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid, + cache->key.offset); + if (ret) { + test_msg("Could not remove free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid, BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid + BITMAP_RANGE, + BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + return check_free_space_extents(trans, fs_info, cache, path, + extents, ARRAY_SIZE(extents)); +} + +static int test_merge_right(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path) +{ + struct free_space_extent extents[] = { + {cache->key.objectid + BITMAP_RANGE, 2 * BITMAP_RANGE}, + }; + int ret; + + ret = __remove_from_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid, + cache->key.offset); + if (ret) { + test_msg("Could not remove free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid + 2 * BITMAP_RANGE, + BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid + BITMAP_RANGE, + BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + return check_free_space_extents(trans, fs_info, cache, path, + extents, ARRAY_SIZE(extents)); +} + +static int test_merge_both(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path) +{ + struct free_space_extent extents[] = { + {cache->key.objectid, 3 * BITMAP_RANGE}, + }; + int ret; + + ret = __remove_from_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid, + cache->key.offset); + if (ret) { + test_msg("Could not remove free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid, BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid + 2 * BITMAP_RANGE, + BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid + BITMAP_RANGE, + BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + return check_free_space_extents(trans, fs_info, cache, path, + extents, ARRAY_SIZE(extents)); +} + +static int test_merge_none(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *cache, + struct btrfs_path *path) +{ + struct free_space_extent extents[] = { + {cache->key.objectid, BITMAP_RANGE}, + {cache->key.objectid + 2 * BITMAP_RANGE, BITMAP_RANGE}, + {cache->key.objectid + 4 * BITMAP_RANGE, BITMAP_RANGE}, + }; + int ret; + + ret = __remove_from_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid, + cache->key.offset); + if (ret) { + test_msg("Could not remove free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid, BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid + 4 * BITMAP_RANGE, + BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + ret = __add_to_free_space_tree(trans, fs_info, cache, path, + cache->key.objectid + 2 * BITMAP_RANGE, + BITMAP_RANGE); + if (ret) { + test_msg("Could not add free space\n"); + return ret; + } + + return check_free_space_extents(trans, fs_info, cache, path, + extents, ARRAY_SIZE(extents)); +} + +typedef int (*test_func_t)(struct btrfs_trans_handle *, + struct btrfs_fs_info *, + struct btrfs_block_group_cache *, + struct btrfs_path *); + +static int run_test(test_func_t test_func, int bitmaps) +{ + struct btrfs_root *root = NULL; + struct btrfs_block_group_cache *cache = NULL; + struct btrfs_trans_handle trans; + struct btrfs_path *path = NULL; + int ret; + + root = btrfs_alloc_dummy_root(); + if (IS_ERR(root)) { + test_msg("Couldn't allocate dummy root\n"); + ret = PTR_ERR(root); + goto out; + } + + root->fs_info = btrfs_alloc_dummy_fs_info(); + if (!root->fs_info) { + test_msg("Couldn't allocate dummy fs info\n"); + ret = -ENOMEM; + goto out; + } + + btrfs_set_super_compat_ro_flags(root->fs_info->super_copy, + BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE); + root->fs_info->free_space_root = root; + root->fs_info->tree_root = root; + + root->node = alloc_test_extent_buffer(root->fs_info, 4096); + if (!root->node) { + test_msg("Couldn't allocate dummy buffer\n"); + ret = -ENOMEM; + goto out; + } + btrfs_set_header_level(root->node, 0); + btrfs_set_header_nritems(root->node, 0); + root->alloc_bytenr += 8192; + + cache = btrfs_alloc_dummy_block_group(8 * BITMAP_RANGE); + if (!cache) { + test_msg("Couldn't allocate dummy block group cache\n"); + ret = -ENOMEM; + goto out; + } + cache->bitmap_low_thresh = 0; + cache->bitmap_high_thresh = (u32)-1; + cache->needs_free_space = 1; + + btrfs_init_dummy_trans(&trans); + + path = btrfs_alloc_path(); + if (!path) { + test_msg("Couldn't allocate path\n"); + return -ENOMEM; + } + + ret = add_block_group_free_space(&trans, root->fs_info, cache); + if (ret) { + test_msg("Could not add block group free space\n"); + goto out; + } + + if (bitmaps) { + ret = convert_free_space_to_bitmaps(&trans, root->fs_info, + cache, path); + if (ret) { + test_msg("Could not convert block group to bitmaps\n"); + goto out; + } + } + + ret = test_func(&trans, root->fs_info, cache, path); + if (ret) + goto out; + + ret = remove_block_group_free_space(&trans, root->fs_info, cache); + if (ret) { + test_msg("Could not remove block group free space\n"); + goto out; + } + + if (btrfs_header_nritems(root->node) != 0) { + test_msg("Free space tree has leftover items\n"); + ret = -EINVAL; + goto out; + } + + ret = 0; +out: + btrfs_free_path(path); + btrfs_free_dummy_block_group(cache); + btrfs_free_dummy_root(root); + return ret; +} + +static int run_test_both_formats(test_func_t test_func) +{ + int ret; + + ret = run_test(test_func, 0); + if (ret) + return ret; + return run_test(test_func, 1); +} + +int btrfs_test_free_space_tree(void) +{ + test_func_t tests[] = { + test_empty_block_group, + test_remove_all, + test_remove_beginning, + test_remove_end, + test_remove_middle, + test_merge_left, + test_merge_right, + test_merge_both, + test_merge_none, + }; + int i; + + test_msg("Running free space tree tests\n"); + for (i = 0; i < ARRAY_SIZE(tests); i++) { + int ret = run_test_both_formats(tests[i]); + if (ret) { + test_msg("%pf failed\n", tests[i]); + return ret; + } + } + + return 0; +} diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 846d277..8ea5d34 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -23,14 +23,6 @@ #include "../qgroup.h" #include "../backref.h" -static void init_dummy_trans(struct btrfs_trans_handle *trans) -{ - memset(trans, 0, sizeof(*trans)); - trans->transid = 1; - INIT_LIST_HEAD(&trans->qgroup_ref_list); - trans->type = __TRANS_DUMMY; -} - static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid) { @@ -44,7 +36,7 @@ static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr, u32 size = sizeof(*item) + sizeof(*iref) + sizeof(*block_info); int ret; - init_dummy_trans(&trans); + btrfs_init_dummy_trans(&trans); ins.objectid = bytenr; ins.type = BTRFS_EXTENT_ITEM_KEY; @@ -94,7 +86,7 @@ static int add_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 refs; int ret; - init_dummy_trans(&trans); + btrfs_init_dummy_trans(&trans); key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; @@ -144,7 +136,7 @@ static int remove_extent_item(struct btrfs_root *root, u64 bytenr, struct btrfs_path *path; int ret; - init_dummy_trans(&trans); + btrfs_init_dummy_trans(&trans); key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; @@ -178,7 +170,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, u64 refs; int ret; - init_dummy_trans(&trans); + btrfs_init_dummy_trans(&trans); key.objectid = bytenr; key.type = BTRFS_EXTENT_ITEM_KEY; @@ -232,7 +224,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root) struct ulist *new_roots = NULL; int ret; - init_dummy_trans(&trans); + btrfs_init_dummy_trans(&trans); test_msg("Qgroup basic add\n"); ret = btrfs_create_qgroup(NULL, fs_info, 5); @@ -326,7 +318,7 @@ static int test_multiple_refs(struct btrfs_root *root) struct ulist *new_roots = NULL; int ret; - init_dummy_trans(&trans); + btrfs_init_dummy_trans(&trans); test_msg("Qgroup multiple refs test\n"); -- cgit v0.10.2 From 1e144fb8f4a4d6d6d88c58f87e4366e3cd02ab72 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 29 Sep 2015 20:50:37 -0700 Subject: Btrfs: wire up the free space tree to the extent tree The free space tree is updated in tandem with the extent tree. There are only a handful of places where we need to hook in: 1. Block group creation 2. Block group deletion 3. Delayed refs (extent creation and deletion) 4. Block group caching Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e07280c..a4a4f59 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -33,6 +33,7 @@ #include "raid56.h" #include "locking.h" #include "free-space-cache.h" +#include "free-space-tree.h" #include "math.h" #include "sysfs.h" #include "qgroup.h" @@ -518,7 +519,10 @@ static noinline void caching_thread(struct btrfs_work *work) mutex_lock(&caching_ctl->mutex); down_read(&fs_info->commit_root_sem); - ret = load_extent_tree_free(caching_ctl); + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) + ret = load_free_space_tree(caching_ctl); + else + ret = load_extent_tree_free(caching_ctl); spin_lock(&block_group->lock); block_group->caching_ctl = NULL; @@ -624,8 +628,8 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, } } else { /* - * We are not going to do the fast caching, set cached to the - * appropriate value and wakeup any waiters. + * We're either using the free space tree or no caching at all. + * Set cached to the appropriate value and wakeup any waiters. */ spin_lock(&cache->lock); if (load_cache_only) { @@ -6479,6 +6483,13 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, } } + ret = add_to_free_space_tree(trans, root->fs_info, bytenr, + num_bytes); + if (ret) { + btrfs_abort_transaction(trans, extent_root, ret); + goto out; + } + ret = update_block_group(trans, root, bytenr, num_bytes, 0); if (ret) { btrfs_abort_transaction(trans, extent_root, ret); @@ -7422,6 +7433,11 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); btrfs_free_path(path); + ret = remove_from_free_space_tree(trans, fs_info, ins->objectid, + ins->offset); + if (ret) + return ret; + ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); if (ret) { /* -ENOENT, logic error */ btrfs_err(fs_info, "update block group failed for %llu %llu", @@ -7503,6 +7519,11 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + ret = remove_from_free_space_tree(trans, fs_info, ins->objectid, + num_bytes); + if (ret) + return ret; + ret = update_block_group(trans, root, ins->objectid, root->nodesize, 1); if (ret) { /* -ENOENT, logic error */ @@ -9370,6 +9391,8 @@ btrfs_create_block_group_cache(struct btrfs_root *root, u64 start, u64 size) cache->full_stripe_len = btrfs_full_stripe_len(root, &root->fs_info->mapping_tree, start); + set_free_space_tree_thresholds(cache); + atomic_set(&cache->count, 1); spin_lock_init(&cache->lock); init_rwsem(&cache->data_rwsem); @@ -9592,6 +9615,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, key.objectid, key.offset); if (ret) btrfs_abort_transaction(trans, extent_root, ret); + add_block_group_free_space(trans, root->fs_info, block_group); + /* already aborted the transaction if it failed. */ next: list_del_init(&block_group->bg_list); } @@ -9622,6 +9647,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, cache->flags = type; cache->last_byte_to_unpin = (u64)-1; cache->cached = BTRFS_CACHE_FINISHED; + cache->needs_free_space = 1; ret = exclude_super_stripes(root, cache); if (ret) { /* @@ -9984,6 +10010,10 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, unlock_chunks(root); + ret = remove_block_group_free_space(trans, root->fs_info, block_group); + if (ret) + goto out; + btrfs_put_block_group(block_group); btrfs_put_block_group(block_group); -- cgit v0.10.2 From 70f6d82ec73c3ae2d3adc6853c5bebcd73610097 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 29 Sep 2015 20:50:38 -0700 Subject: Btrfs: add free space tree mount option Now we can finally hook up everything so we can actually use free space tree. The free space tree is enabled by passing the space_cache=v2 mount option. On the first mount with the this option set, the free space tree will be created and the FREE_SPACE_TREE read-only compat bit will be set. Any time the filesystem is mounted from then on, we must use the free space tree. The clear_cache option will also clear the free space tree. Signed-off-by: Omar Sandoval Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0888c3e..ed610f9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -531,7 +531,10 @@ struct btrfs_super_block { #define BTRFS_FEATURE_COMPAT_SUPP 0ULL #define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL #define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL -#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL + +#define BTRFS_FEATURE_COMPAT_RO_SUPP \ + (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE) + #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL @@ -2203,6 +2206,7 @@ struct btrfs_ioctl_defrag_range_args { #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21) #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR (1 << 22) #define BTRFS_MOUNT_RESCAN_UUID_TREE (1 << 23) +#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 24) #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) #define BTRFS_DEFAULT_MAX_INLINE (8192) @@ -3744,6 +3748,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info) kfree(fs_info->csum_root); kfree(fs_info->quota_root); kfree(fs_info->uuid_root); + kfree(fs_info->free_space_root); kfree(fs_info->super_copy); kfree(fs_info->super_for_commit); security_free_mnt_opts(&fs_info->security_opts); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 1e60d00..af7ac28 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -42,6 +42,7 @@ #include "locking.h" #include "tree-log.h" #include "free-space-cache.h" +#include "free-space-tree.h" #include "inode-map.h" #include "check-integrity.h" #include "rcu-string.h" @@ -1647,6 +1648,9 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info, if (location->objectid == BTRFS_UUID_TREE_OBJECTID) return fs_info->uuid_root ? fs_info->uuid_root : ERR_PTR(-ENOENT); + if (location->objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) + return fs_info->free_space_root ? fs_info->free_space_root : + ERR_PTR(-ENOENT); again: root = btrfs_lookup_fs_root(fs_info, location->objectid); if (root) { @@ -2144,6 +2148,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) free_root_extent_buffers(info->uuid_root); if (chunk_root) free_root_extent_buffers(info->chunk_root); + free_root_extent_buffers(info->free_space_root); } void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info) @@ -2445,6 +2450,15 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info, fs_info->uuid_root = root; } + if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + location.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID; + root = btrfs_read_tree_root(tree_root, &location); + if (IS_ERR(root)) + return PTR_ERR(root); + set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); + fs_info->free_space_root = root; + } + return 0; } @@ -3073,6 +3087,30 @@ retry_root_backup: btrfs_qgroup_rescan_resume(fs_info); + if (btrfs_test_opt(tree_root, CLEAR_CACHE) && + btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + pr_info("BTRFS: clearing free space tree\n"); + ret = btrfs_clear_free_space_tree(fs_info); + if (ret) { + pr_warn("BTRFS: failed to clear free space tree %d\n", + ret); + close_ctree(tree_root); + return ret; + } + } + + if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) && + !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + pr_info("BTRFS: creating free space tree\n"); + ret = btrfs_create_free_space_tree(fs_info); + if (ret) { + pr_warn("BTRFS: failed to create free space tree %d\n", + ret); + close_ctree(tree_root); + return ret; + } + } + if (!fs_info->uuid_root) { pr_info("BTRFS: creating UUID tree\n"); ret = btrfs_create_uuid_tree(fs_info); diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 442bf43..bfdaf12 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -312,10 +312,11 @@ enum { Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, Opt_compress_type, Opt_compress_force, Opt_compress_force_type, Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard, - Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed, - Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache, - Opt_no_space_cache, Opt_recovery, Opt_skip_balance, - Opt_check_integrity, Opt_check_integrity_including_extent_data, + Opt_space_cache, Opt_space_cache_version, Opt_clear_cache, + Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid, + Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery, + Opt_skip_balance, Opt_check_integrity, + Opt_check_integrity_including_extent_data, Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree, Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard, Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow, @@ -354,6 +355,7 @@ static match_table_t tokens = { {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, {Opt_space_cache, "space_cache"}, + {Opt_space_cache_version, "space_cache=%s"}, {Opt_clear_cache, "clear_cache"}, {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, {Opt_enospc_debug, "enospc_debug"}, @@ -392,7 +394,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) bool compress_force = false; cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); - if (cache_gen) + if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE)) + btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE); + else if (cache_gen) btrfs_set_opt(info->mount_opt, SPACE_CACHE); if (!options) @@ -626,15 +630,35 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) "turning off discard"); break; case Opt_space_cache: - btrfs_set_and_info(root, SPACE_CACHE, - "enabling disk space caching"); + case Opt_space_cache_version: + if (token == Opt_space_cache || + strcmp(args[0].from, "v1") == 0) { + btrfs_clear_opt(root->fs_info->mount_opt, + FREE_SPACE_TREE); + btrfs_set_and_info(root, SPACE_CACHE, + "enabling disk space caching"); + } else if (strcmp(args[0].from, "v2") == 0) { + btrfs_clear_opt(root->fs_info->mount_opt, + SPACE_CACHE); + btrfs_set_and_info(root, FREE_SPACE_TREE, + "enabling free space tree"); + } else { + ret = -EINVAL; + goto out; + } break; case Opt_rescan_uuid_tree: btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); break; case Opt_no_space_cache: - btrfs_clear_and_info(root, SPACE_CACHE, - "disabling disk space caching"); + if (btrfs_test_opt(root, SPACE_CACHE)) { + btrfs_clear_and_info(root, SPACE_CACHE, + "disabling disk space caching"); + } + if (btrfs_test_opt(root, FREE_SPACE_TREE)) { + btrfs_clear_and_info(root, FREE_SPACE_TREE, + "disabling free space tree"); + } break; case Opt_inode_cache: btrfs_set_pending_and_info(info, INODE_MAP_CACHE, @@ -747,8 +771,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) } } out: + if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) && + !btrfs_test_opt(root, FREE_SPACE_TREE) && + !btrfs_test_opt(root, CLEAR_CACHE)) { + btrfs_err(root->fs_info, "cannot disable free space tree"); + ret = -EINVAL; + + } if (!ret && btrfs_test_opt(root, SPACE_CACHE)) btrfs_info(root->fs_info, "disk space caching is enabled"); + if (!ret && btrfs_test_opt(root, FREE_SPACE_TREE)) + btrfs_info(root->fs_info, "using free space tree"); kfree(orig); return ret; } @@ -1155,6 +1188,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",noacl"); if (btrfs_test_opt(root, SPACE_CACHE)) seq_puts(seq, ",space_cache"); + else if (btrfs_test_opt(root, FREE_SPACE_TREE)) + seq_puts(seq, ",space_cache=v2"); else seq_puts(seq, ",nospace_cache"); if (btrfs_test_opt(root, RESCAN_UUID_TREE)) -- cgit v0.10.2 From 0376374a98abd533fb49c6db12967bddc2f4b4b3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 18 Dec 2015 01:57:29 +0000 Subject: Btrfs: fix locking bugs when defragging leaves When running fstests btrfs/070, with a higher number of fsstress operations, I ran frequently into two different locking bugs when defragging directories. The first bug produced the following traces: [133860.229792] ------------[ cut here ]------------ [133860.251062] WARNING: CPU: 2 PID: 26057 at fs/btrfs/locking.c:46 btrfs_set_lock_blocking_rw+0x57/0xbd [btrfs]() [133860.253576] Modules linked in: btrfs crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix4 psmouse parport [133860.282566] CPU: 2 PID: 26057 Comm: btrfs Tainted: G W 4.3.0-rc5-btrfs-next-17+ #1 [133860.284393] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [133860.286827] 0000000000000000 ffff880207697b78 ffffffff812566f4 0000000000000000 [133860.288341] ffff880207697bb0 ffffffff8104d0a6 ffffffffa052d4c1 ffff880178f60e00 [133860.294219] ffff880178f60e00 0000000000000000 00000000000000f6 ffff880207697bc0 [133860.295831] Call Trace: [133860.306518] [] dump_stack+0x4e/0x79 [133860.307473] [] warn_slowpath_common+0x9f/0xb8 [133860.308619] [] ? btrfs_set_lock_blocking_rw+0x57/0xbd [btrfs] [133860.310068] [] warn_slowpath_null+0x1a/0x1c [133860.312552] [] btrfs_set_lock_blocking_rw+0x57/0xbd [btrfs] [133860.314630] [] btrfs_set_lock_blocking+0xe/0x10 [btrfs] [133860.323596] [] btrfs_realloc_node+0xb3/0x341 [btrfs] [133860.325233] [] btrfs_defrag_leaves+0x239/0x2fa [btrfs] [133860.332427] [] btrfs_defrag_root+0x63/0xca [btrfs] [133860.337259] [] btrfs_ioctl_defrag+0x78/0x14e [btrfs] [133860.340147] [] btrfs_ioctl+0x746/0x24c6 [btrfs] [133860.344833] [] ? arch_local_irq_save+0x9/0xc [133860.346343] [] ? __might_fault+0x4c/0xa7 [133860.353248] [] ? __might_fault+0x4c/0xa7 [133860.354242] [] ? __might_fault+0xa5/0xa7 [133860.355232] [] ? cp_new_stat+0x15d/0x174 [133860.356237] [] do_vfs_ioctl+0x427/0x4e6 [133860.358587] [] ? SYSC_newfstat+0x25/0x2e [133860.360195] [] ? __fget_light+0x4d/0x71 [133860.361380] [] SyS_ioctl+0x57/0x79 [133860.363578] [] entry_SYSCALL_64_fastpath+0x12/0x6f [133860.366217] ---[ end trace 2cadb2f653437e49 ]--- [133860.367399] ------------[ cut here ]------------ [133860.368162] kernel BUG at fs/btrfs/locking.c:307! [133860.369430] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [133860.370205] Modules linked in: btrfs crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix4 psmouse parport [133860.370205] CPU: 2 PID: 26057 Comm: btrfs Tainted: G W 4.3.0-rc5-btrfs-next-17+ #1 [133860.370205] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [133860.370205] task: ffff8800aec6db40 ti: ffff880207694000 task.ti: ffff880207694000 [133860.370205] RIP: 0010:[] [] btrfs_assert_tree_locked+0x10/0x14 [btrfs] [133860.370205] RSP: 0018:ffff880207697bc0 EFLAGS: 00010246 [133860.370205] RAX: 0000000000000000 RBX: ffff880178f60e00 RCX: 0000000000000000 [133860.370205] RDX: ffff88023ec4fb50 RSI: 00000000ffffffff RDI: ffff880178f60e00 [133860.370205] RBP: ffff880207697bc0 R08: 0000000000000001 R09: 0000000000000000 [133860.370205] R10: 0000160000000000 R11: ffffffff81651000 R12: ffff880178f60e00 [133860.370205] R13: 0000000000000000 R14: 00000000000000f6 R15: ffff8801ff409000 [133860.370205] FS: 00007f763efd48c0(0000) GS:ffff88023ec40000(0000) knlGS:0000000000000000 [133860.370205] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [133860.370205] CR2: 0000000002158048 CR3: 000000003fd6c000 CR4: 00000000000006e0 [133860.370205] Stack: [133860.370205] ffff880207697bd8 ffffffffa052d4d0 0000000000000000 ffff880207697be8 [133860.370205] ffffffffa04d5787 ffff880207697c80 ffffffffa04d99cb ffff8801ff409590 [133860.370205] ffff880207697ca8 000000f507697c80 ffff880183c11bb8 0000000000000000 [133860.370205] Call Trace: [133860.370205] [] btrfs_set_lock_blocking_rw+0x66/0xbd [btrfs] [133860.370205] [] btrfs_set_lock_blocking+0xe/0x10 [btrfs] [133860.370205] [] btrfs_realloc_node+0xb3/0x341 [btrfs] [133860.370205] [] btrfs_defrag_leaves+0x239/0x2fa [btrfs] [133860.370205] [] btrfs_defrag_root+0x63/0xca [btrfs] [133860.370205] [] btrfs_ioctl_defrag+0x78/0x14e [btrfs] [133860.370205] [] btrfs_ioctl+0x746/0x24c6 [btrfs] [133860.370205] [] ? arch_local_irq_save+0x9/0xc [133860.370205] [] ? __might_fault+0x4c/0xa7 [133860.370205] [] ? __might_fault+0x4c/0xa7 [133860.370205] [] ? __might_fault+0xa5/0xa7 [133860.370205] [] ? cp_new_stat+0x15d/0x174 [133860.370205] [] do_vfs_ioctl+0x427/0x4e6 [133860.370205] [] ? SYSC_newfstat+0x25/0x2e [133860.370205] [] ? __fget_light+0x4d/0x71 [133860.370205] [] SyS_ioctl+0x57/0x79 [133860.370205] [] entry_SYSCALL_64_fastpath+0x12/0x6f This bug happened because we assumed that by setting keep_locks to 1 in our search path, our path after a call to btrfs_search_slot() would have all nodes locked, which is not always true because unlock_up() (called by btrfs_search_slot()) will unlock a node in a path if the slot of the node below it doesn't point to the last item or beyond the last item. For example, when the tree has a heigth of 2 and path->slots[0] has a value smaller than btrfs_header_nritems(path->nodes[0]) - 1, the node at level 2 will be unlocked (also because lowest_unlock is set to 1 due to the fact that the value passed as ins_len to btrfs_search_slot is 0). This resulted in btrfs_find_next_key(), called before btrfs_realloc_node(), to release out path and call again btrfs_search_slot(), but this time with the cow parameter set to 0, meaning the resulting path got only read locks. Therefore when we called btrfs_realloc_node(), with path->nodes[1] having a read lock, it resulted in the warning and BUG_ON when calling btrfs_set_lock_blocking() against the node, as that function expects the node to have a write lock. The second bug happened often when the first bug didn't happen, and made us hang and hitting the following warning at fs/btrfs/locking.c: 251 void btrfs_tree_lock(struct extent_buffer *eb) 252 { 253 WARN_ON(eb->lock_owner == current->pid); This happened because the tree search we made at btrfs_defrag_leaves() before calling btrfs_find_next_key() locked a leaf and all the other nodes in the path, so btrfs_find_next_key() had no need to release the path and make a new search (with path->lowest_level set to 1). This made btrfs_realloc_node() attempt to write lock the same leaf again, resulting in a hang/deadlock. So fix these issues by calling btrfs_find_next_key() after calling btrfs_realloc_node() and setting the search path's lowest_level to 1 to avoid the hang/deadlock when attempting to write lock the leaves at btrfs_realloc_node(). Signed-off-by: Filipe Manana diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index f31db43..cb65089 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -89,6 +89,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, goto out; } btrfs_release_path(path); + /* + * We don't need a lock on a leaf. btrfs_realloc_node() will lock all + * leafs from path->nodes[1], so set lowest_level to 1 to avoid later + * a deadlock (attempting to write lock an already write locked leaf). + */ + path->lowest_level = 1; wret = btrfs_search_slot(trans, root, &key, path, 0, 1); if (wret < 0) { @@ -99,9 +105,12 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, ret = 0; goto out; } - path->slots[1] = btrfs_header_nritems(path->nodes[1]); - next_key_ret = btrfs_find_next_key(root, path, &key, 1, - min_trans); + /* + * The node at level 1 must always be locked when our path has + * keep_locks set and lowest_level is 1, regardless of the value of + * path->slots[1]. + */ + BUG_ON(path->locks[1] == 0); ret = btrfs_realloc_node(trans, root, path->nodes[1], 0, &last_ret, @@ -110,6 +119,18 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, WARN_ON(ret == -EAGAIN); goto out; } + /* + * Now that we reallocated the node we can find the next key. Note that + * btrfs_find_next_key() can release our path and do another search + * without COWing, this is because even with path->keep_locks = 1, + * btrfs_search_slot() / ctree.c:unlock_up() does not keeps a lock on a + * node when path->slots[node_level - 1] does not point to the last + * item or a slot beyond the last item (ctree.c:unlock_up()). Therefore + * we search for the next key after reallocating our node. + */ + path->slots[1] = btrfs_header_nritems(path->nodes[1]); + next_key_ret = btrfs_find_next_key(root, path, &key, 1, + min_trans); if (next_key_ret == 0) { memcpy(&root->defrag_progress, &key, sizeof(key)); ret = -EAGAIN; -- cgit v0.10.2 From e44081ef611832b47a86abf4e36dc0ed2e950884 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 18 Dec 2015 03:02:48 +0000 Subject: Btrfs: fix unprotected list operations at btrfs_write_dirty_block_groups We call btrfs_write_dirty_block_groups() in the critical section of a transaction's commit, when no other tasks can join the transaction and add more block groups to the transaction's list of dirty block groups, so we not taking the dirty block groups spinlock when checking for the list's emptyness, grabbing its first element or deleting elements from it. However there's a special and rare case where we can have a concurrent task adding elements to this list. We trigger writeback for space caches before at btrfs_start_dirty_block_groups() and in past iterations of the loop at btrfs_write_dirty_block_groups(), this means that when the writeback finishes (which happens asynchronously) it creates a task for the endio free space work queue that executes btrfs_finish_ordered_io() - this function is able to join the transaction, through btrfs_join_transaction_nolock(), and update the free space cache's inode item in the root tree, which can result in COWing nodes of this tree and therefore allocation of a new block group can happen, which gets added to the transaction's list of dirty block groups while the transaction commit task is operating on it concurrently. So fix this by taking the dirty block groups spinlock before doing operations on the dirty block groups list at btrfs_write_dirty_block_groups(). Signed-off-by: Filipe Manana diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4661db..e6993f6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3684,11 +3684,21 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, return -ENOMEM; /* - * We don't need the lock here since we are protected by the transaction - * commit. We want to do the cache_save_setup first and then run the + * Even though we are in the critical section of the transaction commit, + * we can still have concurrent tasks adding elements to this + * transaction's list of dirty block groups. These tasks correspond to + * endio free space workers started when writeback finishes for a + * space cache, which run inode.c:btrfs_finish_ordered_io(), and can + * allocate new block groups as a result of COWing nodes of the root + * tree when updating the free space inode. The writeback for the space + * caches is triggered by an earlier call to + * btrfs_start_dirty_block_groups() and iterations of the following + * loop. + * Also we want to do the cache_save_setup first and then run the * delayed refs to make sure we have the best chance at doing this all * in one shot. */ + spin_lock(&cur_trans->dirty_bgs_lock); while (!list_empty(&cur_trans->dirty_bgs)) { cache = list_first_entry(&cur_trans->dirty_bgs, struct btrfs_block_group_cache, @@ -3700,11 +3710,13 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, * finish and then do it all again */ if (!list_empty(&cache->io_list)) { + spin_unlock(&cur_trans->dirty_bgs_lock); list_del_init(&cache->io_list); btrfs_wait_cache_io(root, trans, cache, &cache->io_ctl, path, cache->key.objectid); btrfs_put_block_group(cache); + spin_lock(&cur_trans->dirty_bgs_lock); } /* @@ -3712,6 +3724,7 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, * on any pending IO */ list_del_init(&cache->dirty_list); + spin_unlock(&cur_trans->dirty_bgs_lock); should_put = 1; cache_save_setup(cache, trans, path); @@ -3743,7 +3756,9 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, /* if its not on the io list, we need to put the block group */ if (should_put) btrfs_put_block_group(cache); + spin_lock(&cur_trans->dirty_bgs_lock); } + spin_unlock(&cur_trans->dirty_bgs_lock); while (!list_empty(io)) { cache = list_first_entry(io, struct btrfs_block_group_cache, -- cgit v0.10.2 From 140e639f1a3ff052c3921818e2120fdfa4427681 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 23 Dec 2015 13:30:51 -0800 Subject: btrfs: fix warning on uninit variable in btrfs_finish_chunk_alloc map->num_stripes really can't be zero, but just in case. Signed-off-by: Chris Mason diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a37cc04..a114b7b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4794,7 +4794,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, u64 dev_offset; u64 stripe_size; int i = 0; - int ret; + int ret = 0; em_tree = &extent_root->fs_info->mapping_tree.map_tree; read_lock(&em_tree->lock); -- cgit v0.10.2 From b4570aa994b8fdb3a9c04ed80a6cac69072d4d42 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 Dec 2015 07:37:26 -0800 Subject: btrfs: fix compiling with CONFIG_BTRFS_DEBUG enabled. Merging in the free space tree deleted a variable needed when CONFIG_BTRFS_DEBUG=y Signed-off-by: Chris Mason diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0617cb7..83fc61d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -543,11 +543,13 @@ static noinline void caching_thread(struct btrfs_work *work) struct btrfs_block_group_cache *block_group; struct btrfs_fs_info *fs_info; struct btrfs_caching_control *caching_ctl; + struct btrfs_root *extent_root; int ret; caching_ctl = container_of(work, struct btrfs_caching_control, work); block_group = caching_ctl->block_group; fs_info = block_group->fs_info; + extent_root = fs_info->extent_root; mutex_lock(&caching_ctl->mutex); down_read(&fs_info->commit_root_sem); -- cgit v0.10.2 From 511711af91f21d80b27f18b569352d6896562828 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 30 Dec 2015 07:52:35 -0800 Subject: btrfs: don't run delayed references while we are creating the free space tree This is a short term solution to make sure btrfs_run_delayed_refs() doesn't change the extent tree while we are scanning it to create the free space tree. Longer term we need to synchronize scanning the block groups one by one, similar to what happens during a balance. Signed-off-by: Chris Mason diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d79ba05..9a88d0c 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1856,6 +1856,8 @@ struct btrfs_fs_info { * and will be latter freed. Protected by fs_info->chunk_mutex. */ struct list_head pinned_chunks; + + int creating_free_space_tree; }; struct btrfs_subvolume_writers { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 823c1ce..dc6b73a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3065,6 +3065,18 @@ retry_root_backup: if (sb->s_flags & MS_RDONLY) return 0; + if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) && + !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { + pr_info("BTRFS: creating free space tree\n"); + ret = btrfs_create_free_space_tree(fs_info); + if (ret) { + pr_warn("BTRFS: failed to create free space tree %d\n", + ret); + close_ctree(tree_root); + return ret; + } + } + down_read(&fs_info->cleanup_work_sem); if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { @@ -3102,18 +3114,6 @@ retry_root_backup: } } - if (btrfs_test_opt(tree_root, FREE_SPACE_TREE) && - !btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { - pr_info("BTRFS: creating free space tree\n"); - ret = btrfs_create_free_space_tree(fs_info); - if (ret) { - pr_warn("BTRFS: failed to create free space tree %d\n", - ret); - close_ctree(tree_root); - return ret; - } - } - if (!fs_info->uuid_root) { pr_info("BTRFS: creating UUID tree\n"); ret = btrfs_create_uuid_tree(fs_info); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 83fc61d..add4af6 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2926,6 +2926,9 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, if (trans->aborted) return 0; + if (root->fs_info->creating_free_space_tree) + return 0; + if (root == root->fs_info->extent_root) root = root->fs_info->tree_root; diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index cbe36dd..393e36b 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -1067,6 +1067,8 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, if (ret) goto out; + mutex_lock(&block_group->free_space_lock); + /* * Iterate through all of the extent and metadata items in this block * group, adding the free space between them and the free space at the @@ -1080,7 +1082,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); if (ret < 0) - goto out; + goto out_locked; ASSERT(ret == 0); start = block_group->key.objectid; @@ -1100,7 +1102,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, key.objectid - start); if (ret) - goto out; + goto out_locked; } start = key.objectid; if (key.type == BTRFS_METADATA_ITEM_KEY) @@ -1114,7 +1116,7 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = btrfs_next_item(extent_root, path); if (ret < 0) - goto out; + goto out_locked; if (ret) break; } @@ -1122,10 +1124,12 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans, ret = __add_to_free_space_tree(trans, fs_info, block_group, path2, start, end - start); if (ret) - goto out; + goto out_locked; } ret = 0; +out_locked: + mutex_unlock(&block_group->free_space_lock); out: btrfs_free_path(path2); btrfs_free_path(path); @@ -1145,6 +1149,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) if (IS_ERR(trans)) return PTR_ERR(trans); + fs_info->creating_free_space_tree = 1; free_space_root = btrfs_create_tree(trans, fs_info, BTRFS_FREE_SPACE_TREE_OBJECTID); if (IS_ERR(free_space_root)) { @@ -1164,6 +1169,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) } btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); + fs_info->creating_free_space_tree = 0; ret = btrfs_commit_transaction(trans, tree_root); if (ret) @@ -1172,6 +1178,7 @@ int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) return 0; abort: + fs_info->creating_free_space_tree = 0; btrfs_abort_transaction(trans, tree_root, ret); btrfs_end_transaction(trans, tree_root); return ret; -- cgit v0.10.2 From 2bc0bb5fe71dee6fdaf81ff81f6e739ff9f7ff19 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 30 Dec 2015 02:42:30 +0000 Subject: Btrfs: fix race between free space endio workers and space cache writeout While running a stress test I ran into the following trace/transaction abort: [471626.672243] ------------[ cut here ]------------ [471626.673322] WARNING: CPU: 9 PID: 19107 at fs/btrfs/extent-tree.c:3740 btrfs_write_dirty_block_groups+0x17c/0x214 [btrfs]() [471626.675492] BTRFS: Transaction aborted (error -2) [471626.676748] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix [471626.688802] CPU: 14 PID: 19107 Comm: fsstress Tainted: G W 4.3.0-rc5-btrfs-next-17+ #1 [471626.690148] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [471626.691901] 0000000000000000 ffff880016037cf0 ffffffff812566f4 ffff880016037d38 [471626.695009] ffff880016037d28 ffffffff8104d0a6 ffffffffa040c84e 00000000fffffffe [471626.697490] ffff88011fe855f8 ffff88000c484cb0 ffff88000d195000 ffff880016037d90 [471626.699201] Call Trace: [471626.699804] [] dump_stack+0x4e/0x79 [471626.701049] [] warn_slowpath_common+0x9f/0xb8 [471626.702542] [] ? btrfs_write_dirty_block_groups+0x17c/0x214 [btrfs] [471626.704326] [] warn_slowpath_fmt+0x48/0x50 [471626.705636] [] ? write_one_cache_group.isra.32+0x77/0x82 [btrfs] [471626.707048] [] btrfs_write_dirty_block_groups+0x17c/0x214 [btrfs] [471626.708616] [] commit_cowonly_roots+0x1d7/0x25a [btrfs] [471626.709950] [] btrfs_commit_transaction+0x4c4/0x991 [btrfs] [471626.711286] [] ? signal_pending_state+0x31/0x31 [471626.712611] [] btrfs_sync_fs+0x145/0x1ad [btrfs] [471626.715610] [] ? SyS_tee+0x226/0x226 [471626.716718] [] sync_fs_one_sb+0x20/0x22 [471626.717672] [] iterate_supers+0x75/0xc2 [471626.718800] [] sys_sync+0x52/0x80 [471626.719990] [] entry_SYSCALL_64_fastpath+0x12/0x6f [471626.721835] ---[ end trace baf57f43d76693f4 ]--- [471626.722954] BTRFS: error (device sdc) in btrfs_write_dirty_block_groups:3740: errno=-2 No such entry This is a very rare situation and it happened due to a race between a free space endio worker and writing the space caches for dirty block groups at a transaction's commit critical section. The steps leading to this are: 1) A task calls btrfs_commit_transaction() and starts the writeout of the space caches for all currently dirty block groups (i.e. it calls btrfs_start_dirty_block_groups()); 2) The previous step starts writeback for space caches; 3) When the writeback finishes it queues jobs for free space endio work queue (fs_info->endio_freespace_worker) that execute btrfs_finish_ordered_io(); 4) The task committing the transaction sets the transaction's state to TRANS_STATE_COMMIT_DOING and shortly after calls btrfs_write_dirty_block_groups(); 5) A free space endio job joins the transaction, through btrfs_join_transaction_nolock(), and updates a free space inode item in the root tree through btrfs_update_inode_fallback(); 6) Updating the free space inode item resulted in COWing one or more nodes/leaves of the root tree, and that resulted in creating a new metadata block group, which gets added to the transaction's list of dirty block groups (this is a very rare case); 7) The free space endio job has not released yet its transaction handle at this point, so the new metadata block group was not yet fully created (didn't go through btrfs_create_pending_block_groups() yet); 8) The transaction commit task sees the new metadata block group in the transaction's list of dirty block groups and processes it. When it attempts to update the block group's block group item in the extent tree, through write_one_cache_group(), it isn't able to find it and aborts the transaction with error -ENOENT - this is because the free space endio job hasn't yet released its transaction handle (which calls btrfs_create_pending_block_groups()) and therefore the block group item was not yet added to the extent tree. Fix this waiting for free space endio jobs if we fail to find a block group item in the extent tree and then retry once updating the block group item. Signed-off-by: Filipe Manana diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index add4af6..cf8983e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3768,6 +3768,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, } if (!ret) { ret = write_one_cache_group(trans, root, path, cache); + /* + * One of the free space endio workers might have + * created a new block group while updating a free space + * cache's inode (at inode.c:btrfs_finish_ordered_io()) + * and hasn't released its transaction handle yet, in + * which case the new block group is still attached to + * its transaction handle and its creation has not + * finished yet (no block group item in the extent tree + * yet, etc). If this is the case, wait for all free + * space endio workers to finish and retry. This is a + * a very rare case so no need for a more efficient and + * complex approach. + */ + if (ret == -ENOENT) { + wait_event(cur_trans->writer_wait, + atomic_read(&cur_trans->num_writers) == 1); + ret = write_one_cache_group(trans, root, path, + cache); + } if (ret) btrfs_abort_transaction(trans, root, ret); } -- cgit v0.10.2 From a879719b8c90e15c9e7fa7266d5e3c0ca962f9df Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 31 Dec 2015 18:07:59 +0000 Subject: Btrfs: send, don't BUG_ON() when an empty symlink is found When a symlink is successfully created it always has an inline extent containing the source path. However if an error happens when creating the symlink, we can leave in the subvolume's tree a symlink inode without any such inline extent item - this happens if after btrfs_symlink() calls btrfs_end_transaction() and before it calls the inode eviction handler (through the final iput() call), the transaction gets committed and a crash happens before the eviction handler gets called, or if a snapshot of the subvolume is made before the eviction handler gets called. Sadly we can't just avoid this by making btrfs_symlink() call btrfs_end_transaction() after it calls the eviction handler, because the later can commit the current transaction before it removes any items from the subvolume tree (if it encounters ENOSPC errors while reserving space for removing all the items). So make send fail more gracefully, with an -EIO error, and print a message to dmesg/syslog informing that there's an empty symlink inode, so that the user can delete the empty symlink or do something else about it. Reported-by: Stephen R. van den Berg Signed-off-by: Filipe Manana diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 355a458..63a6152 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1469,7 +1469,21 @@ static int read_symlink(struct btrfs_root *root, ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto out; - BUG_ON(ret); + if (ret) { + /* + * An empty symlink inode. Can happen in rare error paths when + * creating a symlink (transaction committed before the inode + * eviction handler removed the symlink inode items and a crash + * happened in between or the subvol was snapshoted in between). + * Print an informative message to dmesg/syslog so that the user + * can delete the symlink. + */ + btrfs_err(root->fs_info, + "Found empty symlink inode %llu at root %llu", + ino, root->root_key.objectid); + ret = -EIO; + goto out; + } ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_file_extent_item); -- cgit v0.10.2 From d50866d00fb39fcf72307001763ee9cc92625a43 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 31 Dec 2015 18:08:24 +0000 Subject: Btrfs: don't leave dangling dentry if symlink creation failed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we are creating a symlink we might fail with an error after we created its inode and added the corresponding directory indexes to its parent inode. In this case we end up never removing the directory indexes because the inode eviction handler, called for our symlink inode on the final iput(), only removes items associated with the symlink inode and not with the parent inode. Example: $ mkfs.btrfs -f /dev/sdi $ mount /dev/sdi /mnt $ touch /mnt/foo $ ln -s /mnt/foo /mnt/bar ln: failed to create symbolic link ‘bar’: Cannot allocate memory $ umount /mnt $ btrfsck /dev/sdi Checking filesystem on /dev/sdi UUID: d5acb5ba-31bd-42da-b456-89dca2e716e1 checking extents checking free space cache checking fs roots root 5 inode 258 errors 2001, no inode item, link count wrong unresolved ref dir 256 index 3 namelen 3 name bar filetype 7 errors 4, no inode ref found 131073 bytes used err is 1 total csum bytes: 0 total tree bytes: 131072 total fs tree bytes: 32768 total extent tree bytes: 16384 btree space waste bytes: 124305 file data blocks allocated: 262144 referenced 262144 btrfs-progs v4.2.3 So fix this by adding the directory index entries as the very last step of symlink creation. Signed-off-by: Filipe Manana diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index bdb0008..2ea2e0e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9693,10 +9693,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (err) goto out_unlock_inode; - err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); - if (err) - goto out_unlock_inode; - path = btrfs_alloc_path(); if (!path) { err = -ENOMEM; @@ -9733,6 +9729,13 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, inode_set_bytes(inode, name_len); btrfs_i_size_write(inode, name_len); err = btrfs_update_inode(trans, root, inode); + /* + * Last step, add directory indexes for our symlink inode. This is the + * last step to avoid extra cleanup of these indexes if an error happens + * elsewhere above. + */ + if (!err) + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) { drop_inode = 1; goto out_unlock_inode; -- cgit v0.10.2 From 9269d12b2d57d9e3d13036bb750762d1110d425c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 31 Dec 2015 18:16:29 +0000 Subject: Btrfs: fix number of transaction units required to create symlink We weren't accounting for the insertion of an inline extent item for the symlink inode nor that we need to update the parent inode item (through the call to btrfs_add_nondir()). So fix this by including two more transaction units. Signed-off-by: Filipe Manana diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2ea2e0e..5dbc07a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9660,9 +9660,11 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, /* * 2 items for inode item and ref * 2 items for dir items + * 1 item for updating parent inode item + * 1 item for the inline extent item * 1 item for xattr if selinux is on */ - trans = btrfs_start_transaction(root, 5); + trans = btrfs_start_transaction(root, 7); if (IS_ERR(trans)) return PTR_ERR(trans); -- cgit v0.10.2 From 271dba4521aed0c37c063548f876b49f5cd64b2e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 5 Jan 2016 16:24:05 +0000 Subject: Btrfs: fix transaction handle leak on failure to create hard link If we failed to create a hard link we were not always releasing the the transaction handle we got before, resulting in a memory leak and preventing any other tasks from being able to commit the current transaction. Fix this by always releasing our transaction handle. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5dbc07a..018c2a6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6488,7 +6488,7 @@ out_unlock_inode: static int btrfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; struct btrfs_root *root = BTRFS_I(dir)->root; struct inode *inode = d_inode(old_dentry); u64 index; @@ -6514,6 +6514,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, trans = btrfs_start_transaction(root, 5); if (IS_ERR(trans)) { err = PTR_ERR(trans); + trans = NULL; goto fail; } @@ -6547,9 +6548,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, btrfs_log_new_name(trans, inode, NULL, parent); } - btrfs_end_transaction(trans, root); btrfs_balance_delayed_items(root); fail: + if (trans) + btrfs_end_transaction(trans, root); if (drop_inode) { inode_dec_link_count(inode); iput(inode); -- cgit v0.10.2 From b2acdddfad13c38a1e8b927d83c3cf321f63601a Mon Sep 17 00:00:00 2001 From: Anand Jain Date: Wed, 7 Oct 2015 17:23:23 +0800 Subject: Btrfs: add missing brelse when superblock checksum fails Looks like oversight, call brelse() when checksum fails. Further down the code, in the non error path, we do call brelse() and so we don't see brelse() in the goto error paths. Signed-off-by: Anand Jain Reviewed-by: David Sterba Signed-off-by: David Sterba diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 974be09..216f977 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2668,6 +2668,7 @@ int open_ctree(struct super_block *sb, if (btrfs_check_super_csum(bh->b_data)) { printk(KERN_ERR "BTRFS: superblock checksum mismatch\n"); err = -EINVAL; + brelse(bh); goto fail_alloc; } -- cgit v0.10.2 From be7bd730841e69fe8f70120098596f648cd1f3ff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 22 Oct 2015 15:05:09 -0400 Subject: Btrfs: igrab inode in writepage We hit this panic on a few of our boxes this week where we have an ordered_extent with an NULL inode. We do an igrab() of the inode in writepages, but weren't doing it in writepage which can be called directly from the VM on dirty pages. If the inode has been unlinked then we could have I_FREEING set which means igrab() would return NULL and we get this panic. Fix this by trying to igrab in btrfs_writepage, and if it returns NULL then just redirty the page and return AOP_WRITEPAGE_ACTIVATE; so the VM knows it wasn't successful. Thanks, Signed-off-by: Josef Bacik Reviewed-by: Liu Bo Signed-off-by: David Sterba diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a70c579..e8d9f9c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8534,15 +8534,28 @@ int btrfs_readpage(struct file *file, struct page *page) static int btrfs_writepage(struct page *page, struct writeback_control *wbc) { struct extent_io_tree *tree; - + struct inode *inode = page->mapping->host; + int ret; if (current->flags & PF_MEMALLOC) { redirty_page_for_writepage(wbc, page); unlock_page(page); return 0; } + + /* + * If we are under memory pressure we will call this directly from the + * VM, we need to make sure we have the inode referenced for the ordered + * extent. If not just return like we didn't do anything. + */ + if (!igrab(inode)) { + redirty_page_for_writepage(wbc, page); + return AOP_WRITEPAGE_ACTIVATE; + } tree = &BTRFS_I(page->mapping->host)->io_tree; - return extent_write_full_page(tree, page, btrfs_get_extent, wbc); + ret = extent_write_full_page(tree, page, btrfs_get_extent, wbc); + btrfs_add_delayed_iput(inode); + return ret; } static int btrfs_writepages(struct address_space *mapping, -- cgit v0.10.2 From c5ca87819d7f10625b0940b356adb8ac0f1080d7 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Thu, 19 Nov 2015 17:26:22 +0800 Subject: btrfs: Support convert to -d dup for btrfs-convert Since we will add support for -d dup for non-mixed filesystem, kernel need to support converting to this raid-type. This patch remove limitation of above case. Tested by following script: (combination of dup conversion with fsck): export TEST_DEV='/dev/vdc' export TEST_DIR='/var/ltf/tester/mnt' do_dup_test() { local m_from="$1" local d_from="$2" local m_to="$3" local d_to="$4" echo "Convert from -m $m_from -d $d_from to -m $m_to -d $d_to" umount "$TEST_DIR" &>/dev/null ./mkfs.btrfs -f -m "$m_from" -d "$d_from" "$TEST_DEV" >/dev/null || return 1 mount "$TEST_DEV" "$TEST_DIR" || return 1 cp -a /sbin/* "$TEST_DIR" [[ "$m_from" != "$m_to" ]] && { ./btrfs balance start -f -mconvert="$m_to" "$TEST_DIR" || return 1 } [[ "$d_from" != "$d_to" ]] && { local opt=() [[ "$d_to" == single ]] && opt+=("-f") ./btrfs balance start "${opt[@]}" -dconvert="$d_to" "$TEST_DIR" || return 1 } umount "$TEST_DIR" || return 1 ./btrfsck "$TEST_DEV" || return 1 echo return 0 } test_all() { for m_from in single dup; do for d_from in single dup; do for m_to in single dup; do for d_to in single dup; do do_dup_test "$m_from" "$d_from" "$m_to" "$d_to" || return 1 done done done done } test_all Signed-off-by: Zhao Lei Reviewed-by: David Sterba Signed-off-by: David Sterba diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a23399e..33b0250 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3723,14 +3723,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl, goto out; } - /* allow dup'ed data chunks only in mixed mode */ - if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && - (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) { - btrfs_err(fs_info, "dup for data is not allowed"); - ret = -EINVAL; - goto out; - } - /* allow to reduce meta or sys integrity only if force set */ allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10 | -- cgit v0.10.2 From 8089fe62c6603860f6796ca80519b92391292f21 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 19 Nov 2015 14:15:51 +0100 Subject: btrfs: put delayed item hook into inode Inodes for delayed iput allocate a trivial helper structure, let's place the list hook directly into the inode and save a kmalloc (killing a __GFP_NOFAIL as a bonus) at the cost of increasing size of btrfs_inode. The inode can be put into the delayed_iputs list more than once and we have to keep the count. This means we can't use the list_splice to process a bunch of inodes because we'd lost track of the count if the inode is put into the delayed iputs again while it's processed. Signed-off-by: David Sterba diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0ef5cc1..61205e3 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -192,6 +192,10 @@ struct btrfs_inode { /* File creation time. */ struct timespec i_otime; + /* Hook into fs_info->delayed_iputs */ + struct list_head delayed_iput; + long delayed_iput_count; + struct inode vfs_inode; }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e8d9f9c..255d0f9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3106,55 +3106,47 @@ static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio, start, (size_t)(end - start + 1)); } -struct delayed_iput { - struct list_head list; - struct inode *inode; -}; - -/* JDM: If this is fs-wide, why can't we add a pointer to - * btrfs_inode instead and avoid the allocation? */ void btrfs_add_delayed_iput(struct inode *inode) { struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; - struct delayed_iput *delayed; + struct btrfs_inode *binode = BTRFS_I(inode); if (atomic_add_unless(&inode->i_count, -1, 1)) return; - delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL); - delayed->inode = inode; - spin_lock(&fs_info->delayed_iput_lock); - list_add_tail(&delayed->list, &fs_info->delayed_iputs); + if (binode->delayed_iput_count == 0) { + ASSERT(list_empty(&binode->delayed_iput)); + list_add_tail(&binode->delayed_iput, &fs_info->delayed_iputs); + } else { + binode->delayed_iput_count++; + } spin_unlock(&fs_info->delayed_iput_lock); } void btrfs_run_delayed_iputs(struct btrfs_root *root) { - LIST_HEAD(list); struct btrfs_fs_info *fs_info = root->fs_info; - struct delayed_iput *delayed; - int empty; - - spin_lock(&fs_info->delayed_iput_lock); - empty = list_empty(&fs_info->delayed_iputs); - spin_unlock(&fs_info->delayed_iput_lock); - if (empty) - return; down_read(&fs_info->delayed_iput_sem); - spin_lock(&fs_info->delayed_iput_lock); - list_splice_init(&fs_info->delayed_iputs, &list); - spin_unlock(&fs_info->delayed_iput_lock); - - while (!list_empty(&list)) { - delayed = list_entry(list.next, struct delayed_iput, list); - list_del(&delayed->list); - iput(delayed->inode); - kfree(delayed); + while (!list_empty(&fs_info->delayed_iputs)) { + struct btrfs_inode *inode; + + inode = list_first_entry(&fs_info->delayed_iputs, + struct btrfs_inode, delayed_iput); + if (inode->delayed_iput_count) { + inode->delayed_iput_count--; + list_move_tail(&inode->delayed_iput, + &fs_info->delayed_iputs); + } else { + list_del_init(&inode->delayed_iput); + } + spin_unlock(&fs_info->delayed_iput_lock); + iput(&inode->vfs_inode); + spin_lock(&fs_info->delayed_iput_lock); } - + spin_unlock(&fs_info->delayed_iput_lock); up_read(&root->fs_info->delayed_iput_sem); } @@ -9037,6 +9029,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->dir_index = 0; ei->last_unlink_trans = 0; ei->last_log_commit = 0; + ei->delayed_iput_count = 0; spin_lock_init(&ei->lock); ei->outstanding_extents = 0; @@ -9061,6 +9054,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) mutex_init(&ei->delalloc_mutex); btrfs_ordered_inode_tree_init(&ei->ordered_tree); INIT_LIST_HEAD(&ei->delalloc_inodes); + INIT_LIST_HEAD(&ei->delayed_iput); RB_CLEAR_NODE(&ei->rb_node); return inode; -- cgit v0.10.2 From 35b3ad50baa4a5fc2ae616c0513d2987bfb52a85 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Nov 2015 16:51:29 +0100 Subject: btrfs: better packing of btrfs_delayed_extent_op btrfs_delayed_extent_op can be packed in a better way, it's 40 bytes now and has 8 unused bytes. Reducing the level type to u8 makes it possible to squeeze it to the padding byte after key. The bitfields were switched to bool as there's space to store the full byte without increasing the whole structure, besides that the generated assembly is smaller. struct btrfs_delayed_extent_op { struct btrfs_disk_key key; /* 0 17 */ u8 level; /* 17 1 */ bool update_key; /* 18 1 */ bool update_flags; /* 19 1 */ bool is_data; /* 20 1 */ /* XXX 3 bytes hole, try to pack */ u64 flags_to_set; /* 24 8 */ /* size: 32, cachelines: 1, members: 6 */ /* sum members: 29, holes: 1, sum holes: 3 */ /* last cacheline: 32 bytes */ }; The final size is 32 bytes which gives +26 object per slab page. text data bss dec hex filename 938811 43670 23144 1005625 f5839 fs/btrfs/btrfs.ko.before 938747 43670 23144 1005561 f57f9 fs/btrfs/btrfs.ko.after Signed-off-by: David Sterba diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index e06dd75..914ac13 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -493,12 +493,12 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, memcpy(&existing_ref->extent_op->key, &ref->extent_op->key, sizeof(ref->extent_op->key)); - existing_ref->extent_op->update_key = 1; + existing_ref->extent_op->update_key = true; } if (ref->extent_op->update_flags) { existing_ref->extent_op->flags_to_set |= ref->extent_op->flags_to_set; - existing_ref->extent_op->update_flags = 1; + existing_ref->extent_op->update_flags = true; } btrfs_free_delayed_extent_op(ref->extent_op); } diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 00ed02c..c24b653 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -75,11 +75,11 @@ struct btrfs_delayed_ref_node { struct btrfs_delayed_extent_op { struct btrfs_disk_key key; + u8 level; + bool update_key; + bool update_flags; + bool is_data; u64 flags_to_set; - int level; - unsigned int update_key:1; - unsigned int update_flags:1; - unsigned int is_data:1; }; /* diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4661db..d796b53 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2988,9 +2988,9 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, return -ENOMEM; extent_op->flags_to_set = flags; - extent_op->update_flags = 1; - extent_op->update_key = 0; - extent_op->is_data = is_data ? 1 : 0; + extent_op->update_flags = true; + extent_op->update_key = false; + extent_op->is_data = is_data ? true : false; extent_op->level = level; ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, @@ -7980,12 +7980,9 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, else memset(&extent_op->key, 0, sizeof(extent_op->key)); extent_op->flags_to_set = flags; - if (skinny_metadata) - extent_op->update_key = 0; - else - extent_op->update_key = 1; - extent_op->update_flags = 1; - extent_op->is_data = 0; + extent_op->update_key = skinny_metadata ? false : true; + extent_op->update_flags = true; + extent_op->is_data = false; extent_op->level = level; ret = btrfs_add_delayed_tree_ref(root->fs_info, trans, diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h deleted file mode 100644 index e69de29..0000000 -- cgit v0.10.2 From f5cdedd73fa71b74dcc42f2a11a5735d89ce7c4f Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Nov 2015 17:27:06 +0100 Subject: btrfs: handle invalid num_stripes in sys_array We can handle the special case of num_stripes == 0 directly inside btrfs_read_sys_array. The BUG_ON in btrfs_chunk_item_size is there to catch other unhandled cases where we fail to validate external data. A crafted or corrupted image crashes at mount time: BTRFS: device fsid 9006933e-2a9a-44f0-917f-514252aeec2c devid 1 transid 7 /dev/loop0 BTRFS info (device loop0): disk space caching is enabled BUG: failure at fs/btrfs/ctree.h:337/btrfs_chunk_item_size()! Kernel panic - not syncing: BUG! CPU: 0 PID: 313 Comm: mount Not tainted 4.2.5-00657-ge047887-dirty #25 Stack: 637af890 60062489 602aeb2e 604192ba 60387961 00000011 637af8a0 6038a835 637af9c0 6038776b 634ef32b 00000000 Call Trace: [<6001c86d>] show_stack+0xfe/0x15b [<6038a835>] dump_stack+0x2a/0x2c [<6038776b>] panic+0x13e/0x2b3 [<6020f099>] btrfs_read_sys_array+0x25d/0x2ff [<601cfbbe>] open_ctree+0x192d/0x27af [<6019c2c1>] btrfs_mount+0x8f5/0xb9a [<600bc9a7>] mount_fs+0x11/0xf3 [<600d5167>] vfs_kern_mount+0x75/0x11a [<6019bcb0>] btrfs_mount+0x2e4/0xb9a [<600bc9a7>] mount_fs+0x11/0xf3 [<600d5167>] vfs_kern_mount+0x75/0x11a [<600d710b>] do_mount+0xa35/0xbc9 [<600d7557>] SyS_mount+0x95/0xc8 [<6001e884>] handle_syscall+0x6b/0x8e Reported-by: Jiri Slaby Reported-by: Vegard Nossum CC: stable@vger.kernel.org # 3.19+ Signed-off-by: David Sterba diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 33b0250..dc6697c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6504,6 +6504,14 @@ int btrfs_read_sys_array(struct btrfs_root *root) goto out_short_read; num_stripes = btrfs_chunk_num_stripes(sb, chunk); + if (!num_stripes) { + printk(KERN_ERR + "BTRFS: invalid number of stripes %u in sys_array at offset %u\n", + num_stripes, cur_offset); + ret = -EIO; + break; + } + len = btrfs_chunk_item_size(num_stripes); if (cur_offset + len > array_size) goto out_short_read; -- cgit v0.10.2 From 93a3d46780b0f207f76608078eb965cf7b83902c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Nov 2015 17:27:09 +0100 Subject: btrfs: verbose error when we find an unexpected item in sys_array Signed-off-by: David Sterba diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dc6697c..0577d7f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6520,6 +6520,9 @@ int btrfs_read_sys_array(struct btrfs_root *root) if (ret) break; } else { + printk(KERN_ERR + "BTRFS: unexpected item type %u in sys_array at offset %u\n", + (u32)key.type, cur_offset); ret = -EIO; break; } -- cgit v0.10.2 From 0de270fa83d5a45664e3f2428d432145037ac432 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 1 Dec 2015 18:09:12 +0100 Subject: btrfs: drop duplicate prefix from scrub workqueues The helper btrfs_alloc_workqueue will add the "btrfs-" prefix. Signed-off-by: David Sterba diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b091d94..1d1f639 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3735,27 +3735,27 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, if (fs_info->scrub_workers_refcnt == 0) { if (is_dev_replace) fs_info->scrub_workers = - btrfs_alloc_workqueue("btrfs-scrub", flags, + btrfs_alloc_workqueue("scrub", flags, 1, 4); else fs_info->scrub_workers = - btrfs_alloc_workqueue("btrfs-scrub", flags, + btrfs_alloc_workqueue("scrub", flags, max_active, 4); if (!fs_info->scrub_workers) goto fail_scrub_workers; fs_info->scrub_wr_completion_workers = - btrfs_alloc_workqueue("btrfs-scrubwrc", flags, + btrfs_alloc_workqueue("scrubwrc", flags, max_active, 2); if (!fs_info->scrub_wr_completion_workers) goto fail_scrub_wr_completion_workers; fs_info->scrub_nocow_workers = - btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); + btrfs_alloc_workqueue("scrubnc", flags, 1, 0); if (!fs_info->scrub_nocow_workers) goto fail_scrub_nocow_workers; fs_info->scrub_parity_workers = - btrfs_alloc_workqueue("btrfs-scrubparity", flags, + btrfs_alloc_workqueue("scrubparity", flags, max_active, 2); if (!fs_info->scrub_parity_workers) goto fail_scrub_parity_workers; -- cgit v0.10.2 From 100d57025cce6bf568a10660c0d884bcc64c580e Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 8 Dec 2015 14:39:32 +0100 Subject: btrfs: don't use slab cache for struct btrfs_delalloc_work Although we prefer to use separate caches for various structs, it seems better not to do that for struct btrfs_delalloc_work. Objects of this type are allocated rarely, when transaction commit calls btrfs_start_delalloc_roots, requesting delayed iputs. The objects are temporary (with some IO involved) but still allocated and freed within __start_delalloc_inodes. Memory allocation failure is handled. The slab cache is empty most of the time (observed on several systems), so if we need to allocate a new slab object, the first one has to allocate a full page. In a potential case of low memory conditions this might fail with higher probability compared to using the generic slab caches. Signed-off-by: David Sterba diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 255d0f9..0214a81 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -77,7 +77,6 @@ static const struct file_operations btrfs_dir_file_operations; static struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; -static struct kmem_cache *btrfs_delalloc_work_cachep; struct kmem_cache *btrfs_trans_handle_cachep; struct kmem_cache *btrfs_transaction_cachep; struct kmem_cache *btrfs_path_cachep; @@ -9159,8 +9158,6 @@ void btrfs_destroy_cachep(void) kmem_cache_destroy(btrfs_path_cachep); if (btrfs_free_space_cachep) kmem_cache_destroy(btrfs_free_space_cachep); - if (btrfs_delalloc_work_cachep) - kmem_cache_destroy(btrfs_delalloc_work_cachep); } int btrfs_init_cachep(void) @@ -9195,13 +9192,6 @@ int btrfs_init_cachep(void) if (!btrfs_free_space_cachep) goto fail; - btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work", - sizeof(struct btrfs_delalloc_work), 0, - SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, - NULL); - if (!btrfs_delalloc_work_cachep) - goto fail; - return 0; fail: btrfs_destroy_cachep(); @@ -9446,7 +9436,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, { struct btrfs_delalloc_work *work; - work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS); + work = kmalloc(sizeof(*work), GFP_NOFS); if (!work) return NULL; @@ -9465,7 +9455,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) { wait_for_completion(&work->completion); - kmem_cache_free(btrfs_delalloc_work_cachep, work); + kfree(work); } /* -- cgit v0.10.2 From e40da0e58a208940bf3d1745f9bd8cf1e27904d4 Mon Sep 17 00:00:00 2001 From: Byongho Lee Date: Tue, 19 May 2015 23:46:45 +0900 Subject: btrfs: remove unused inode argument from uncompress_inline() The inode argument is never used from the beginning, so remove it. Signed-off-by: Byongho Lee Reviewed-by: David Sterba Signed-off-by: David Sterba diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a70c579..1f72290 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6687,7 +6687,7 @@ static int merge_extent_mapping(struct extent_map_tree *em_tree, } static noinline int uncompress_inline(struct btrfs_path *path, - struct inode *inode, struct page *page, + struct page *page, size_t pg_offset, u64 extent_offset, struct btrfs_file_extent_item *item) { @@ -6883,8 +6883,7 @@ next: if (create == 0 && !PageUptodate(page)) { if (btrfs_file_extent_compression(leaf, item) != BTRFS_COMPRESS_NONE) { - ret = uncompress_inline(path, inode, page, - pg_offset, + ret = uncompress_inline(path, page, pg_offset, extent_offset, item); if (ret) { err = ret; -- cgit v0.10.2 From 9780c4976f5518f805b32fc00ed045b636fe8fa8 Mon Sep 17 00:00:00 2001 From: Alexandru Moise <00moses.alexander00@gmail.com> Date: Sun, 18 Oct 2015 21:35:41 +0000 Subject: btrfs: switch __btrfs_fs_incompat return type from int to bool Conform to __btrfs_fs_incompat() cast-to-bool (!!) by explicitly returning boolean not int. Signed-off-by: Alexandru Moise <00moses.alexander00@gmail.com> Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 35489e7..eddc461 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -4250,7 +4250,7 @@ static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, #define btrfs_fs_incompat(fs_info, opt) \ __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) -static inline int __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) +static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) { struct btrfs_super_block *disk_super; disk_super = fs_info->super_copy; -- cgit v0.10.2 From 575a75d6fabf6e1217204deca79aea26d6670a12 Mon Sep 17 00:00:00 2001 From: Alexandru Moise <00moses.alexander00@gmail.com> Date: Sun, 25 Oct 2015 19:35:44 +0000 Subject: btrfs: pass proper enum type to start_transaction() Signed-off-by: Alexandru Moise <00moses.alexander00@gmail.com> Reviewed-by: David Sterba Signed-off-by: David Sterba diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index be8eae8..fc82b02 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -634,17 +634,20 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush( struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) { - return start_transaction(root, 0, TRANS_JOIN, 0); + return start_transaction(root, 0, TRANS_JOIN, + BTRFS_RESERVE_NO_FLUSH); } struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root) { - return start_transaction(root, 0, TRANS_JOIN_NOLOCK, 0); + return start_transaction(root, 0, TRANS_JOIN_NOLOCK, + BTRFS_RESERVE_NO_FLUSH); } struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root) { - return start_transaction(root, 0, TRANS_USERSPACE, 0); + return start_transaction(root, 0, TRANS_USERSPACE, + BTRFS_RESERVE_NO_FLUSH); } /* @@ -662,7 +665,8 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root */ struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) { - return start_transaction(root, 0, TRANS_ATTACH, 0); + return start_transaction(root, 0, TRANS_ATTACH, + BTRFS_RESERVE_NO_FLUSH); } /* @@ -677,7 +681,8 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root) { struct btrfs_trans_handle *trans; - trans = start_transaction(root, 0, TRANS_ATTACH, 0); + trans = start_transaction(root, 0, TRANS_ATTACH, + BTRFS_RESERVE_NO_FLUSH); if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT) btrfs_wait_for_commit(root, 0); -- cgit v0.10.2 From 352dd9c8d3b34e406584c95be45cbc4f1ec5a405 Mon Sep 17 00:00:00 2001 From: Alexandru Moise <00moses.alexander00@gmail.com> Date: Sun, 25 Oct 2015 20:15:06 +0000 Subject: btrfs: zero out delayed node upon allocation It's slightly cleaner to zero-out the delayed node upon allocation than to do it by hand in btrfs_init_delayed_node() for a few members Signed-off-by: Alexandru Moise <00moses.alexander00@gmail.com> Signed-off-by: David Sterba diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index e0941fb..0be47e4 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -54,16 +54,11 @@ static inline void btrfs_init_delayed_node( delayed_node->root = root; delayed_node->inode_id = inode_id; atomic_set(&delayed_node->refs, 0); - delayed_node->count = 0; - delayed_node->flags = 0; delayed_node->ins_root = RB_ROOT; delayed_node->del_root = RB_ROOT; mutex_init(&delayed_node->mutex); - delayed_node->index_cnt = 0; INIT_LIST_HEAD(&delayed_node->n_list); INIT_LIST_HEAD(&delayed_node->p_list); - delayed_node->bytes_reserved = 0; - memset(&delayed_node->inode_item, 0, sizeof(delayed_node->inode_item)); } static inline int btrfs_is_continuous_delayed_item( @@ -132,7 +127,7 @@ again: if (node) return node; - node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS); + node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS); if (!node) return ERR_PTR(-ENOMEM); btrfs_init_delayed_node(node, root, ino); -- cgit v0.10.2 From 7928d672ffb06085d06fe3c27a93549d37e53ab3 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 30 Nov 2015 11:02:31 +0100 Subject: btrfs: cleanup, remove stray return statements Signed-off-by: David Sterba diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 974be09..c76f6ea 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3962,7 +3962,6 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, balance_dirty_pages_ratelimited( root->fs_info->btree_inode->i_mapping); } - return; } void btrfs_btree_balance_dirty(struct btrfs_root *root) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1f72290..3b39cd8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5304,7 +5304,6 @@ void btrfs_evict_inode(struct inode *inode) no_delete: btrfs_remove_delayed_node(inode); clear_inode(inode); - return; } /* diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 8077461..d13128c 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -56,7 +56,6 @@ void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) atomic_dec(&eb->spinning_readers); read_unlock(&eb->lock); } - return; } /* @@ -96,7 +95,6 @@ void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) waitqueue_active(&eb->read_lock_wq)) wake_up(&eb->read_lock_wq); } - return; } /* diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 1a33d3e..6d70754 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -503,7 +503,6 @@ static void cache_rbio(struct btrfs_raid_bio *rbio) } spin_unlock_irqrestore(&table->cache_lock, flags); - return; } /* @@ -906,7 +905,6 @@ static void raid_write_end_io(struct bio *bio) err = -EIO; rbio_orig_end_io(rbio, err); - return; } /* diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index b091d94..b57a15a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1514,8 +1514,6 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info, if (sblock->no_io_error_seen) scrub_recheck_block_checksum(sblock); - - return; } static inline int scrub_check_fsid(u8 fsid[], diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a23399e..8c761ae 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1642,7 +1642,6 @@ static void update_dev_time(char *path_name) return; file_update_time(filp); filp_close(filp, NULL); - return; } static int btrfs_rm_dev_item(struct btrfs_root *root, -- cgit v0.10.2 From ee22184b53c823f6956314c2815d4068e3820737 Mon Sep 17 00:00:00 2001 From: Byongho Lee Date: Tue, 15 Dec 2015 01:42:10 +0900 Subject: Btrfs: use linux/sizes.h to represent constants We use many constants to represent size and offset value. And to make code readable we use '256 * 1024 * 1024' instead of '268435456' to represent '256MB'. However we can make far more readable with 'SZ_256MB' which is defined in the 'linux/sizes.h'. So this patch replaces 'xxx * 1024 * 1024' kind of expression with single 'SZ_xxxMB' if 'xxx' is a power of 2 then 'xxx * SZ_1M' if 'xxx' is not a power of 2. And I haven't touched to '4096' & '8192' because it's more intuitive than 'SZ_4KB' & 'SZ_8KB'. Signed-off-by: Byongho Lee Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5b8e235..cb7720f 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1555,7 +1555,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } - search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1); + search_start = buf->start & ~((u64)SZ_1G - 1); if (parent) btrfs_set_lock_blocking(parent); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index eddc461..6202557 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -35,6 +35,7 @@ #include #include #include +#include #include "extent_io.h" #include "extent_map.h" #include "async-thread.h" @@ -196,9 +197,9 @@ static int btrfs_csum_sizes[] = { 4 }; /* ioprio of readahead is set to idle */ #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) -#define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024) +#define BTRFS_DIRTY_METADATA_THRESH SZ_32M -#define BTRFS_MAX_EXTENT_SIZE (128 * 1024 * 1024) +#define BTRFS_MAX_EXTENT_SIZE SZ_128M /* * The key defines the order in the tree, and so it also defines (optimal) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c76f6ea..e8eebe7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2809,7 +2809,7 @@ int open_ctree(struct super_block *sb, fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, - 4 * 1024 * 1024 / PAGE_CACHE_SIZE); + SZ_4M / PAGE_CACHE_SIZE); tree_root->nodesize = nodesize; tree_root->sectorsize = sectorsize; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index adeb318..a407d1b 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -19,7 +19,7 @@ #ifndef __DISKIO__ #define __DISKIO__ -#define BTRFS_SUPER_INFO_OFFSET (64 * 1024) +#define BTRFS_SUPER_INFO_OFFSET SZ_64K #define BTRFS_SUPER_INFO_SIZE 4096 #define BTRFS_SUPER_MIRROR_MAX 3 @@ -35,7 +35,7 @@ enum btrfs_wq_endio_type { static inline u64 btrfs_sb_offset(int mirror) { - u64 start = 16 * 1024; + u64 start = SZ_16K; if (mirror) return start << (BTRFS_SUPER_MIRROR_SHIFT * mirror); return BTRFS_SUPER_INFO_OFFSET; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c4661db..1ea56d8 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -521,7 +521,7 @@ next: else last = key.objectid + key.offset; - if (total_found > (1024 * 1024 * 2)) { + if (total_found > SZ_2M) { total_found = 0; if (wakeup) wake_up(&caching_ctl->wait); @@ -3328,7 +3328,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, * If this block group is smaller than 100 megs don't bother caching the * block group. */ - if (block_group->key.offset < (100 * 1024 * 1024)) { + if (block_group->key.offset < (100 * SZ_1M)) { spin_lock(&block_group->lock); block_group->disk_cache_state = BTRFS_DC_WRITTEN; spin_unlock(&block_group->lock); @@ -3428,7 +3428,7 @@ again: * taking up quite a bit since it's not folded into the other space * cache. */ - num_pages = div_u64(block_group->key.offset, 256 * 1024 * 1024); + num_pages = div_u64(block_group->key.offset, SZ_256M); if (!num_pages) num_pages = 1; @@ -4239,14 +4239,13 @@ static int should_alloc_chunk(struct btrfs_root *root, */ if (force == CHUNK_ALLOC_LIMITED) { thresh = btrfs_super_total_bytes(root->fs_info->super_copy); - thresh = max_t(u64, 64 * 1024 * 1024, - div_factor_fine(thresh, 1)); + thresh = max_t(u64, SZ_64M, div_factor_fine(thresh, 1)); if (num_bytes - num_allocated < thresh) return 1; } - if (num_allocated + 2 * 1024 * 1024 < div_factor(num_bytes, 8)) + if (num_allocated + SZ_2M < div_factor(num_bytes, 8)) return 0; return 1; } @@ -4446,7 +4445,7 @@ out: * transaction. */ if (trans->can_flush_pending_bgs && - trans->chunk_bytes_reserved >= (2 * 1024 * 1024ull)) { + trans->chunk_bytes_reserved >= (u64)SZ_2M) { btrfs_create_pending_block_groups(trans, trans->root); btrfs_trans_release_chunk_metadata(trans); } @@ -4544,7 +4543,7 @@ static inline int calc_reclaim_items_nr(struct btrfs_root *root, u64 to_reclaim) return nr; } -#define EXTENT_SIZE_PER_ITEM (256 * 1024) +#define EXTENT_SIZE_PER_ITEM SZ_256K /* * shrink metadata reservation for delalloc @@ -4749,8 +4748,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, u64 expected; u64 to_reclaim; - to_reclaim = min_t(u64, num_online_cpus() * 1024 * 1024, - 16 * 1024 * 1024); + to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M); spin_lock(&space_info->lock); if (can_overcommit(root, space_info, to_reclaim, BTRFS_RESERVE_FLUSH_ALL)) { @@ -4761,8 +4759,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_root *root, used = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_pinned + space_info->bytes_readonly + space_info->bytes_may_use; - if (can_overcommit(root, space_info, 1024 * 1024, - BTRFS_RESERVE_FLUSH_ALL)) + if (can_overcommit(root, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL)) expected = div_factor_fine(space_info->total_bytes, 95); else expected = div_factor_fine(space_info->total_bytes, 90); @@ -5318,7 +5315,7 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) spin_lock(&sinfo->lock); spin_lock(&block_rsv->lock); - block_rsv->size = min_t(u64, num_bytes, 512 * 1024 * 1024); + block_rsv->size = min_t(u64, num_bytes, SZ_512M); num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + sinfo->bytes_reserved + sinfo->bytes_readonly + @@ -6222,11 +6219,11 @@ fetch_cluster_info(struct btrfs_root *root, struct btrfs_space_info *space_info, return ret; if (ssd) - *empty_cluster = 2 * 1024 * 1024; + *empty_cluster = SZ_2M; if (space_info->flags & BTRFS_BLOCK_GROUP_METADATA) { ret = &root->fs_info->meta_alloc_cluster; if (!ssd) - *empty_cluster = 64 * 1024; + *empty_cluster = SZ_64K; } else if ((space_info->flags & BTRFS_BLOCK_GROUP_DATA) && ssd) { ret = &root->fs_info->data_alloc_cluster; } @@ -9124,7 +9121,7 @@ static int inc_block_group_ro(struct btrfs_block_group_cache *cache, int force) if ((sinfo->flags & (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && !force) - min_allocable_bytes = 1 * 1024 * 1024; + min_allocable_bytes = SZ_1M; else min_allocable_bytes = 0; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9abe187..43a5c5b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4387,7 +4387,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, u64 end = start + PAGE_CACHE_SIZE - 1; if (gfpflags_allow_blocking(mask) && - page->mapping->host->i_size > 16 * 1024 * 1024) { + page->mapping->host->i_size > SZ_16M) { u64 len; while (start <= end) { len = end - start + 1; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cfe99be..1b2ff8b 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -30,7 +30,7 @@ #include "volumes.h" #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) -#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) +#define MAX_CACHE_BYTES_PER_GIG SZ_32K struct btrfs_trim_range { u64 start; @@ -1656,11 +1656,10 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) * at or below 32k, so we need to adjust how much memory we allow to be * used by extent based free space tracking */ - if (size < 1024 * 1024 * 1024) + if (size < SZ_1G) max_bytes = MAX_CACHE_BYTES_PER_GIG; else - max_bytes = MAX_CACHE_BYTES_PER_GIG * - div_u64(size, 1024 * 1024 * 1024); + max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G); /* * we want to account for 1 more bitmap than what we have so we can make @@ -2489,8 +2488,7 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group) * track of free space, and if we pass 1/2 of that we want to * start converting things over to using bitmaps */ - ctl->extents_thresh = ((1024 * 32) / 2) / - sizeof(struct btrfs_free_space); + ctl->extents_thresh = (SZ_32K / 2) / sizeof(struct btrfs_free_space); } /* diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 767a605..e5eb40c 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -282,7 +282,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) } } -#define INIT_THRESHOLD (((1024 * 32) / 2) / sizeof(struct btrfs_free_space)) +#define INIT_THRESHOLD ((SZ_32K / 2) / sizeof(struct btrfs_free_space)) #define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8) /* diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3b39cd8..a379ff6 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -414,15 +414,15 @@ static noinline void compress_file_range(struct inode *inode, unsigned long nr_pages_ret = 0; unsigned long total_compressed = 0; unsigned long total_in = 0; - unsigned long max_compressed = 128 * 1024; - unsigned long max_uncompressed = 128 * 1024; + unsigned long max_compressed = SZ_128K; + unsigned long max_uncompressed = SZ_128K; int i; int will_compress; int compress_type = root->fs_info->compress_type; int redirty = 0; /* if this is a small write inside eof, kick off a defrag */ - if ((end - start + 1) < 16 * 1024 && + if ((end - start + 1) < SZ_16K && (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) btrfs_add_inode_defrag(NULL, inode); @@ -430,7 +430,7 @@ static noinline void compress_file_range(struct inode *inode, again: will_compress = 0; nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1; - nr_pages = min(nr_pages, (128 * 1024UL) / PAGE_CACHE_SIZE); + nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_CACHE_SIZE); /* * we don't want to send crud past the end of i_size through @@ -944,7 +944,7 @@ static noinline int cow_file_range(struct inode *inode, disk_num_bytes = num_bytes; /* if this is a small write inside eof, kick off defrag */ - if (num_bytes < 64 * 1024 && + if (num_bytes < SZ_64K && (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) btrfs_add_inode_defrag(NULL, inode); @@ -1107,7 +1107,7 @@ static noinline void async_cow_submit(struct btrfs_work *work) * atomic_sub_return implies a barrier for waitqueue_active */ if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) < - 5 * 1024 * 1024 && + 5 * SZ_1M && waitqueue_active(&root->fs_info->async_submit_wait)) wake_up(&root->fs_info->async_submit_wait); @@ -1132,7 +1132,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, struct btrfs_root *root = BTRFS_I(inode)->root; unsigned long nr_pages; u64 cur_end; - int limit = 10 * 1024 * 1024; + int limit = 10 * SZ_1M; clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); @@ -1148,7 +1148,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, !btrfs_test_opt(root, FORCE_COMPRESS)) cur_end = end; else - cur_end = min(end, start + 512 * 1024 - 1); + cur_end = min(end, start + SZ_512K - 1); async_cow->end = cur_end; INIT_LIST_HEAD(&async_cow->extents); @@ -4348,7 +4348,7 @@ search_again: * up a huge file in a single leaf. Most of the time that * bytes_deleted is > 0, it will be huge by the time we get here */ - if (be_nice && bytes_deleted > 32 * 1024 * 1024) { + if (be_nice && bytes_deleted > SZ_32M) { if (btrfs_should_end_transaction(trans, root)) { err = -EAGAIN; goto error; @@ -4591,7 +4591,7 @@ error: btrfs_free_path(path); - if (be_nice && bytes_deleted > 32 * 1024 * 1024) { + if (be_nice && bytes_deleted > SZ_32M) { unsigned long updates = trans->delayed_ref_updates; if (updates) { trans->delayed_ref_updates = 0; @@ -9757,7 +9757,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, } } - cur_bytes = min(num_bytes, 256ULL * 1024 * 1024); + cur_bytes = min_t(u64, num_bytes, SZ_256M); cur_bytes = max(cur_bytes, min_size); /* * If we are severely fragmented we could end up with really diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index da94138..4b9b928 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1016,7 +1016,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) ret = false; else if ((em->block_start + em->block_len == next->block_start) && - (em->block_len > 128 * 1024 && next->block_len > 128 * 1024)) + (em->block_len > SZ_128K && next->block_len > SZ_128K)) ret = false; free_extent_map(next); @@ -1262,9 +1262,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, int defrag_count = 0; int compress_type = BTRFS_COMPRESS_ZLIB; u32 extent_thresh = range->extent_thresh; - unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; + unsigned long max_cluster = SZ_256K >> PAGE_CACHE_SHIFT; unsigned long cluster = max_cluster; - u64 new_align = ~((u64)128 * 1024 - 1); + u64 new_align = ~((u64)SZ_128K - 1); struct page **pages = NULL; if (isize == 0) @@ -1281,7 +1281,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, } if (extent_thresh == 0) - extent_thresh = 256 * 1024; + extent_thresh = SZ_256K; /* * if we were not given a file, allocate a readahead @@ -1313,7 +1313,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, if (newer_than) { ret = find_new_extents(root, inode, newer_than, - &newer_off, 64 * 1024); + &newer_off, SZ_64K); if (!ret) { range->start = newer_off; /* @@ -1403,9 +1403,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, newer_off = max(newer_off + 1, (u64)i << PAGE_CACHE_SHIFT); - ret = find_new_extents(root, inode, - newer_than, &newer_off, - 64 * 1024); + ret = find_new_extents(root, inode, newer_than, + &newer_off, SZ_64K); if (!ret) { range->start = newer_off; i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; @@ -1571,7 +1570,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, new_size = old_size + new_size; } - if (new_size < 256 * 1024 * 1024) { + if (new_size < SZ_256M) { ret = -EINVAL; goto out_free; } @@ -2160,7 +2159,7 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file, struct inode *inode; int ret; size_t buf_size; - const size_t buf_limit = 16 * 1024 * 1024; + const size_t buf_limit = SZ_16M; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -3096,7 +3095,7 @@ out_unlock: return ret; } -#define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) +#define BTRFS_MAX_DEDUPE_LEN SZ_16M static long btrfs_ioctl_file_extent_same(struct file *file, struct btrfs_ioctl_same_args __user *argp) @@ -4524,7 +4523,7 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, goto out; } - size = min_t(u32, loi->size, 64 * 1024); + size = min_t(u32, loi->size, SZ_64K); inodes = init_data_container(size); if (IS_ERR(inodes)) { ret = PTR_ERR(inodes); diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 48d425a..02e0016 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h @@ -22,8 +22,8 @@ #define BTRFS_SEND_STREAM_MAGIC "btrfs-stream" #define BTRFS_SEND_STREAM_VERSION 1 -#define BTRFS_SEND_BUF_SIZE (1024 * 64) -#define BTRFS_SEND_READ_SIZE (1024 * 48) +#define BTRFS_SEND_BUF_SIZE SZ_64K +#define BTRFS_SEND_READ_SIZE (48 * SZ_1K) enum btrfs_tlv_type { BTRFS_TLV_U8, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 24154e4..0b6ab61 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1865,7 +1865,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) * btrfs starts at an offset of at least 1MB when doing chunk * allocation. */ - skip_space = 1024 * 1024; + skip_space = SZ_1M; /* user can set the offset in fs_info->alloc_start. */ if (fs_info->alloc_start && diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c index 9e9f236..6ec2bce 100644 --- a/fs/btrfs/tests/extent-io-tests.c +++ b/fs/btrfs/tests/extent-io-tests.c @@ -18,6 +18,7 @@ #include #include +#include #include "btrfs-tests.h" #include "../extent_io.h" @@ -70,8 +71,8 @@ static int test_find_delalloc(void) struct page *page; struct page *locked_page = NULL; unsigned long index = 0; - u64 total_dirty = 256 * 1024 * 1024; - u64 max_bytes = 128 * 1024 * 1024; + u64 total_dirty = SZ_256M; + u64 max_bytes = SZ_128M; u64 start, end, test_start; u64 found; int ret = -EINVAL; @@ -133,7 +134,7 @@ static int test_find_delalloc(void) * |--- delalloc ---| * |--- search ---| */ - test_start = 64 * 1024 * 1024; + test_start = SZ_64M; locked_page = find_lock_page(inode->i_mapping, test_start >> PAGE_CACHE_SHIFT); if (!locked_page) { @@ -220,8 +221,8 @@ static int test_find_delalloc(void) * Now to test where we run into a page that is no longer dirty in the * range we want to find. */ - page = find_get_page(inode->i_mapping, (max_bytes + (1 * 1024 * 1024)) - >> PAGE_CACHE_SHIFT); + page = find_get_page(inode->i_mapping, + (max_bytes + SZ_1M) >> PAGE_CACHE_SHIFT); if (!page) { test_msg("Couldn't find our page\n"); goto out_bits; diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 8b72b00..cc287d4 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -44,7 +44,7 @@ static struct btrfs_block_group_cache *init_test_block_group(void) } cache->key.objectid = 0; - cache->key.offset = 1024 * 1024 * 1024; + cache->key.offset = SZ_1G; cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; cache->sectorsize = 4096; cache->full_stripe_len = 4096; @@ -71,59 +71,59 @@ static int test_extents(struct btrfs_block_group_cache *cache) test_msg("Running extent only tests\n"); /* First just make sure we can remove an entire entry */ - ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); + ret = btrfs_add_free_space(cache, 0, SZ_4M); if (ret) { test_msg("Error adding initial extents %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, 0, SZ_4M); if (ret) { test_msg("Error removing extent %d\n", ret); return ret; } - if (test_check_exists(cache, 0, 4 * 1024 * 1024)) { + if (test_check_exists(cache, 0, SZ_4M)) { test_msg("Full remove left some lingering space\n"); return -1; } /* Ok edge and middle cases now */ - ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024); + ret = btrfs_add_free_space(cache, 0, SZ_4M); if (ret) { test_msg("Error adding half extent %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_1M); if (ret) { test_msg("Error removing tail end %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, 0, SZ_1M); if (ret) { test_msg("Error removing front end %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096); + ret = btrfs_remove_free_space(cache, SZ_2M, 4096); if (ret) { test_msg("Error removing middle piece %d\n", ret); return ret; } - if (test_check_exists(cache, 0, 1 * 1024 * 1024)) { + if (test_check_exists(cache, 0, SZ_1M)) { test_msg("Still have space at the front\n"); return -1; } - if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) { + if (test_check_exists(cache, SZ_2M, 4096)) { test_msg("Still have space in the middle\n"); return -1; } - if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) { + if (test_check_exists(cache, 3 * SZ_1M, SZ_1M)) { test_msg("Still have space at the end\n"); return -1; } @@ -141,30 +141,30 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) test_msg("Running bitmap only tests\n"); - ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); + ret = test_add_free_space_entry(cache, 0, SZ_4M, 1); if (ret) { test_msg("Couldn't create a bitmap entry %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, 0, SZ_4M); if (ret) { test_msg("Error removing bitmap full range %d\n", ret); return ret; } - if (test_check_exists(cache, 0, 4 * 1024 * 1024)) { + if (test_check_exists(cache, 0, SZ_4M)) { test_msg("Left some space in bitmap\n"); return -1; } - ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1); + ret = test_add_free_space_entry(cache, 0, SZ_4M, 1); if (ret) { test_msg("Couldn't add to our bitmap entry %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, SZ_1M, SZ_2M); if (ret) { test_msg("Couldn't remove middle chunk %d\n", ret); return ret; @@ -177,23 +177,21 @@ static int test_bitmaps(struct btrfs_block_group_cache *cache) next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096); /* Test a bit straddling two bitmaps */ - ret = test_add_free_space_entry(cache, next_bitmap_offset - - (2 * 1024 * 1024), 4 * 1024 * 1024, 1); + ret = test_add_free_space_entry(cache, next_bitmap_offset - SZ_2M, + SZ_4M, 1); if (ret) { test_msg("Couldn't add space that straddles two bitmaps %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, next_bitmap_offset - - (1 * 1024 * 1024), 2 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, next_bitmap_offset - SZ_1M, SZ_2M); if (ret) { test_msg("Couldn't remove overlapping space %d\n", ret); return ret; } - if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024), - 2 * 1024 * 1024)) { + if (test_check_exists(cache, next_bitmap_offset - SZ_1M, SZ_2M)) { test_msg("Left some space when removing overlapping\n"); return -1; } @@ -216,43 +214,43 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) * bitmap, but the free space completely in the extent and then * completely in the bitmap. */ - ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1); + ret = test_add_free_space_entry(cache, SZ_4M, SZ_1M, 1); if (ret) { test_msg("Couldn't create bitmap entry %d\n", ret); return ret; } - ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); + ret = test_add_free_space_entry(cache, 0, SZ_1M, 0); if (ret) { test_msg("Couldn't add extent entry %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, 0, SZ_1M); if (ret) { test_msg("Couldn't remove extent entry %d\n", ret); return ret; } - if (test_check_exists(cache, 0, 1 * 1024 * 1024)) { + if (test_check_exists(cache, 0, SZ_1M)) { test_msg("Left remnants after our remove\n"); return -1; } /* Now to add back the extent entry and remove from the bitmap */ - ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0); + ret = test_add_free_space_entry(cache, 0, SZ_1M, 0); if (ret) { test_msg("Couldn't re-add extent entry %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, SZ_4M, SZ_1M); if (ret) { test_msg("Couldn't remove from bitmap %d\n", ret); return ret; } - if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) { + if (test_check_exists(cache, SZ_4M, SZ_1M)) { test_msg("Left remnants in the bitmap\n"); return -1; } @@ -261,19 +259,19 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) * Ok so a little more evil, extent entry and bitmap at the same offset, * removing an overlapping chunk. */ - ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1); + ret = test_add_free_space_entry(cache, SZ_1M, SZ_4M, 1); if (ret) { test_msg("Couldn't add to a bitmap %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, SZ_512K, 3 * SZ_1M); if (ret) { test_msg("Couldn't remove overlapping space %d\n", ret); return ret; } - if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) { + if (test_check_exists(cache, SZ_512K, 3 * SZ_1M)) { test_msg("Left over pieces after removing overlapping\n"); return -1; } @@ -281,25 +279,25 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) __btrfs_remove_free_space_cache(cache->free_space_ctl); /* Now with the extent entry offset into the bitmap */ - ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1); + ret = test_add_free_space_entry(cache, SZ_4M, SZ_4M, 1); if (ret) { test_msg("Couldn't add space to the bitmap %d\n", ret); return ret; } - ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0); + ret = test_add_free_space_entry(cache, SZ_2M, SZ_2M, 0); if (ret) { test_msg("Couldn't add extent to the cache %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, 3 * SZ_1M, SZ_4M); if (ret) { test_msg("Problem removing overlapping space %d\n", ret); return ret; } - if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) { + if (test_check_exists(cache, 3 * SZ_1M, SZ_4M)) { test_msg("Left something behind when removing space"); return -1; } @@ -315,29 +313,26 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) * [ del ] */ __btrfs_remove_free_space_cache(cache->free_space_ctl); - ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024, - 4 * 1024 * 1024, 1); + ret = test_add_free_space_entry(cache, bitmap_offset + SZ_4M, SZ_4M, 1); if (ret) { test_msg("Couldn't add bitmap %d\n", ret); return ret; } - ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024, - 5 * 1024 * 1024, 0); + ret = test_add_free_space_entry(cache, bitmap_offset - SZ_1M, + 5 * SZ_1M, 0); if (ret) { test_msg("Couldn't add extent entry %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024, - 5 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, bitmap_offset + SZ_1M, 5 * SZ_1M); if (ret) { test_msg("Failed to free our space %d\n", ret); return ret; } - if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024, - 5 * 1024 * 1024)) { + if (test_check_exists(cache, bitmap_offset + SZ_1M, 5 * SZ_1M)) { test_msg("Left stuff over\n"); return -1; } @@ -350,19 +345,19 @@ static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache) * to return -EAGAIN back from btrfs_remove_extent, make sure this * doesn't happen. */ - ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1); + ret = test_add_free_space_entry(cache, SZ_1M, SZ_2M, 1); if (ret) { test_msg("Couldn't add bitmap entry %d\n", ret); return ret; } - ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0); + ret = test_add_free_space_entry(cache, 3 * SZ_1M, SZ_1M, 0); if (ret) { test_msg("Couldn't add extent entry %d\n", ret); return ret; } - ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024); + ret = btrfs_remove_free_space(cache, SZ_1M, 3 * SZ_1M); if (ret) { test_msg("Error removing bitmap and extent overlapping %d\n", ret); return ret; @@ -475,16 +470,15 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) /* * Extent entry covering free space range [128Mb - 256Kb, 128Mb - 128Kb[ */ - ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 - 256 * 1024, - 128 * 1024, 0); + ret = test_add_free_space_entry(cache, SZ_128M - SZ_256K, SZ_128K, 0); if (ret) { test_msg("Couldn't add extent entry %d\n", ret); return ret; } /* Bitmap entry covering free space range [128Mb + 512Kb, 256Mb[ */ - ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 512 * 1024, - 128 * 1024 * 1024 - 512 * 1024, 1); + ret = test_add_free_space_entry(cache, SZ_128M + SZ_512K, + SZ_128M - SZ_512K, 1); if (ret) { test_msg("Couldn't add bitmap entry %d\n", ret); return ret; @@ -502,21 +496,19 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * [128Mb + 512Kb, 128Mb + 768Kb[ */ ret = btrfs_remove_free_space(cache, - 128 * 1024 * 1024 + 768 * 1024, - 128 * 1024 * 1024 - 768 * 1024); + SZ_128M + 768 * SZ_1K, + SZ_128M - 768 * SZ_1K); if (ret) { test_msg("Failed to free part of bitmap space %d\n", ret); return ret; } /* Confirm that only those 2 ranges are marked as free. */ - if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024, - 128 * 1024)) { + if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_128K)) { test_msg("Free space range missing\n"); return -ENOENT; } - if (!test_check_exists(cache, 128 * 1024 * 1024 + 512 * 1024, - 256 * 1024)) { + if (!test_check_exists(cache, SZ_128M + SZ_512K, SZ_256K)) { test_msg("Free space range missing\n"); return -ENOENT; } @@ -525,8 +517,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * Confirm that the bitmap range [128Mb + 768Kb, 256Mb[ isn't marked * as free anymore. */ - if (test_check_exists(cache, 128 * 1024 * 1024 + 768 * 1024, - 128 * 1024 * 1024 - 768 * 1024)) { + if (test_check_exists(cache, SZ_128M + 768 * SZ_1K, + SZ_128M - 768 * SZ_1K)) { test_msg("Bitmap region not removed from space cache\n"); return -EINVAL; } @@ -535,8 +527,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * Confirm that the region [128Mb + 256Kb, 128Mb + 512Kb[, which is * covered by the bitmap, isn't marked as free. */ - if (test_check_exists(cache, 128 * 1024 * 1024 + 256 * 1024, - 256 * 1024)) { + if (test_check_exists(cache, SZ_128M + SZ_256K, SZ_256K)) { test_msg("Invalid bitmap region marked as free\n"); return -EINVAL; } @@ -545,8 +536,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * Confirm that the region [128Mb, 128Mb + 256Kb[, which is covered * by the bitmap too, isn't marked as free either. */ - if (test_check_exists(cache, 128 * 1024 * 1024, - 256 * 1024)) { + if (test_check_exists(cache, SZ_128M, SZ_256K)) { test_msg("Invalid bitmap region marked as free\n"); return -EINVAL; } @@ -556,13 +546,13 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * lets make sure the free space cache marks it as free in the bitmap, * and doesn't insert a new extent entry to represent this region. */ - ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 512 * 1024); + ret = btrfs_add_free_space(cache, SZ_128M, SZ_512K); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; } /* Confirm the region is marked as free. */ - if (!test_check_exists(cache, 128 * 1024 * 1024, 512 * 1024)) { + if (!test_check_exists(cache, SZ_128M, SZ_512K)) { test_msg("Bitmap region not marked as free\n"); return -ENOENT; } @@ -581,8 +571,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. */ - ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 + 16 * 1024 * 1024, - 4096); + ret = btrfs_add_free_space(cache, SZ_128M + SZ_16M, 4096); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -601,15 +590,13 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * expand the range covered by the existing extent entry that represents * the free space [128Mb - 256Kb, 128Mb - 128Kb[. */ - ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 128 * 1024, - 128 * 1024); + ret = btrfs_add_free_space(cache, SZ_128M - SZ_128K, SZ_128K); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; } /* Confirm the region is marked as free. */ - if (!test_check_exists(cache, 128 * 1024 * 1024 - 128 * 1024, - 128 * 1024)) { + if (!test_check_exists(cache, SZ_128M - SZ_128K, SZ_128K)) { test_msg("Extent region not marked as free\n"); return -ENOENT; } @@ -637,21 +624,20 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * that represents the 1Mb free space, and therefore we're able to * allocate the whole free space at once. */ - if (!test_check_exists(cache, 128 * 1024 * 1024 - 256 * 1024, - 1 * 1024 * 1024)) { + if (!test_check_exists(cache, SZ_128M - SZ_256K, SZ_1M)) { test_msg("Expected region not marked as free\n"); return -ENOENT; } - if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 4096)) { + if (cache->free_space_ctl->free_space != (SZ_1M + 4096)) { test_msg("Cache free space is not 1Mb + 4Kb\n"); return -EINVAL; } offset = btrfs_find_space_for_alloc(cache, - 0, 1 * 1024 * 1024, 0, + 0, SZ_1M, 0, &max_extent_size); - if (offset != (128 * 1024 * 1024 - 256 * 1024)) { + if (offset != (SZ_128M - SZ_256K)) { test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n", offset); return -EINVAL; @@ -670,7 +656,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) offset = btrfs_find_space_for_alloc(cache, 0, 4096, 0, &max_extent_size); - if (offset != (128 * 1024 * 1024 + 16 * 1024 * 1024)) { + if (offset != (SZ_128M + SZ_16M)) { test_msg("Failed to allocate 4Kb from space cache, returned offset is: %llu\n", offset); return -EINVAL; @@ -691,16 +677,14 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) /* * Extent entry covering free space range [128Mb + 128Kb, 128Mb + 256Kb[ */ - ret = test_add_free_space_entry(cache, 128 * 1024 * 1024 + 128 * 1024, - 128 * 1024, 0); + ret = test_add_free_space_entry(cache, SZ_128M + SZ_128K, SZ_128K, 0); if (ret) { test_msg("Couldn't add extent entry %d\n", ret); return ret; } /* Bitmap entry covering free space range [0, 128Mb - 512Kb[ */ - ret = test_add_free_space_entry(cache, 0, - 128 * 1024 * 1024 - 512 * 1024, 1); + ret = test_add_free_space_entry(cache, 0, SZ_128M - SZ_512K, 1); if (ret) { test_msg("Couldn't add bitmap entry %d\n", ret); return ret; @@ -717,22 +701,18 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * [128Mb + 128b, 128Mb + 256Kb[ * [128Mb - 768Kb, 128Mb - 512Kb[ */ - ret = btrfs_remove_free_space(cache, - 0, - 128 * 1024 * 1024 - 768 * 1024); + ret = btrfs_remove_free_space(cache, 0, SZ_128M - 768 * SZ_1K); if (ret) { test_msg("Failed to free part of bitmap space %d\n", ret); return ret; } /* Confirm that only those 2 ranges are marked as free. */ - if (!test_check_exists(cache, 128 * 1024 * 1024 + 128 * 1024, - 128 * 1024)) { + if (!test_check_exists(cache, SZ_128M + SZ_128K, SZ_128K)) { test_msg("Free space range missing\n"); return -ENOENT; } - if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024, - 256 * 1024)) { + if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_256K)) { test_msg("Free space range missing\n"); return -ENOENT; } @@ -741,8 +721,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * Confirm that the bitmap range [0, 128Mb - 768Kb[ isn't marked * as free anymore. */ - if (test_check_exists(cache, 0, - 128 * 1024 * 1024 - 768 * 1024)) { + if (test_check_exists(cache, 0, SZ_128M - 768 * SZ_1K)) { test_msg("Bitmap region not removed from space cache\n"); return -EINVAL; } @@ -751,8 +730,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * Confirm that the region [128Mb - 512Kb, 128Mb[, which is * covered by the bitmap, isn't marked as free. */ - if (test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024, - 512 * 1024)) { + if (test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) { test_msg("Invalid bitmap region marked as free\n"); return -EINVAL; } @@ -762,15 +740,13 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * lets make sure the free space cache marks it as free in the bitmap, * and doesn't insert a new extent entry to represent this region. */ - ret = btrfs_add_free_space(cache, 128 * 1024 * 1024 - 512 * 1024, - 512 * 1024); + ret = btrfs_add_free_space(cache, SZ_128M - SZ_512K, SZ_512K); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; } /* Confirm the region is marked as free. */ - if (!test_check_exists(cache, 128 * 1024 * 1024 - 512 * 1024, - 512 * 1024)) { + if (!test_check_exists(cache, SZ_128M - SZ_512K, SZ_512K)) { test_msg("Bitmap region not marked as free\n"); return -ENOENT; } @@ -789,7 +765,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * The goal is to test that the bitmap entry space stealing doesn't * steal this space region. */ - ret = btrfs_add_free_space(cache, 32 * 1024 * 1024, 8192); + ret = btrfs_add_free_space(cache, SZ_32M, 8192); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; @@ -800,13 +776,13 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * expand the range covered by the existing extent entry that represents * the free space [128Mb + 128Kb, 128Mb + 256Kb[. */ - ret = btrfs_add_free_space(cache, 128 * 1024 * 1024, 128 * 1024); + ret = btrfs_add_free_space(cache, SZ_128M, SZ_128K); if (ret) { test_msg("Error adding free space: %d\n", ret); return ret; } /* Confirm the region is marked as free. */ - if (!test_check_exists(cache, 128 * 1024 * 1024, 128 * 1024)) { + if (!test_check_exists(cache, SZ_128M, SZ_128K)) { test_msg("Extent region not marked as free\n"); return -ENOENT; } @@ -834,21 +810,19 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * that represents the 1Mb free space, and therefore we're able to * allocate the whole free space at once. */ - if (!test_check_exists(cache, 128 * 1024 * 1024 - 768 * 1024, - 1 * 1024 * 1024)) { + if (!test_check_exists(cache, SZ_128M - 768 * SZ_1K, SZ_1M)) { test_msg("Expected region not marked as free\n"); return -ENOENT; } - if (cache->free_space_ctl->free_space != (1 * 1024 * 1024 + 8192)) { + if (cache->free_space_ctl->free_space != (SZ_1M + 8192)) { test_msg("Cache free space is not 1Mb + 8Kb\n"); return -EINVAL; } - offset = btrfs_find_space_for_alloc(cache, - 0, 1 * 1024 * 1024, 0, + offset = btrfs_find_space_for_alloc(cache, 0, SZ_1M, 0, &max_extent_size); - if (offset != (128 * 1024 * 1024 - 768 * 1024)) { + if (offset != (SZ_128M - 768 * SZ_1K)) { test_msg("Failed to allocate 1Mb from space cache, returned offset is: %llu\n", offset); return -EINVAL; @@ -867,7 +841,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) offset = btrfs_find_space_for_alloc(cache, 0, 8192, 0, &max_extent_size); - if (offset != (32 * 1024 * 1024)) { + if (offset != SZ_32M) { test_msg("Failed to allocate 8Kb from space cache, returned offset is: %llu\n", offset); return -EINVAL; diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 054fc0d..5de55fd 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -100,7 +100,7 @@ static void insert_inode_item_key(struct btrfs_root *root) static void setup_file_extents(struct btrfs_root *root) { int slot = 0; - u64 disk_bytenr = 1 * 1024 * 1024; + u64 disk_bytenr = SZ_1M; u64 offset = 0; /* First we want a hole */ diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8c761ae..adb79ad 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1406,7 +1406,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, * we don't want to overwrite the superblock on the drive, * so we make sure to start at an offset of at least 1MB */ - search_start = max(root->fs_info->alloc_start, 1024ull * 1024); + search_start = max_t(u64, root->fs_info->alloc_start, SZ_1M); return find_free_dev_extent_start(trans->transaction, device, num_bytes, search_start, start, len); } @@ -3405,7 +3405,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) list_for_each_entry(device, devices, dev_list) { old_size = btrfs_device_get_total_bytes(device); size_to_free = div_factor(old_size, 1); - size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); + size_to_free = min_t(u64, size_to_free, SZ_1M); if (!device->writeable || btrfs_device_get_total_bytes(device) - btrfs_device_get_bytes_used(device) > size_to_free || @@ -4459,7 +4459,7 @@ static int btrfs_cmp_device_info(const void *a, const void *b) static u32 find_raid56_stripe_len(u32 data_devices, u32 dev_stripe_target) { /* TODO allow them to set a preferred stripe size */ - return 64 * 1024; + return SZ_64K; } static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) @@ -4527,21 +4527,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, ncopies = btrfs_raid_array[index].ncopies; if (type & BTRFS_BLOCK_GROUP_DATA) { - max_stripe_size = 1024 * 1024 * 1024; + max_stripe_size = SZ_1G; max_chunk_size = 10 * max_stripe_size; if (!devs_max) devs_max = BTRFS_MAX_DEVS(info->chunk_root); } else if (type & BTRFS_BLOCK_GROUP_METADATA) { /* for larger filesystems, use larger metadata chunks */ - if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) - max_stripe_size = 1024 * 1024 * 1024; + if (fs_devices->total_rw_bytes > 50ULL * SZ_1G) + max_stripe_size = SZ_1G; else - max_stripe_size = 256 * 1024 * 1024; + max_stripe_size = SZ_256M; max_chunk_size = max_stripe_size; if (!devs_max) devs_max = BTRFS_MAX_DEVS(info->chunk_root); } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { - max_stripe_size = 32 * 1024 * 1024; + max_stripe_size = SZ_32M; max_chunk_size = 2 * max_stripe_size; if (!devs_max) devs_max = BTRFS_MAX_DEVS_SYS_CHUNK; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d5c84f6..30bbc3e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -26,7 +26,7 @@ extern struct mutex uuid_mutex; -#define BTRFS_STRIPE_LEN (64 * 1024) +#define BTRFS_STRIPE_LEN SZ_64K struct buffer_head; struct btrfs_pending_bios { -- cgit v0.10.2 From b69f2bef484844ccd9038a850cdcf562afff9d08 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 18 Dec 2015 22:16:59 +0800 Subject: btrfs: use list_for_each_entry* in check-integrity.c Use list_for_each_entry*() instead of list_for_each*() to simplify the code. Signed-off-by: Geliang Tang Reviewed-by: David Sterba Signed-off-by: David Sterba diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 0340c57..861d472 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -531,13 +531,9 @@ static struct btrfsic_block *btrfsic_block_hashtable_lookup( (((unsigned int)(dev_bytenr >> 16)) ^ ((unsigned int)((uintptr_t)bdev))) & (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1); - struct list_head *elem; - - list_for_each(elem, h->table + hashval) { - struct btrfsic_block *const b = - list_entry(elem, struct btrfsic_block, - collision_resolving_node); + struct btrfsic_block *b; + list_for_each_entry(b, h->table + hashval, collision_resolving_node) { if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr) return b; } @@ -588,13 +584,9 @@ static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup( ((unsigned int)((uintptr_t)bdev_ref_to)) ^ ((unsigned int)((uintptr_t)bdev_ref_from))) & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1); - struct list_head *elem; - - list_for_each(elem, h->table + hashval) { - struct btrfsic_block_link *const l = - list_entry(elem, struct btrfsic_block_link, - collision_resolving_node); + struct btrfsic_block_link *l; + list_for_each_entry(l, h->table + hashval, collision_resolving_node) { BUG_ON(NULL == l->block_ref_to); BUG_ON(NULL == l->block_ref_from); if (l->block_ref_to->dev_state->bdev == bdev_ref_to && @@ -639,13 +631,9 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup( const unsigned int hashval = (((unsigned int)((uintptr_t)bdev)) & (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1)); - struct list_head *elem; - - list_for_each(elem, h->table + hashval) { - struct btrfsic_dev_state *const ds = - list_entry(elem, struct btrfsic_dev_state, - collision_resolving_node); + struct btrfsic_dev_state *ds; + list_for_each_entry(ds, h->table + hashval, collision_resolving_node) { if (ds->bdev == bdev) return ds; } @@ -1720,29 +1708,20 @@ static int btrfsic_read_block(struct btrfsic_state *state, static void btrfsic_dump_database(struct btrfsic_state *state) { - struct list_head *elem_all; + const struct btrfsic_block *b_all; BUG_ON(NULL == state); printk(KERN_INFO "all_blocks_list:\n"); - list_for_each(elem_all, &state->all_blocks_list) { - const struct btrfsic_block *const b_all = - list_entry(elem_all, struct btrfsic_block, - all_blocks_node); - struct list_head *elem_ref_to; - struct list_head *elem_ref_from; + list_for_each_entry(b_all, &state->all_blocks_list, all_blocks_node) { + const struct btrfsic_block_link *l; printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n", btrfsic_get_block_type(state, b_all), b_all->logical_bytenr, b_all->dev_state->name, b_all->dev_bytenr, b_all->mirror_num); - list_for_each(elem_ref_to, &b_all->ref_to_list) { - const struct btrfsic_block_link *const l = - list_entry(elem_ref_to, - struct btrfsic_block_link, - node_ref_to); - + list_for_each_entry(l, &b_all->ref_to_list, node_ref_to) { printk(KERN_INFO " %c @%llu (%s/%llu/%d)" " refers %u* to" " %c @%llu (%s/%llu/%d)\n", @@ -1757,12 +1736,7 @@ static void btrfsic_dump_database(struct btrfsic_state *state) l->block_ref_to->mirror_num); } - list_for_each(elem_ref_from, &b_all->ref_from_list) { - const struct btrfsic_block_link *const l = - list_entry(elem_ref_from, - struct btrfsic_block_link, - node_ref_from); - + list_for_each_entry(l, &b_all->ref_from_list, node_ref_from) { printk(KERN_INFO " %c @%llu (%s/%llu/%d)" " is ref %u* from" " %c @%llu (%s/%llu/%d)\n", @@ -1845,8 +1819,7 @@ again: &state->block_hashtable); if (NULL != block) { u64 bytenr = 0; - struct list_head *elem_ref_to; - struct list_head *tmp_ref_to; + struct btrfsic_block_link *l, *tmp; if (block->is_superblock) { bytenr = btrfs_super_bytenr((struct btrfs_super_block *) @@ -1967,13 +1940,8 @@ again: * because it still carries valueable information * like whether it was ever written and IO completed. */ - list_for_each_safe(elem_ref_to, tmp_ref_to, - &block->ref_to_list) { - struct btrfsic_block_link *const l = - list_entry(elem_ref_to, - struct btrfsic_block_link, - node_ref_to); - + list_for_each_entry_safe(l, tmp, &block->ref_to_list, + node_ref_to) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) btrfsic_print_rem_link(state, l); l->ref_cnt--; @@ -2436,7 +2404,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, struct btrfsic_block *const block, int recursion_level) { - struct list_head *elem_ref_to; + const struct btrfsic_block_link *l; int ret = 0; if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { @@ -2464,11 +2432,7 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state, * This algorithm is recursive because the amount of used stack * space is very small and the max recursion depth is limited. */ - list_for_each(elem_ref_to, &block->ref_to_list) { - const struct btrfsic_block_link *const l = - list_entry(elem_ref_to, struct btrfsic_block_link, - node_ref_to); - + list_for_each_entry(l, &block->ref_to_list, node_ref_to) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "rl=%d, %c @%llu (%s/%llu/%d)" @@ -2561,7 +2525,7 @@ static int btrfsic_is_block_ref_by_superblock( const struct btrfsic_block *block, int recursion_level) { - struct list_head *elem_ref_from; + const struct btrfsic_block_link *l; if (recursion_level >= 3 + BTRFS_MAX_LEVEL) { /* refer to comment at "abort cyclic linkage (case 1)" */ @@ -2576,11 +2540,7 @@ static int btrfsic_is_block_ref_by_superblock( * This algorithm is recursive because the amount of used stack space * is very small and the max recursion depth is limited. */ - list_for_each(elem_ref_from, &block->ref_from_list) { - const struct btrfsic_block_link *const l = - list_entry(elem_ref_from, struct btrfsic_block_link, - node_ref_from); - + list_for_each_entry(l, &block->ref_from_list, node_ref_from) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) printk(KERN_INFO "rl=%d, %c @%llu (%s/%llu/%d)" @@ -2669,7 +2629,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, const struct btrfsic_block *block, int indent_level) { - struct list_head *elem_ref_to; + const struct btrfsic_block_link *l; int indent_add; static char buf[80]; int cursor_position; @@ -2704,11 +2664,7 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state, } cursor_position = indent_level; - list_for_each(elem_ref_to, &block->ref_to_list) { - const struct btrfsic_block_link *const l = - list_entry(elem_ref_to, struct btrfsic_block_link, - node_ref_to); - + list_for_each_entry(l, &block->ref_to_list, node_ref_to) { while (cursor_position < indent_level) { printk(" "); cursor_position++; @@ -3165,8 +3121,7 @@ int btrfsic_mount(struct btrfs_root *root, void btrfsic_unmount(struct btrfs_root *root, struct btrfs_fs_devices *fs_devices) { - struct list_head *elem_all; - struct list_head *tmp_all; + struct btrfsic_block *b_all, *tmp_all; struct btrfsic_state *state; struct list_head *dev_head = &fs_devices->devices; struct btrfs_device *device; @@ -3206,20 +3161,12 @@ void btrfsic_unmount(struct btrfs_root *root, * just free all memory that was allocated dynamically. * Free the blocks and the block_links. */ - list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) { - struct btrfsic_block *const b_all = - list_entry(elem_all, struct btrfsic_block, - all_blocks_node); - struct list_head *elem_ref_to; - struct list_head *tmp_ref_to; - - list_for_each_safe(elem_ref_to, tmp_ref_to, - &b_all->ref_to_list) { - struct btrfsic_block_link *const l = - list_entry(elem_ref_to, - struct btrfsic_block_link, - node_ref_to); + list_for_each_entry_safe(b_all, tmp_all, &state->all_blocks_list, + all_blocks_node) { + struct btrfsic_block_link *l, *tmp; + list_for_each_entry_safe(l, tmp, &b_all->ref_to_list, + node_ref_to) { if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE) btrfsic_print_rem_link(state, l); -- cgit v0.10.2 From 7ae1681e126919e7119cebebde1d42df8d7a49b9 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 18 Dec 2015 22:17:00 +0800 Subject: btrfs: use list_for_each_entry_safe in free-space-cache.c Use list_for_each_entry_safe() instead of list_for_each_safe() to simplify the code. Signed-off-by: Geliang Tang Reviewed-by: David Sterba Signed-off-by: David Sterba diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 1b2ff8b..e5d7ec8 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1086,14 +1086,11 @@ write_pinned_extent_entries(struct btrfs_root *root, static noinline_for_stack int write_bitmap_entries(struct btrfs_io_ctl *io_ctl, struct list_head *bitmap_list) { - struct list_head *pos, *n; + struct btrfs_free_space *entry, *next; int ret; /* Write out the bitmaps */ - list_for_each_safe(pos, n, bitmap_list) { - struct btrfs_free_space *entry = - list_entry(pos, struct btrfs_free_space, list); - + list_for_each_entry_safe(entry, next, bitmap_list, list) { ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); if (ret) return -ENOSPC; @@ -1119,13 +1116,10 @@ static int flush_dirty_cache(struct inode *inode) static void noinline_for_stack cleanup_bitmap_list(struct list_head *bitmap_list) { - struct list_head *pos, *n; + struct btrfs_free_space *entry, *next; - list_for_each_safe(pos, n, bitmap_list) { - struct btrfs_free_space *entry = - list_entry(pos, struct btrfs_free_space, list); + list_for_each_entry_safe(entry, next, bitmap_list, list) list_del_init(&entry->list); - } } static void noinline_for_stack -- cgit v0.10.2 From a7ca42256d9fad572fb7f2c471514d7d3572b1db Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 21 Dec 2015 23:50:23 +0800 Subject: btrfs: use list_for_each_entry* in backref.c Use list_for_each_entry*() to simplify the code. Signed-off-by: Geliang Tang Reviewed-by: David Sterba Signed-off-by: David Sterba diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index d453d62..08405a3 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -520,13 +520,10 @@ static inline int ref_for_same_block(struct __prelim_ref *ref1, static int __add_missing_keys(struct btrfs_fs_info *fs_info, struct list_head *head) { - struct list_head *pos; + struct __prelim_ref *ref; struct extent_buffer *eb; - list_for_each(pos, head) { - struct __prelim_ref *ref; - ref = list_entry(pos, struct __prelim_ref, list); - + list_for_each_entry(ref, head, list) { if (ref->parent) continue; if (ref->key_for_search.type) @@ -563,23 +560,15 @@ static int __add_missing_keys(struct btrfs_fs_info *fs_info, */ static void __merge_refs(struct list_head *head, int mode) { - struct list_head *pos1; + struct __prelim_ref *ref1; - list_for_each(pos1, head) { - struct list_head *n2; - struct list_head *pos2; - struct __prelim_ref *ref1; + list_for_each_entry(ref1, head, list) { + struct __prelim_ref *ref2 = ref1, *tmp; - ref1 = list_entry(pos1, struct __prelim_ref, list); - - for (pos2 = pos1->next, n2 = pos2->next; pos2 != head; - pos2 = n2, n2 = pos2->next) { - struct __prelim_ref *ref2; + list_for_each_entry_safe_continue(ref2, tmp, head, list) { struct __prelim_ref *xchg; struct extent_inode_elem *eie; - ref2 = list_entry(pos2, struct __prelim_ref, list); - if (!ref_for_same_block(ref1, ref2)) continue; if (mode == 1) { -- cgit v0.10.2 From 28f0779a3fd6ef015303780f0b9a92b24728bc4b Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 19 Nov 2015 11:42:24 +0100 Subject: btrfs tests: replace whole ops structure for free space tests Preparatory work for making btrfs_free_space_op constant. In test_steal_space_from_bitmap_to_extent, we substitute use_bitmap with own version thus preventing constification. We can rework it so we replace the whole structure with the correct function pointers. Signed-off-by: David Sterba diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 8b72b00..46c2f29 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -445,9 +445,11 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) int ret; u64 offset; u64 max_extent_size; - - bool (*use_bitmap_op)(struct btrfs_free_space_ctl *, - struct btrfs_free_space *); + struct btrfs_free_space_op test_free_space_ops = { + .recalc_thresholds = cache->free_space_ctl->op->recalc_thresholds, + .use_bitmap = test_use_bitmap, + }; + struct btrfs_free_space_op *orig_free_space_ops; test_msg("Running space stealing from bitmap to extent\n"); @@ -469,8 +471,8 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) * that forces use of bitmaps as soon as we have at least 1 * extent entry. */ - use_bitmap_op = cache->free_space_ctl->op->use_bitmap; - cache->free_space_ctl->op->use_bitmap = test_use_bitmap; + orig_free_space_ops = cache->free_space_ctl->op; + cache->free_space_ctl->op = &test_free_space_ops; /* * Extent entry covering free space range [128Mb - 256Kb, 128Mb - 128Kb[ @@ -877,7 +879,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) if (ret) return ret; - cache->free_space_ctl->op->use_bitmap = use_bitmap_op; + cache->free_space_ctl->op = orig_free_space_ops; __btrfs_remove_free_space_cache(cache->free_space_ctl); return 0; -- cgit v0.10.2 From 20e5506baf3fd651e245bc970d8c11a734ee1b8a Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 19 Nov 2015 11:42:28 +0100 Subject: btrfs: constify remaining structs with function pointers * struct extent_io_ops * struct btrfs_free_space_op Signed-off-by: David Sterba diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index cfe99be..ed8a3b7 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2016,7 +2016,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, return true; } -static struct btrfs_free_space_op free_space_op = { +static const struct btrfs_free_space_op free_space_op = { .recalc_thresholds = recalculate_thresholds, .use_bitmap = use_bitmap, }; diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h index f251865..33178c4 100644 --- a/fs/btrfs/free-space-cache.h +++ b/fs/btrfs/free-space-cache.h @@ -37,7 +37,7 @@ struct btrfs_free_space_ctl { int total_bitmaps; int unit; u64 start; - struct btrfs_free_space_op *op; + const struct btrfs_free_space_op *op; void *private; struct mutex cache_writeout_mutex; struct list_head trimming_ranges; diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 767a605..53014e9 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -334,7 +334,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, return true; } -static struct btrfs_free_space_op free_ino_op = { +static const struct btrfs_free_space_op free_ino_op = { .recalc_thresholds = recalculate_thresholds, .use_bitmap = use_bitmap, }; @@ -356,7 +356,7 @@ static bool pinned_use_bitmap(struct btrfs_free_space_ctl *ctl, return false; } -static struct btrfs_free_space_op pinned_free_ino_op = { +static const struct btrfs_free_space_op pinned_free_ino_op = { .recalc_thresholds = pinned_recalc_thresholds, .use_bitmap = pinned_use_bitmap, }; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0214a81..c4cfba7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -74,7 +74,7 @@ static const struct inode_operations btrfs_file_inode_operations; static const struct address_space_operations btrfs_aops; static const struct address_space_operations btrfs_symlink_aops; static const struct file_operations btrfs_dir_file_operations; -static struct extent_io_ops btrfs_extent_io_ops; +static const struct extent_io_ops btrfs_extent_io_ops; static struct kmem_cache *btrfs_inode_cachep; struct kmem_cache *btrfs_trans_handle_cachep; @@ -10020,7 +10020,7 @@ static const struct file_operations btrfs_dir_file_operations = { .fsync = btrfs_sync_file, }; -static struct extent_io_ops btrfs_extent_io_ops = { +static const struct extent_io_ops btrfs_extent_io_ops = { .fill_delalloc = run_delalloc_range, .submit_bio_hook = btrfs_submit_bio_hook, .merge_bio_hook = btrfs_merge_bio_hook, diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index 46c2f29..69a11e6 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -445,11 +445,11 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group_cache *cache) int ret; u64 offset; u64 max_extent_size; - struct btrfs_free_space_op test_free_space_ops = { + const struct btrfs_free_space_op test_free_space_ops = { .recalc_thresholds = cache->free_space_ctl->op->recalc_thresholds, .use_bitmap = test_use_bitmap, }; - struct btrfs_free_space_op *orig_free_space_ops; + const struct btrfs_free_space_op *orig_free_space_ops; test_msg("Running space stealing from bitmap to extent\n"); -- cgit v0.10.2 From 4d4ab6d6bc05ba65169de9a5391e6ccbe09d8719 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 19 Nov 2015 11:42:31 +0100 Subject: btrfs: constify static arrays There are a few statically initialized arrays that can be made const. The remaining (like file_system_type, sysfs attributes or prop handlers) do not allow that due to type mismatch when passed to the APIs or because the structures are modified through other members. Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 35489e7..99059fc 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -174,7 +174,7 @@ struct btrfs_ordered_sum; /* csum types */ #define BTRFS_CSUM_TYPE_CRC32 0 -static int btrfs_csum_sizes[] = { 4 }; +static const int btrfs_csum_sizes[] = { 4 }; /* four bytes for CRC32 */ #define BTRFS_EMPTY_DIR_SIZE 0 diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c4cfba7..3e1398b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -83,7 +83,7 @@ struct kmem_cache *btrfs_path_cachep; struct kmem_cache *btrfs_free_space_cachep; #define S_SHIFT 12 -static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { +static const unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { [S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE, [S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR, [S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index da94138..fd429d7 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5286,7 +5286,7 @@ out_unlock: static int btrfs_ioctl_get_supported_features(struct file *file, void __user *arg) { - static struct btrfs_ioctl_feature_flags features[3] = { + static const struct btrfs_ioctl_feature_flags features[3] = { INIT_FEATURE_FLAGS(SUPP), INIT_FEATURE_FLAGS(SAFE_SET), INIT_FEATURE_FLAGS(SAFE_CLEAR) diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 24154e4..b813fd7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -309,7 +309,7 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_degraded, "degraded"}, {Opt_subvol, "subvol=%s"}, {Opt_subvolid, "subvolid=%s"}, -- cgit v0.10.2 From e4058b54d1e442b6b3eca949f0d63d49ba2b020d Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 27 Nov 2015 16:31:35 +0100 Subject: btrfs: cleanup, use enum values for btrfs_path reada Replace the integers by enums for better readability. The value 2 does not have any meaning since a717531942f488209dded30f6bc648167bcefa72 "Btrfs: do less aggressive btree readahead" (2009-01-22). Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 5b8e235..be1be04 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2248,7 +2248,6 @@ static void reada_for_search(struct btrfs_root *root, u64 target; u64 nread = 0; u64 gen; - int direction = path->reada; struct extent_buffer *eb; u32 nr; u32 blocksize; @@ -2276,16 +2275,16 @@ static void reada_for_search(struct btrfs_root *root, nr = slot; while (1) { - if (direction < 0) { + if (path->reada == READA_BACK) { if (nr == 0) break; nr--; - } else if (direction > 0) { + } else if (path->reada == READA_FORWARD) { nr++; if (nr >= nritems) break; } - if (path->reada < 0 && objectid) { + if (path->reada == READA_BACK && objectid) { btrfs_node_key(node, &disk_key, nr); if (btrfs_disk_key_objectid(&disk_key) != objectid) break; @@ -2493,7 +2492,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, btrfs_set_path_blocking(p); free_extent_buffer(tmp); - if (p->reada) + if (p->reada != READA_NONE) reada_for_search(root, p, level, slot, key->objectid); btrfs_release_path(p); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 99059fc..a1cf4c8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -590,6 +590,7 @@ struct btrfs_node { * The slots array records the index of the item or block pointer * used while walking the tree. */ +enum { READA_NONE = 0, READA_BACK, READA_FORWARD }; struct btrfs_path { struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d796b53..9e3639b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -438,7 +438,7 @@ static noinline void caching_thread(struct btrfs_work *work) */ path->skip_locking = 1; path->search_commit_root = 1; - path->reada = 1; + path->reada = READA_FORWARD; key.objectid = last; key.offset = 0; @@ -2115,7 +2115,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - path->reada = 1; + path->reada = READA_FORWARD; path->leave_spinning = 1; /* this will setup the path even if it fails to insert the back ref */ ret = insert_inline_extent_backref(trans, fs_info->extent_root, path, @@ -2141,7 +2141,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); - path->reada = 1; + path->reada = READA_FORWARD; path->leave_spinning = 1; /* now insert the actual backref */ ret = insert_extent_backref(trans, root->fs_info->extent_root, @@ -2254,7 +2254,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, } again: - path->reada = 1; + path->reada = READA_FORWARD; path->leave_spinning = 1; ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 0, 1); @@ -6438,7 +6438,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - path->reada = 1; + path->reada = READA_FORWARD; path->leave_spinning = 1; is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; @@ -9688,7 +9688,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = 1; + path->reada = READA_FORWARD; cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy); if (btrfs_test_opt(root, SPACE_CACHE) && diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 58ece65..a67e1c8 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -202,7 +202,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, } if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8) - path->reada = 2; + path->reada = READA_FORWARD; WARN_ON(bio->bi_vcnt <= 0); @@ -328,7 +328,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, if (search_commit) { path->skip_locking = 1; - path->reada = 2; + path->reada = READA_FORWARD; path->search_commit_root = 1; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 53014e9..1951ad6 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -48,7 +48,7 @@ static int caching_kthread(void *data) /* Since the commit root is read-only, we can safely skip locking. */ path->skip_locking = 1; path->search_commit_root = 1; - path->reada = 2; + path->reada = READA_FORWARD; key.objectid = BTRFS_FIRST_FREE_OBJECTID; key.offset = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 3e1398b..dc2c1be 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3342,7 +3342,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) ret = -ENOMEM; goto out; } - path->reada = -1; + path->reada = READA_BACK; key.objectid = BTRFS_ORPHAN_OBJECTID; key.type = BTRFS_ORPHAN_ITEM_KEY; @@ -4308,7 +4308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = -1; + path->reada = READA_BACK; /* * We want to drop from the next block forward in case this new size is @@ -5744,7 +5744,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) if (!path) return -ENOMEM; - path->reada = 1; + path->reada = READA_FORWARD; if (key_type == BTRFS_DIR_INDEX_KEY) { INIT_LIST_HEAD(&ins_list); @@ -6775,7 +6775,7 @@ again: * Chances are we'll be called again, so go ahead and do * readahead */ - path->reada = 1; + path->reada = READA_FORWARD; } ret = btrfs_lookup_file_extent(trans, root, path, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd429d7..6e89641 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3478,7 +3478,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, return ret; } - path->reada = 2; + path->reada = READA_FORWARD; /* clone data */ key.objectid = btrfs_ino(src); key.type = BTRFS_EXTENT_DATA_KEY; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b4ca545..ef6d8fc 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -708,8 +708,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, err = -ENOMEM; goto out; } - path1->reada = 1; - path2->reada = 2; + path1->reada = READA_FORWARD; + path2->reada = READA_FORWARD; node = alloc_backref_node(cache); if (!node) { @@ -2130,7 +2130,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = 1; + path->reada = READA_FORWARD; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; @@ -3527,7 +3527,7 @@ static int find_data_references(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = 1; + path->reada = READA_FORWARD; root = read_fs_root(rc->extent_root->fs_info, ref_root); if (IS_ERR(root)) { @@ -3917,7 +3917,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = 1; + path->reada = READA_FORWARD; ret = prepare_to_relocate(rc); if (ret) { @@ -4343,7 +4343,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = -1; + path->reada = READA_BACK; key.objectid = BTRFS_TREE_RELOC_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 1d1f639..143f2b7 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3507,7 +3507,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, if (!path) return -ENOMEM; - path->reada = 2; + path->reada = READA_FORWARD; path->search_commit_root = 1; path->skip_locking = 1; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0577d7f..e95d249 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1102,7 +1102,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = 2; + path->reada = READA_FORWARD; key.objectid = device->devid; key.offset = start; @@ -1271,7 +1271,7 @@ again: goto out; } - path->reada = 2; + path->reada = READA_FORWARD; path->search_commit_root = 1; path->skip_locking = 1; @@ -4260,7 +4260,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) if (!path) return -ENOMEM; - path->reada = 2; + path->reada = READA_FORWARD; lock_chunks(root); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 1fcd7b6..28b4e7e 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -283,7 +283,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) path = btrfs_alloc_path(); if (!path) return -ENOMEM; - path->reada = 2; + path->reada = READA_FORWARD; /* search for our xattrs */ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); -- cgit v0.10.2 From dccabfad20880bc6c8be21b538df4293506b99f8 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 27 Nov 2015 16:31:38 +0100 Subject: btrfs: use smaller type for btrfs_path reada The possible values for reada are all positive and bounded, we can later save some bytes by storing it in u8. Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index a1cf4c8..e5f9b96 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -596,7 +596,7 @@ struct btrfs_path { int slots[BTRFS_MAX_LEVEL]; /* if there is real range locking, this locks field will change */ int locks[BTRFS_MAX_LEVEL]; - int reada; + u8 reada; /* keep some upper locks as we walk down */ int lowest_level; -- cgit v0.10.2 From 7853f15b2aeeb01c587168fc3f7f0ff76a3c9bfd Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 27 Nov 2015 16:31:42 +0100 Subject: btrfs: use smaller type for btrfs_path lowest_level The level is 0..7, we can use smaller type. The size of btrfs_path is now 136 bytes from 144, which is +2 objects that fit into a 4k slab. Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e5f9b96..09ee92d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -598,7 +598,7 @@ struct btrfs_path { int locks[BTRFS_MAX_LEVEL]; u8 reada; /* keep some upper locks as we walk down */ - int lowest_level; + u8 lowest_level; /* * set by btrfs_split_item, tells search_slot to keep all locks -- cgit v0.10.2 From 4fb72bf2e913ca3798afd9e226e2416918936e49 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 27 Nov 2015 16:31:45 +0100 Subject: btrfs: use smaller type for btrfs_path locks The values of btrfs_path::locks are 0 to 4, fit into a u8. Let's see: * overall size of btrfs_path drops down from 136 to 112 (-24 bytes), * better packing in a slab page +6 objects * the whole structure now fits to 2 cachelines * slight decrease in code size: text data bss dec hex filename 938731 43670 23144 1005545 f57e9 fs/btrfs/btrfs.ko.before 938203 43670 23144 1005017 f55d9 fs/btrfs/btrfs.ko.after (and the generated assembly does not change much) The main purpose is to decrease the size of the structure without affecting performance. The byte access is usually well behaving accross arches, the locks are not accessed frequently and sometimes just compared to zero. Note for further size reduction attempts: the slots could be made u16 but this might generate worse code on some arches (non-byte and non-int access). Also the range of operations on slots is wider compared to locks and the potential performance drop should be evaluated first. Signed-off-by: David Sterba diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 09ee92d..5e0fe09 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -595,7 +595,7 @@ struct btrfs_path { struct extent_buffer *nodes[BTRFS_MAX_LEVEL]; int slots[BTRFS_MAX_LEVEL]; /* if there is real range locking, this locks field will change */ - int locks[BTRFS_MAX_LEVEL]; + u8 locks[BTRFS_MAX_LEVEL]; u8 reada; /* keep some upper locks as we walk down */ u8 lowest_level; -- cgit v0.10.2 From a1ee736268448d74af25737568e383acb84c3c18 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 10 Nov 2015 18:53:56 +0100 Subject: btrfs: do an allocation earlier during snapshot creation We can allocate pending_snapshot earlier and do not have to do cleanup in case of failure. Signed-off-by: David Sterba diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6e89641..fa25091 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -655,22 +655,20 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) return -EINVAL; + pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); + if (!pending_snapshot) + return -ENOMEM; + atomic_inc(&root->will_be_snapshoted); smp_mb__after_atomic(); btrfs_wait_for_no_snapshoting_writes(root); ret = btrfs_start_delalloc_inodes(root, 0); if (ret) - goto out; + goto dec_and_free; btrfs_wait_ordered_extents(root, -1); - pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); - if (!pending_snapshot) { - ret = -ENOMEM; - goto out; - } - btrfs_init_block_rsv(&pending_snapshot->block_rsv, BTRFS_BLOCK_RSV_TEMP); /* @@ -686,7 +684,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, &pending_snapshot->qgroup_reserved, false); if (ret) - goto free; + goto dec_and_free; pending_snapshot->dentry = dentry; pending_snapshot->root = root; @@ -737,11 +735,11 @@ fail: btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, &pending_snapshot->block_rsv, pending_snapshot->qgroup_reserved); -free: - kfree(pending_snapshot); -out: +dec_and_free: if (atomic_dec_and_test(&root->will_be_snapshoted)) wake_up_atomic_t(&root->will_be_snapshoted); + kfree(pending_snapshot); + return ret; } -- cgit v0.10.2 From b0c0ea6338d5018e02d27c5315084fb1a5d099f6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 10 Nov 2015 18:54:00 +0100 Subject: btrfs: allocate root item at snapshot ioctl time The actual snapshot creation is delayed until transaction commit. If we cannot get enough memory for the root item there, we have to fail the whole transaction commit which is bad. So we'll allocate the memory at the ioctl call and pass it along with the pending_snapshot struct. The potential ENOMEM will be returned to the caller of snapshot ioctl. Signed-off-by: David Sterba diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fa25091..27fc660 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -659,6 +659,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, if (!pending_snapshot) return -ENOMEM; + pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item), + GFP_NOFS); + if (!pending_snapshot->root_item) { + ret = -ENOMEM; + goto free_pending; + } + atomic_inc(&root->will_be_snapshoted); smp_mb__after_atomic(); btrfs_wait_for_no_snapshoting_writes(root); @@ -738,6 +745,8 @@ fail: dec_and_free: if (atomic_dec_and_test(&root->will_be_snapshoted)) wake_up_atomic_t(&root->will_be_snapshoted); +free_pending: + kfree(pending_snapshot->root_item); kfree(pending_snapshot); return ret; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index be8eae8..2074106 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1325,11 +1325,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, return 0; } - new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); - if (!new_root_item) { - pending->error = -ENOMEM; - goto root_item_alloc_fail; - } + ASSERT(pending->root_item); + new_root_item = pending->root_item; pending->error = btrfs_find_free_objectid(tree_root, &objectid); if (pending->error) @@ -1562,7 +1559,7 @@ clear_skip_qgroup: btrfs_clear_skip_qgroup(trans); no_free_objectid: kfree(new_root_item); -root_item_alloc_fail: + pending->root_item = NULL; btrfs_free_path(path); return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 64c8221..b6f9a3c 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -137,6 +137,7 @@ struct btrfs_pending_snapshot { struct dentry *dentry; struct inode *dir; struct btrfs_root *root; + struct btrfs_root_item *root_item; struct btrfs_root *snap; struct btrfs_qgroup_inherit *inherit; /* block reservation for the operation */ -- cgit v0.10.2 From 8546b570511f428838129c00e701eda481cd7c13 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 10 Nov 2015 18:54:03 +0100 Subject: btrfs: preallocate path for snapshot creation at ioctl time We can also preallocate btrfs_path that's used during pending snapshot creation and avoid another late ENOMEM failure. Signed-off-by: David Sterba diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 27fc660..3d9b27d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -661,7 +661,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item), GFP_NOFS); - if (!pending_snapshot->root_item) { + pending_snapshot->path = btrfs_alloc_path(); + if (!pending_snapshot->root_item || !pending_snapshot->path) { ret = -ENOMEM; goto free_pending; } @@ -747,6 +748,7 @@ dec_and_free: wake_up_atomic_t(&root->will_be_snapshoted); free_pending: kfree(pending_snapshot->root_item); + btrfs_free_path(pending_snapshot->path); kfree(pending_snapshot); return ret; diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2074106..be463b7 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1319,11 +1319,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, u64 root_flags; uuid_le new_uuid; - path = btrfs_alloc_path(); - if (!path) { - pending->error = -ENOMEM; - return 0; - } + ASSERT(pending->path); + path = pending->path; ASSERT(pending->root_item); new_root_item = pending->root_item; @@ -1561,6 +1558,8 @@ no_free_objectid: kfree(new_root_item); pending->root_item = NULL; btrfs_free_path(path); + pending->path = NULL; + return ret; } diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b6f9a3c..72be51f 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -140,6 +140,7 @@ struct btrfs_pending_snapshot { struct btrfs_root_item *root_item; struct btrfs_root *snap; struct btrfs_qgroup_inherit *inherit; + struct btrfs_path *path; /* block reservation for the operation */ struct btrfs_block_rsv block_rsv; u64 qgroup_reserved; -- cgit v0.10.2 From ca8a51b3a979d57b082b14eda38602b7f52d81d1 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sat, 10 Oct 2015 17:59:53 +0200 Subject: btrfs: statfs: report zero available if metadata are exhausted There is one ENOSPC case that's very confusing. There's Available greater than zero but no file operation succeds (besides removing files). This happens when the metadata are exhausted and there's no possibility to allocate another chunk. In this scenario it's normal that there's still some space in the data chunk and the calculation in df reflects that in the Avail value. To at least give some clue about the ENOSPC situation, let statfs report zero value in Avail, even if there's still data space available. Current: /dev/sdb1 4.0G 3.3G 719M 83% /mnt/test New: /dev/sdb1 4.0G 3.3G 0 100% /mnt/test We calculate the remaining metadata space minus global reserve. If this is (supposedly) smaller than zero, there's no space. But this does not hold in practice, the exhausted state happens where's still some positive delta. So we apply some guesswork and compare the delta to a 4M threshold. (Practically observed delta was 2M.) We probably cannot calculate the exact threshold value because this depends on the internal reservations requested by various operations, so some operations that consume a few metadata will succeed even if the Avail is zero. But this is better than the other way around. Signed-off-by: David Sterba diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b813fd7..5099e46 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1956,6 +1956,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) * there are other factors that may change the result (like a new metadata * chunk). * + * If metadata is exhausted, f_bavail will be 0. + * * FIXME: not accurate for mixed block groups, total and free/used are ok, * available appears slightly larger. */ @@ -1967,11 +1969,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) struct btrfs_space_info *found; u64 total_used = 0; u64 total_free_data = 0; + u64 total_free_meta = 0; int bits = dentry->d_sb->s_blocksize_bits; __be32 *fsid = (__be32 *)fs_info->fsid; unsigned factor = 1; struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; int ret; + u64 thresh = 0; /* * holding chunk_muext to avoid allocating new chunks, holding @@ -1997,6 +2001,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) } } } + if (found->flags & BTRFS_BLOCK_GROUP_METADATA) + total_free_meta += found->disk_total - found->disk_used; total_used += found->disk_used; } @@ -2019,6 +2025,24 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_bavail += div_u64(total_free_data, factor); buf->f_bavail = buf->f_bavail >> bits; + /* + * We calculate the remaining metadata space minus global reserve. If + * this is (supposedly) smaller than zero, there's no space. But this + * does not hold in practice, the exhausted state happens where's still + * some positive delta. So we apply some guesswork and compare the + * delta to a 4M threshold. (Practically observed delta was ~2M.) + * + * We probably cannot calculate the exact threshold value because this + * depends on the internal reservations requested by various + * operations, so some operations that consume a few metadata will + * succeed even if the Avail is zero. But this is better than the other + * way around. + */ + thresh = 4 * 1024 * 1024; + + if (total_free_meta - thresh < block_rsv->size) + buf->f_bavail = 0; + buf->f_type = BTRFS_SUPER_MAGIC; buf->f_bsize = dentry->d_sb->s_blocksize; buf->f_namelen = BTRFS_NAME_LEN; -- cgit v0.10.2 From ee592d07716139ef7c3ea8049936cbc3aafbc533 Mon Sep 17 00:00:00 2001 From: Sam Tygier Date: Wed, 6 Jan 2016 08:46:12 +0000 Subject: Btrfs: Check metadata redundancy on balance When converting a filesystem via balance check that metadata mode is at least as redundant as the data mode. For example give warning when: -dconvert=raid1 -mconvert=single Signed-off-by: Sam Tygier [ minor message reformatting ] Signed-off-by: David Sterba diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e95d249..574a717 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3748,6 +3748,13 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } } while (read_seqretry(&fs_info->profiles_lock, seq)); + if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) < + btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) { + btrfs_warn(fs_info, + "metatdata profile 0x%llx has lower redundancy than data profile 0x%llx", + bctl->meta.target, bctl->data.target); + } + if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { fs_info->num_tolerated_disk_barrier_failures = min( btrfs_calc_num_tolerated_disk_barrier_failures(fs_info), -- cgit v0.10.2 From 8cdc7c5b00d945a3c823fc4277af304abb9cb43d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 6 Jan 2016 22:42:35 +0000 Subject: Btrfs: fix fitrim discarding device area reserved for boot loader's use As of the 4.3 kernel release, the fitrim ioctl can now discard any region of a disk that is not allocated to any chunk/block group, including the first megabyte which is used for our primary superblock and by the boot loader (grub for example). Fix this by not allowing to trim/discard any region in the device starting with an offset not greater than min(alloc_start_mount_option, 1Mb), just as it was not possible before 4.3. A reproducer test case for xfstests follows. seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=/tmp/$$ status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { cd / rm -f $tmp.* } # get standard environment, filters and checks . ./common/rc . ./common/filter # real QA test starts here _need_to_be_root _supported_fs btrfs _supported_os Linux _require_scratch rm -f $seqres.full _scratch_mkfs >>$seqres.full 2>&1 # Write to the [0, 64Kb[ and [68Kb, 1Mb[ ranges of the device. These ranges are # reserved for a boot loader to use (GRUB for example) and btrfs should never # use them - neither for allocating metadata/data nor should trim/discard them. # The range [64Kb, 68Kb[ is used for the primary superblock of the filesystem. $XFS_IO_PROG -c "pwrite -S 0xfd 0 64K" $SCRATCH_DEV | _filter_xfs_io $XFS_IO_PROG -c "pwrite -S 0xfd 68K 956K" $SCRATCH_DEV | _filter_xfs_io # Now mount the filesystem and perform a fitrim against it. _scratch_mount _require_batched_discard $SCRATCH_MNT $FSTRIM_PROG $SCRATCH_MNT # Now unmount the filesystem and verify the content of the ranges was not # modified (no trim/discard happened on them). _scratch_unmount echo "Content of the ranges [0, 64Kb] and [68Kb, 1Mb[ after fitrim:" od -t x1 -N $((64 * 1024)) $SCRATCH_DEV od -t x1 -j $((68 * 1024)) -N $((956 * 1024)) $SCRATCH_DEV status=0 exit Reported-by: Vincent Petry Reported-by: Andrei Borzenkov Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=109341 Fixes: 499f377f49f0 (btrfs: iterate over unused chunk space in FITRIM) Cc: stable@vger.kernel.org # 4.3+ Signed-off-by: Filipe Manana diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a114b7b..b0a0eb2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1258,6 +1258,15 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction, int ret; int slot; struct extent_buffer *l; + u64 min_search_start; + + /* + * We don't want to overwrite the superblock on the drive nor any area + * used by the boot loader (grub for example), so we make sure to start + * at an offset of at least 1MB. + */ + min_search_start = max(root->fs_info->alloc_start, 1024ull * 1024); + search_start = max(search_start, min_search_start); path = btrfs_alloc_path(); if (!path) @@ -1398,18 +1407,9 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 num_bytes, u64 *start, u64 *len) { - struct btrfs_root *root = device->dev_root; - u64 search_start; - /* FIXME use last free of some kind */ - - /* - * we don't want to overwrite the superblock on the drive, - * so we make sure to start at an offset of at least 1MB - */ - search_start = max(root->fs_info->alloc_start, 1024ull * 1024); return find_free_dev_extent_start(trans->transaction, device, - num_bytes, search_start, start, len); + num_bytes, 0, start, len); } static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, -- cgit v0.10.2