summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-12-18 17:42:05 (GMT)
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-18 17:42:05 (GMT)
commita22180d2666c018f4fef6818074d78bb76ff2bda (patch)
treea633aaf423ff39f94d00502d03dbbd99dab4b2ee /fs/btrfs/inode.c
parent2d4dce0070448bcb5ccd04553a4be4635417f565 (diff)
parent213490b301773ea9c6fb89a86424a6901fcdd069 (diff)
downloadlinux-fsl-qoriq-a22180d2666c018f4fef6818074d78bb76ff2bda.tar.xz
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason: "A big set of fixes and features. In terms of line count, most of the code comes from Stefan, who added the ability to replace a single drive in place. This is different from how btrfs normally replaces drives, and is much much much faster. Josef is plowing through our synchronous write performance. This pull request does not include the DIO_OWN_WAITING patch that was discussed on the list, but it has a number of other improvements to cut down our latencies and CPU time during fsync/O_DIRECT writes. Miao Xie has a big series of fixes and is spreading out ordered operations over more CPUs. This improves performance and reduces contention. I've put in fixes for error handling around hash collisions. These are going back to individual stable kernels as I test against them. Otherwise we have a lot of fixes and cleanups, thanks everyone! raid5/6 is being rebased against the device replacement code. I'll have it posted this Friday along with a nice series of benchmarks." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (115 commits) Btrfs: fix a bug of per-file nocow Btrfs: fix hash overflow handling Btrfs: don't take inode delalloc mutex if we're a free space inode Btrfs: fix autodefrag and umount lockup Btrfs: fix permissions of empty files not affected by umask Btrfs: put raid properties into global table Btrfs: fix BUG() in scrub when first superblock reading gives EIO Btrfs: do not call file_update_time in aio_write Btrfs: only unlock and relock if we have to Btrfs: use tokens where we can in the tree log Btrfs: optimize leaf_space_used Btrfs: don't memset new tokens Btrfs: only clear dirty on the buffer if it is marked as dirty Btrfs: move checks in set_page_dirty under DEBUG Btrfs: log changed inodes based on the extent map tree Btrfs: add path->really_keep_locks Btrfs: do not mark ems as prealloc if we are writing to them Btrfs: keep track of the extents original block length Btrfs: inline csums if we're fsyncing Btrfs: don't bother copying if we're only logging the inode ...
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c484
1 files changed, 294 insertions, 190 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 95542a1..67ed24a 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -71,6 +71,7 @@ static const struct file_operations btrfs_dir_file_operations;
static struct extent_io_ops btrfs_extent_io_ops;
static struct kmem_cache *btrfs_inode_cachep;
+static struct kmem_cache *btrfs_delalloc_work_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_path_cachep;
@@ -94,6 +95,10 @@ static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock);
+static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
+ u64 len, u64 orig_start,
+ u64 block_start, u64 block_len,
+ u64 orig_block_len, int type);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
@@ -698,14 +703,19 @@ retry:
em->block_start = ins.objectid;
em->block_len = ins.offset;
+ em->orig_block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
em->compress_type = async_extent->compress_type;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -803,14 +813,14 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
* required to start IO on it. It may be clean and already done with
* IO when we return.
*/
-static noinline int cow_file_range(struct inode *inode,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written,
- int unlock)
+static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
+ struct inode *inode,
+ struct btrfs_root *root,
+ struct page *locked_page,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written,
+ int unlock)
{
- struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
u64 alloc_hint = 0;
u64 num_bytes;
unsigned long ram_size;
@@ -823,25 +833,10 @@ static noinline int cow_file_range(struct inode *inode,
int ret = 0;
BUG_ON(btrfs_is_free_space_inode(inode));
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- extent_clear_unlock_delalloc(inode,
- &BTRFS_I(inode)->io_tree,
- start, end, locked_page,
- EXTENT_CLEAR_UNLOCK_PAGE |
- EXTENT_CLEAR_UNLOCK |
- EXTENT_CLEAR_DELALLOC |
- EXTENT_CLEAR_DIRTY |
- EXTENT_SET_WRITEBACK |
- EXTENT_END_WRITEBACK);
- return PTR_ERR(trans);
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
num_bytes = (end - start + blocksize) & ~(blocksize - 1);
num_bytes = max(blocksize, num_bytes);
disk_num_bytes = num_bytes;
- ret = 0;
/* if this is a small write inside eof, kick off defrag */
if (num_bytes < 64 * 1024 &&
@@ -900,12 +895,17 @@ static noinline int cow_file_range(struct inode *inode,
em->block_start = ins.objectid;
em->block_len = ins.offset;
+ em->orig_block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
+ em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -952,11 +952,9 @@ static noinline int cow_file_range(struct inode *inode,
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
}
- ret = 0;
out:
- btrfs_end_transaction(trans, root);
-
return ret;
+
out_unlock:
extent_clear_unlock_delalloc(inode,
&BTRFS_I(inode)->io_tree,
@@ -971,6 +969,39 @@ out_unlock:
goto out;
}
+static noinline int cow_file_range(struct inode *inode,
+ struct page *locked_page,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written,
+ int unlock)
+{
+ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ extent_clear_unlock_delalloc(inode,
+ &BTRFS_I(inode)->io_tree,
+ start, end, locked_page,
+ EXTENT_CLEAR_UNLOCK_PAGE |
+ EXTENT_CLEAR_UNLOCK |
+ EXTENT_CLEAR_DELALLOC |
+ EXTENT_CLEAR_DIRTY |
+ EXTENT_SET_WRITEBACK |
+ EXTENT_END_WRITEBACK);
+ return PTR_ERR(trans);
+ }
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+
+ ret = __cow_file_range(trans, inode, root, locked_page, start, end,
+ page_started, nr_written, unlock);
+
+ btrfs_end_transaction(trans, root);
+
+ return ret;
+}
+
/*
* work queue call back to started compression on a file and pages
*/
@@ -1126,6 +1157,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
u64 extent_offset;
u64 disk_bytenr;
u64 num_bytes;
+ u64 disk_num_bytes;
int extent_type;
int ret, err;
int type;
@@ -1228,6 +1260,8 @@ next_slot:
extent_offset = btrfs_file_extent_offset(leaf, fi);
extent_end = found_key.offset +
btrfs_file_extent_num_bytes(leaf, fi);
+ disk_num_bytes =
+ btrfs_file_extent_disk_num_bytes(leaf, fi);
if (extent_end <= start) {
path->slots[0]++;
goto next_slot;
@@ -1281,9 +1315,9 @@ out_check:
btrfs_release_path(path);
if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page, cow_start,
- found_key.offset - 1, page_started,
- nr_written, 1);
+ ret = __cow_file_range(trans, inode, root, locked_page,
+ cow_start, found_key.offset - 1,
+ page_started, nr_written, 1);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto error;
@@ -1298,16 +1332,21 @@ out_check:
em = alloc_extent_map();
BUG_ON(!em); /* -ENOMEM */
em->start = cur_offset;
- em->orig_start = em->start;
+ em->orig_start = found_key.offset - extent_offset;
em->len = num_bytes;
em->block_len = num_bytes;
em->block_start = disk_bytenr;
+ em->orig_block_len = disk_num_bytes;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
- set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+ set_bit(EXTENT_FLAG_FILLING, &em->flags);
+ em->generation = -1;
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
@@ -1352,8 +1391,9 @@ out_check:
}
if (cow_start != (u64)-1) {
- ret = cow_file_range(inode, locked_page, cow_start, end,
- page_started, nr_written, 1);
+ ret = __cow_file_range(trans, inode, root, locked_page,
+ cow_start, end,
+ page_started, nr_written, 1);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
goto error;
@@ -1531,7 +1571,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
unsigned long bio_flags)
{
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
- struct btrfs_mapping_tree *map_tree;
u64 logical = (u64)bio->bi_sector << 9;
u64 length = 0;
u64 map_length;
@@ -1541,11 +1580,10 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
return 0;
length = bio->bi_size;
- map_tree = &root->fs_info->mapping_tree;
map_length = length;
- ret = btrfs_map_block(map_tree, READ, logical,
+ ret = btrfs_map_block(root->fs_info, READ, logical,
&map_length, NULL, 0);
- /* Will always return 0 or 1 with map_multi == NULL */
+ /* Will always return 0 with map_multi == NULL */
BUG_ON(ret < 0);
if (map_length < length + size)
return 1;
@@ -1586,7 +1624,12 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio,
u64 bio_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
- return btrfs_map_bio(root, rw, bio, mirror_num, 1);
+ int ret;
+
+ ret = btrfs_map_bio(root, rw, bio, mirror_num, 1);
+ if (ret)
+ bio_endio(bio, ret);
+ return ret;
}
/*
@@ -1601,6 +1644,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
int ret = 0;
int skip_sum;
int metadata = 0;
+ int async = !atomic_read(&BTRFS_I(inode)->sync_writers);
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
@@ -1610,31 +1654,43 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
if (!(rw & REQ_WRITE)) {
ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata);
if (ret)
- return ret;
+ goto out;
if (bio_flags & EXTENT_BIO_COMPRESSED) {
- return btrfs_submit_compressed_read(inode, bio,
- mirror_num, bio_flags);
+ ret = btrfs_submit_compressed_read(inode, bio,
+ mirror_num,
+ bio_flags);
+ goto out;
} else if (!skip_sum) {
ret = btrfs_lookup_bio_sums(root, inode, bio, NULL);
if (ret)
- return ret;
+ goto out;
}
goto mapit;
- } else if (!skip_sum) {
+ } else if (async && !skip_sum) {
/* csum items have already been cloned */
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
goto mapit;
/* we're doing a write, do the async checksumming */
- return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
+ ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
inode, rw, bio, mirror_num,
bio_flags, bio_offset,
__btrfs_submit_bio_start,
__btrfs_submit_bio_done);
+ goto out;
+ } else if (!skip_sum) {
+ ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
+ if (ret)
+ goto out;
}
mapit:
- return btrfs_map_bio(root, rw, bio, mirror_num, 0);
+ ret = btrfs_map_bio(root, rw, bio, mirror_num, 0);
+
+out:
+ if (ret < 0)
+ bio_endio(bio, ret);
+ return ret;
}
/*
@@ -1657,8 +1713,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state)
{
- if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
- WARN_ON(1);
+ WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0);
return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
cached_state, GFP_NOFS);
}
@@ -1867,22 +1922,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
- ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
- if (!ret) {
- if (nolock)
- trans = btrfs_join_transaction_nolock(root);
- else
- trans = btrfs_join_transaction(root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto out;
- }
- trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_update_inode_fallback(trans, root, inode);
- if (ret) /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
+ btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+ if (nolock)
+ trans = btrfs_join_transaction_nolock(root);
+ else
+ trans = btrfs_join_transaction(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+ trans = NULL;
+ goto out;
}
+ trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+ ret = btrfs_update_inode_fallback(trans, root, inode);
+ if (ret) /* -ENOMEM or corruption */
+ btrfs_abort_transaction(trans, root, ret);
goto out;
}
@@ -1931,15 +1984,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
- ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
- if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
- ret = btrfs_update_inode_fallback(trans, root, inode);
- if (ret) { /* -ENOMEM or corruption */
- btrfs_abort_transaction(trans, root, ret);
- goto out_unlock;
- }
- } else {
- btrfs_set_inode_last_trans(trans, inode);
+ btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+ ret = btrfs_update_inode_fallback(trans, root, inode);
+ if (ret) { /* -ENOMEM or corruption */
+ btrfs_abort_transaction(trans, root, ret);
+ goto out_unlock;
}
ret = 0;
out_unlock:
@@ -3074,7 +3123,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
struct btrfs_trans_handle *trans;
struct inode *inode = dentry->d_inode;
int ret;
- unsigned long nr = 0;
trans = __unlink_start_trans(dir, dentry);
if (IS_ERR(trans))
@@ -3094,9 +3142,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
}
out:
- nr = trans->blocks_used;
__unlink_end_trans(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return ret;
}
@@ -3186,7 +3233,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
int err = 0;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_trans_handle *trans;
- unsigned long nr = 0;
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
@@ -3215,9 +3261,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
if (!err)
btrfs_i_size_write(inode, 0);
out:
- nr = trans->blocks_used;
__unlink_end_trans(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -3497,11 +3542,11 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
if (ret)
goto out;
- ret = -ENOMEM;
again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
+ ret = -ENOMEM;
goto out;
}
@@ -3550,7 +3595,6 @@ again:
goto out_unlock;
}
- ret = 0;
if (offset != PAGE_CACHE_SIZE) {
if (!len)
len = PAGE_CACHE_SIZE - offset;
@@ -3668,6 +3712,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
+ hole_em->orig_block_len = 0;
hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = trans->transid;
@@ -3783,7 +3828,6 @@ void btrfs_evict_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_block_rsv *rsv, *global_rsv;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
- unsigned long nr;
int ret;
trace_btrfs_inode_evict(inode);
@@ -3829,7 +3873,8 @@ void btrfs_evict_inode(struct inode *inode)
* inode item when doing the truncate.
*/
while (1) {
- ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);
+ ret = btrfs_block_rsv_refill(root, rsv, min_size,
+ BTRFS_RESERVE_FLUSH_LIMIT);
/*
* Try and steal from the global reserve since we will
@@ -3847,7 +3892,7 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
- trans = btrfs_start_transaction_noflush(root, 1);
+ trans = btrfs_start_transaction_lflush(root, 1);
if (IS_ERR(trans)) {
btrfs_orphan_del(NULL, inode);
btrfs_free_block_rsv(root, rsv);
@@ -3864,10 +3909,9 @@ void btrfs_evict_inode(struct inode *inode)
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
trans = NULL;
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
}
btrfs_free_block_rsv(root, rsv);
@@ -3883,9 +3927,8 @@ void btrfs_evict_inode(struct inode *inode)
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
btrfs_return_ino(root, btrfs_ino(inode));
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
no_delete:
clear_inode(inode);
return;
@@ -4775,8 +4818,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
if (S_ISREG(mode)) {
if (btrfs_test_opt(root, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
- if (btrfs_test_opt(root, NODATACOW) ||
- (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW))
+ if (btrfs_test_opt(root, NODATACOW))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
}
@@ -4842,7 +4884,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
ret = btrfs_insert_dir_item(trans, root, name, name_len,
parent_inode, &key,
btrfs_inode_type(inode), index);
- if (ret == -EEXIST)
+ if (ret == -EEXIST || ret == -EOVERFLOW)
goto fail_dir_item;
else if (ret) {
btrfs_abort_transaction(trans, root, ret);
@@ -4897,7 +4939,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
int err;
int drop_inode = 0;
u64 objectid;
- unsigned long nr = 0;
u64 index = 0;
if (!new_valid_dev(rdev))
@@ -4930,6 +4971,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
+ err = btrfs_update_inode(trans, root, inode);
+ if (err) {
+ drop_inode = 1;
+ goto out_unlock;
+ }
+
/*
* If the active LSM wants to access the inode during
* d_instantiate it needs these. Smack checks to see
@@ -4947,9 +4994,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
d_instantiate(dentry, inode);
}
out_unlock:
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
@@ -4963,9 +5009,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = NULL;
- int drop_inode = 0;
+ int drop_inode_on_err = 0;
int err;
- unsigned long nr = 0;
u64 objectid;
u64 index = 0;
@@ -4989,12 +5034,15 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(inode);
goto out_unlock;
}
+ drop_inode_on_err = 1;
err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
- if (err) {
- drop_inode = 1;
+ if (err)
+ goto out_unlock;
+
+ err = btrfs_update_inode(trans, root, inode);
+ if (err)
goto out_unlock;
- }
/*
* If the active LSM wants to access the inode during
@@ -5007,21 +5055,20 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
- drop_inode = 1;
- else {
- inode->i_mapping->a_ops = &btrfs_aops;
- inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
- d_instantiate(dentry, inode);
- }
+ goto out_unlock;
+
+ inode->i_mapping->a_ops = &btrfs_aops;
+ inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+ BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+ d_instantiate(dentry, inode);
+
out_unlock:
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- if (drop_inode) {
+ if (err && drop_inode_on_err) {
inode_dec_link_count(inode);
iput(inode);
}
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -5032,7 +5079,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
struct btrfs_root *root = BTRFS_I(dir)->root;
struct inode *inode = old_dentry->d_inode;
u64 index;
- unsigned long nr = 0;
int err;
int drop_inode = 0;
@@ -5062,6 +5108,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
inode_inc_iversion(inode);
inode->i_ctime = CURRENT_TIME;
ihold(inode);
+ set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
@@ -5076,14 +5123,13 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
btrfs_log_new_name(trans, inode, NULL, parent);
}
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
fail:
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -5096,7 +5142,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
int drop_on_err = 0;
u64 objectid = 0;
u64 index = 0;
- unsigned long nr = 1;
/*
* 2 items for inode and ref
@@ -5142,11 +5187,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
drop_on_err = 0;
out_fail:
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
if (drop_on_err)
iput(inode);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -5340,6 +5384,7 @@ again:
if (start + len <= found_key.offset)
goto not_found;
em->start = start;
+ em->orig_start = start;
em->len = found_key.offset - start;
goto not_found_em;
}
@@ -5350,6 +5395,8 @@ again:
em->len = extent_end - extent_start;
em->orig_start = extent_start -
btrfs_file_extent_offset(leaf, item);
+ em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf,
+ item);
bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
if (bytenr == 0) {
em->block_start = EXTENT_MAP_HOLE;
@@ -5359,8 +5406,7 @@ again:
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
em->compress_type = compress_type;
em->block_start = bytenr;
- em->block_len = btrfs_file_extent_disk_num_bytes(leaf,
- item);
+ em->block_len = em->orig_block_len;
} else {
bytenr += btrfs_file_extent_offset(leaf, item);
em->block_start = bytenr;
@@ -5390,7 +5436,8 @@ again:
em->start = extent_start + extent_offset;
em->len = (copy_size + root->sectorsize - 1) &
~((u64)root->sectorsize - 1);
- em->orig_start = EXTENT_MAP_INLINE;
+ em->orig_block_len = em->len;
+ em->orig_start = em->start;
if (compress_type) {
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
em->compress_type = compress_type;
@@ -5439,11 +5486,11 @@ again:
extent_map_end(em) - 1, NULL, GFP_NOFS);
goto insert;
} else {
- printk(KERN_ERR "btrfs unknown found_type %d\n", found_type);
- WARN_ON(1);
+ WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type);
}
not_found:
em->start = start;
+ em->orig_start = start;
em->len = len;
not_found_em:
em->block_start = EXTENT_MAP_HOLE;
@@ -5645,38 +5692,19 @@ out:
}
static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
- struct extent_map *em,
u64 start, u64 len)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+ struct extent_map *em;
struct btrfs_key ins;
u64 alloc_hint;
int ret;
- bool insert = false;
-
- /*
- * Ok if the extent map we looked up is a hole and is for the exact
- * range we want, there is no reason to allocate a new one, however if
- * it is not right then we need to free this one and drop the cache for
- * our range.
- */
- if (em->block_start != EXTENT_MAP_HOLE || em->start != start ||
- em->len != len) {
- free_extent_map(em);
- em = NULL;
- insert = true;
- btrfs_drop_extent_cache(inode, start, start + len - 1, 0);
- }
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
return ERR_CAST(trans);
- if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024)
- btrfs_add_inode_defrag(trans, inode);
-
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
alloc_hint = get_extent_allocation_hint(inode, start, len);
@@ -5687,37 +5715,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
goto out;
}
- if (!em) {
- em = alloc_extent_map();
- if (!em) {
- em = ERR_PTR(-ENOMEM);
- goto out;
- }
- }
-
- em->start = start;
- em->orig_start = em->start;
- em->len = ins.offset;
-
- em->block_start = ins.objectid;
- em->block_len = ins.offset;
- em->bdev = root->fs_info->fs_devices->latest_bdev;
-
- /*
- * We need to do this because if we're using the original em we searched
- * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that.
- */
- em->flags = 0;
- set_bit(EXTENT_FLAG_PINNED, &em->flags);
-
- while (insert) {
- write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
- write_unlock(&em_tree->lock);
- if (ret != -EEXIST)
- break;
- btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0);
- }
+ em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
+ ins.offset, ins.offset, 0);
+ if (IS_ERR(em))
+ goto out;
ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
ins.offset, ins.offset, 0);
@@ -5894,7 +5895,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
u64 len, u64 orig_start,
u64 block_start, u64 block_len,
- int type)
+ u64 orig_block_len, int type)
{
struct extent_map_tree *em_tree;
struct extent_map *em;
@@ -5912,15 +5913,20 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
em->block_len = block_len;
em->block_start = block_start;
em->bdev = root->fs_info->fs_devices->latest_bdev;
+ em->orig_block_len = orig_block_len;
+ em->generation = -1;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
if (type == BTRFS_ORDERED_PREALLOC)
- set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
+ set_bit(EXTENT_FLAG_FILLING, &em->flags);
do {
btrfs_drop_extent_cache(inode, em->start,
em->start + em->len - 1, 0);
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);
+ if (!ret)
+ list_move(&em->list,
+ &em_tree->modified_extents);
write_unlock(&em_tree->lock);
} while (ret == -EEXIST);
@@ -6047,13 +6053,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
goto must_cow;
if (can_nocow_odirect(trans, inode, start, len) == 1) {
- u64 orig_start = em->start;
+ u64 orig_start = em->orig_start;
+ u64 orig_block_len = em->orig_block_len;
if (type == BTRFS_ORDERED_PREALLOC) {
free_extent_map(em);
em = create_pinned_em(inode, start, len,
orig_start,
- block_start, len, type);
+ block_start, len,
+ orig_block_len, type);
if (IS_ERR(em)) {
btrfs_end_transaction(trans, root);
goto unlock_err;
@@ -6077,7 +6085,8 @@ must_cow:
* it above
*/
len = bh_result->b_size;
- em = btrfs_new_extent_direct(inode, em, start, len);
+ free_extent_map(em);
+ em = btrfs_new_extent_direct(inode, start, len);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto unlock_err;
@@ -6318,6 +6327,9 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
+ if (async_submit)
+ async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers);
+
bio_get(bio);
if (!write) {
@@ -6362,7 +6374,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
{
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
struct bio *bio;
struct bio *orig_bio = dip->orig_bio;
struct bio_vec *bvec = orig_bio->bi_io_vec;
@@ -6375,7 +6386,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
int async_submit = 0;
map_length = orig_bio->bi_size;
- ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+ ret = btrfs_map_block(root->fs_info, READ, start_sector << 9,
&map_length, NULL, 0);
if (ret) {
bio_put(orig_bio);
@@ -6429,7 +6440,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio->bi_end_io = btrfs_end_dio_bio;
map_length = orig_bio->bi_size;
- ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+ ret = btrfs_map_block(root->fs_info, READ,
+ start_sector << 9,
&map_length, NULL, 0);
if (ret) {
bio_put(bio);
@@ -6582,9 +6594,17 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
btrfs_submit_direct, 0);
}
+#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
+
static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len)
{
+ int ret;
+
+ ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS);
+ if (ret)
+ return ret;
+
return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap);
}
@@ -6855,7 +6875,6 @@ static int btrfs_truncate(struct inode *inode)
int ret;
int err = 0;
struct btrfs_trans_handle *trans;
- unsigned long nr;
u64 mask = root->sectorsize - 1;
u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
@@ -6978,9 +6997,8 @@ static int btrfs_truncate(struct inode *inode)
break;
}
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
trans = btrfs_start_transaction(root, 2);
if (IS_ERR(trans)) {
@@ -7014,9 +7032,8 @@ static int btrfs_truncate(struct inode *inode)
if (ret && !err)
err = ret;
- nr = trans->blocks_used;
ret = btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
}
out:
@@ -7093,6 +7110,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
extent_io_tree_init(&ei->io_failure_tree, &inode->i_data);
ei->io_tree.track_uptodate = 1;
ei->io_failure_tree.track_uptodate = 1;
+ atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
mutex_init(&ei->delalloc_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
@@ -7203,6 +7221,8 @@ void btrfs_destroy_cachep(void)
kmem_cache_destroy(btrfs_path_cachep);
if (btrfs_free_space_cachep)
kmem_cache_destroy(btrfs_free_space_cachep);
+ if (btrfs_delalloc_work_cachep)
+ kmem_cache_destroy(btrfs_delalloc_work_cachep);
}
int btrfs_init_cachep(void)
@@ -7237,6 +7257,13 @@ int btrfs_init_cachep(void)
if (!btrfs_free_space_cachep)
goto fail;
+ btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work",
+ sizeof(struct btrfs_delalloc_work), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+ NULL);
+ if (!btrfs_delalloc_work_cachep)
+ goto fail;
+
return 0;
fail:
btrfs_destroy_cachep();
@@ -7308,6 +7335,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (S_ISDIR(old_inode->i_mode) && new_inode &&
new_inode->i_size > BTRFS_EMPTY_DIR_SIZE)
return -ENOTEMPTY;
+
+
+ /* check for collisions, even if the name isn't there */
+ ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+ new_dentry->d_name.name,
+ new_dentry->d_name.len);
+
+ if (ret) {
+ if (ret == -EEXIST) {
+ /* we shouldn't get
+ * eexist without a new_inode */
+ if (!new_inode) {
+ WARN_ON(1);
+ return ret;
+ }
+ } else {
+ /* maybe -EOVERFLOW */
+ return ret;
+ }
+ }
+ ret = 0;
+
/*
* we're using rename to replace one file with another.
* and the replacement file is large. Start IO on it now so
@@ -7447,6 +7496,49 @@ out_notrans:
return ret;
}
+static void btrfs_run_delalloc_work(struct btrfs_work *work)
+{
+ struct btrfs_delalloc_work *delalloc_work;
+
+ delalloc_work = container_of(work, struct btrfs_delalloc_work,
+ work);
+ if (delalloc_work->wait)
+ btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1);
+ else
+ filemap_flush(delalloc_work->inode->i_mapping);
+
+ if (delalloc_work->delay_iput)
+ btrfs_add_delayed_iput(delalloc_work->inode);
+ else
+ iput(delalloc_work->inode);
+ complete(&delalloc_work->completion);
+}
+
+struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,
+ int wait, int delay_iput)
+{
+ struct btrfs_delalloc_work *work;
+
+ work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS);
+ if (!work)
+ return NULL;
+
+ init_completion(&work->completion);
+ INIT_LIST_HEAD(&work->list);
+ work->inode = inode;
+ work->wait = wait;
+ work->delay_iput = delay_iput;
+ work->work.func = btrfs_run_delalloc_work;
+
+ return work;
+}
+
+void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
+{
+ wait_for_completion(&work->completion);
+ kmem_cache_free(btrfs_delalloc_work_cachep, work);
+}
+
/*
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
@@ -7456,10 +7548,15 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode;
struct inode *inode;
+ struct btrfs_delalloc_work *work, *next;
+ struct list_head works;
+ int ret = 0;
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;
+ INIT_LIST_HEAD(&works);
+
spin_lock(&root->fs_info->delalloc_lock);
while (!list_empty(head)) {
binode = list_entry(head->next, struct btrfs_inode,
@@ -7469,11 +7566,14 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
list_del_init(&binode->delalloc_inodes);
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
- filemap_flush(inode->i_mapping);
- if (delay_iput)
- btrfs_add_delayed_iput(inode);
- else
- iput(inode);
+ work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
+ if (!work) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ list_add_tail(&work->list, &works);
+ btrfs_queue_worker(&root->fs_info->flush_workers,
+ &work->work);
}
cond_resched();
spin_lock(&root->fs_info->delalloc_lock);
@@ -7492,7 +7592,12 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
atomic_read(&root->fs_info->async_delalloc_pages) == 0));
}
atomic_dec(&root->fs_info->async_submit_draining);
- return 0;
+out:
+ list_for_each_entry_safe(work, next, &works, list) {
+ list_del_init(&work->list);
+ btrfs_wait_and_free_delalloc_work(work);
+ }
+ return ret;
}
static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
@@ -7512,7 +7617,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
unsigned long ptr;
struct btrfs_file_extent_item *ei;
struct extent_buffer *leaf;
- unsigned long nr = 0;
name_len = strlen(symname) + 1;
if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
@@ -7610,13 +7714,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
out_unlock:
if (!err)
d_instantiate(dentry, inode);
- nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
if (drop_inode) {
inode_dec_link_count(inode);
iput(inode);
}
- btrfs_btree_balance_dirty(root, nr);
+ btrfs_btree_balance_dirty(root);
return err;
}
@@ -7679,6 +7782,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
em->len = ins.offset;
em->block_start = ins.objectid;
em->block_len = ins.offset;
+ em->orig_block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
em->generation = trans->transid;