summaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/backref.c3
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c395
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/inode.c57
-rw-r--r--fs/btrfs/ioctl.c9
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/relocation.c18
-rw-r--r--fs/btrfs/send.c179
-rw-r--r--fs/btrfs/transaction.c6
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c2
13 files changed, 451 insertions, 228 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index e15d2b0..0890c83 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
- } else {
+ } else if (ret < 0) {
cache_no_acl(inode);
}
} else {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 0552a59..5eb50b5 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -185,6 +185,9 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id,
{
struct __prelim_ref *ref;
+ if (root_id == BTRFS_DATA_RELOC_TREE_OBJECTID)
+ return 0;
+
ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
if (!ref)
return -ENOMEM;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 6aad98c..6e9ff8f 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1012,6 +1012,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+ if (*pg_index == (vcnt - 1) && *pg_offset == 0)
+ memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
kunmap_atomic(kaddr);
flush_dcache_page(page_out);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 61b5bcd..c1123ec 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -39,7 +39,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
int level, int slot);
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
@@ -475,6 +475,8 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* the index is the shifted logical of the *new* root node for root replace
* operations, or the shifted logical of the affected block for all other
* operations.
+ *
+ * Note: must be called with write lock (tree_mod_log_write_lock).
*/
static noinline int
__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -483,24 +485,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
struct rb_node **new;
struct rb_node *parent = NULL;
struct tree_mod_elem *cur;
- int ret = 0;
BUG_ON(!tm);
- tree_mod_log_write_lock(fs_info);
- if (list_empty(&fs_info->tree_mod_seq_list)) {
- tree_mod_log_write_unlock(fs_info);
- /*
- * Ok we no longer care about logging modifications, free up tm
- * and return 0. Any callers shouldn't be using tm after
- * calling tree_mod_log_insert, but if they do we can just
- * change this to return a special error code to let the callers
- * do their own thing.
- */
- kfree(tm);
- return 0;
- }
-
spin_lock(&fs_info->tree_mod_seq_lock);
tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
@@ -518,18 +505,13 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
new = &((*new)->rb_left);
else if (cur->seq > tm->seq)
new = &((*new)->rb_right);
- else {
- ret = -EEXIST;
- kfree(tm);
- goto out;
- }
+ else
+ return -EEXIST;
}
rb_link_node(&tm->node, parent, new);
rb_insert_color(&tm->node, tm_root);
-out:
- tree_mod_log_write_unlock(fs_info);
- return ret;
+ return 0;
}
/*
@@ -545,19 +527,38 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
return 1;
if (eb && btrfs_header_level(eb) == 0)
return 1;
+
+ tree_mod_log_write_lock(fs_info);
+ if (list_empty(&(fs_info)->tree_mod_seq_list)) {
+ tree_mod_log_write_unlock(fs_info);
+ return 1;
+ }
+
return 0;
}
-static inline int
-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op, gfp_t flags)
+/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
+static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb)
+{
+ smp_mb();
+ if (list_empty(&(fs_info)->tree_mod_seq_list))
+ return 0;
+ if (eb && btrfs_header_level(eb) == 0)
+ return 0;
+
+ return 1;
+}
+
+static struct tree_mod_elem *
+alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
{
struct tree_mod_elem *tm;
tm = kzalloc(sizeof(*tm), flags);
if (!tm)
- return -ENOMEM;
+ return NULL;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
if (op != MOD_LOG_KEY_ADD) {
@@ -567,8 +568,9 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
tm->op = op;
tm->slot = slot;
tm->generation = btrfs_node_ptr_generation(eb, slot);
+ RB_CLEAR_NODE(&tm->node);
- return __tree_mod_log_insert(fs_info, tm);
+ return tm;
}
static noinline int
@@ -576,10 +578,27 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot,
enum mod_log_op op, gfp_t flags)
{
- if (tree_mod_dont_log(fs_info, eb))
+ struct tree_mod_elem *tm;
+ int ret;
+
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
- return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+ tm = alloc_tree_mod_elem(eb, slot, op, flags);
+ if (!tm)
+ return -ENOMEM;
+
+ if (tree_mod_dont_log(fs_info, eb)) {
+ kfree(tm);
+ return 0;
+ }
+
+ ret = __tree_mod_log_insert(fs_info, tm);
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ kfree(tm);
+
+ return ret;
}
static noinline int
@@ -587,53 +606,95 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int dst_slot, int src_slot,
int nr_items, gfp_t flags)
{
- struct tree_mod_elem *tm;
- int ret;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int ret = 0;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, eb))
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
+ tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+ if (!tm_list)
+ return -ENOMEM;
+
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+
+ tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->slot = src_slot;
+ tm->move.dst_slot = dst_slot;
+ tm->move.nr_items = nr_items;
+ tm->op = MOD_LOG_MOVE_KEYS;
+
+ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
+ MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
+ if (tree_mod_dont_log(fs_info, eb))
+ goto free_tms;
+ locked = 1;
+
/*
* When we override something during the move, we log these removals.
* This can only happen when we move towards the beginning of the
* buffer, i.e. dst_slot < src_slot.
*/
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
- ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
- MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret)
+ goto free_tms;
}
- tm = kzalloc(sizeof(*tm), flags);
- if (!tm)
- return -ENOMEM;
+ ret = __tree_mod_log_insert(fs_info, tm);
+ if (ret)
+ goto free_tms;
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
- tm->slot = src_slot;
- tm->move.dst_slot = dst_slot;
- tm->move.nr_items = nr_items;
- tm->op = MOD_LOG_MOVE_KEYS;
+ return 0;
+free_tms:
+ for (i = 0; i < nr_items; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
+ }
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+ kfree(tm);
- return __tree_mod_log_insert(fs_info, tm);
+ return ret;
}
-static inline void
-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static inline int
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+ struct tree_mod_elem **tm_list,
+ int nritems)
{
- int i;
- u32 nritems;
+ int i, j;
int ret;
- if (btrfs_header_level(eb) == 0)
- return;
-
- nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
- ret = __tree_mod_log_insert_key(fs_info, eb, i,
- MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret) {
+ for (j = nritems - 1; j > i; j--)
+ rb_erase(&tm_list[j]->node,
+ &fs_info->tree_mod_log);
+ return ret;
+ }
}
+
+ return 0;
}
static noinline int
@@ -642,17 +703,38 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *new_root, gfp_t flags,
int log_removal)
{
- struct tree_mod_elem *tm;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int ret = 0;
+ int i;
- if (tree_mod_dont_log(fs_info, NULL))
+ if (!tree_mod_need_log(fs_info, NULL))
return 0;
- if (log_removal)
- __tree_mod_log_free_eb(fs_info, old_root);
+ if (log_removal && btrfs_header_level(old_root) > 0) {
+ nritems = btrfs_header_nritems(old_root);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ flags);
+ if (!tm_list) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(old_root, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+ }
tm = kzalloc(sizeof(*tm), flags);
- if (!tm)
- return -ENOMEM;
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
tm->index = new_root->start >> PAGE_CACHE_SHIFT;
tm->old_root.logical = old_root->start;
@@ -660,7 +742,30 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
tm->generation = btrfs_header_generation(old_root);
tm->op = MOD_LOG_ROOT_REPLACE;
- return __tree_mod_log_insert(fs_info, tm);
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+
+ if (tm_list)
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+ if (!ret)
+ ret = __tree_mod_log_insert(fs_info, tm);
+
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return ret;
+
+free_tms:
+ if (tm_list) {
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+ }
+ kfree(tm);
+
+ return ret;
}
static struct tree_mod_elem *
@@ -729,31 +834,75 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
return __tree_mod_log_search(fs_info, start, min_seq, 0);
}
-static noinline void
+static noinline int
tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
struct extent_buffer *src, unsigned long dst_offset,
unsigned long src_offset, int nr_items)
{
- int ret;
+ int ret = 0;
+ struct tree_mod_elem **tm_list = NULL;
+ struct tree_mod_elem **tm_list_add, **tm_list_rem;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, NULL))
- return;
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
- return;
+ return 0;
+ tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+
+ tm_list_add = tm_list;
+ tm_list_rem = tm_list + nr_items;
for (i = 0; i < nr_items; i++) {
- ret = __tree_mod_log_insert_key(fs_info, src,
- i + src_offset,
- MOD_LOG_KEY_REMOVE, GFP_NOFS);
- BUG_ON(ret < 0);
- ret = __tree_mod_log_insert_key(fs_info, dst,
- i + dst_offset,
- MOD_LOG_KEY_ADD,
- GFP_NOFS);
- BUG_ON(ret < 0);
+ tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
+ MOD_LOG_KEY_REMOVE, GFP_NOFS);
+ if (!tm_list_rem[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+
+ tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
+ MOD_LOG_KEY_ADD, GFP_NOFS);
+ if (!tm_list_add[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+ locked = 1;
+
+ for (i = 0; i < nr_items; i++) {
+ ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
+ if (ret)
+ goto free_tms;
+ ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
+ if (ret)
+ goto free_tms;
+ }
+
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nr_items * 2; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
}
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return ret;
}
static inline void
@@ -772,18 +921,58 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
{
int ret;
- ret = __tree_mod_log_insert_key(fs_info, eb, slot,
+ ret = tree_mod_log_insert_key(fs_info, eb, slot,
MOD_LOG_KEY_REPLACE,
atomic ? GFP_ATOMIC : GFP_NOFS);
BUG_ON(ret < 0);
}
-static noinline void
+static noinline int
tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int i;
+ int ret = 0;
+
+ if (btrfs_header_level(eb) == 0)
+ return 0;
+
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
+
+ nritems = btrfs_header_nritems(eb);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
if (tree_mod_dont_log(fs_info, eb))
- return;
- __tree_mod_log_free_eb(fs_info, eb);
+ goto free_tms;
+
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+
+ return ret;
}
static noinline void
@@ -1041,8 +1230,13 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
- if (last_ref)
- tree_mod_log_free_eb(root->fs_info, buf);
+ if (last_ref) {
+ ret = tree_mod_log_free_eb(root->fs_info, buf);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
+ }
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
@@ -2758,7 +2952,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
- int prev_cmp;
+ int prev_cmp = -1;
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
@@ -2769,7 +2963,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
}
again:
- prev_cmp = -1;
b = get_old_root(root, time_seq);
level = btrfs_header_level(b);
p->locks[level] = BTRFS_READ_LOCK;
@@ -2787,6 +2980,11 @@ again:
*/
btrfs_unlock_up_safe(p, level + 1);
+ /*
+ * Since we can unwind eb's we want to do a real search every
+ * time.
+ */
+ prev_cmp = -1;
ret = key_search(b, key, level, &prev_cmp, &slot);
if (level != 0) {
@@ -3019,8 +3217,12 @@ static int push_node_left(struct btrfs_trans_handle *trans,
} else
push_items = min(src_nritems - 8, push_items);
- tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
- push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
+ push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(dst_nritems),
btrfs_node_key_ptr_offset(0),
@@ -3090,8 +3292,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
(dst_nritems) *
sizeof(struct btrfs_key_ptr));
- tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
- src_nritems - push_items, push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
+ src_nritems - push_items, push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3292,7 +3498,12 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
- tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
+ ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
+ mid, c_nritems - mid);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(split, c,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(mid),
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d58bef1..b256ddc 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7718,7 +7718,7 @@ out:
*/
if (!for_reloc && root_dropped == false)
btrfs_add_dead_root(root);
- if (err)
+ if (err && err != -EAGAIN)
btrfs_std_error(root->fs_info, err);
return err;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 51e3afa..3d03d2e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2367,10 +2367,23 @@ out_unlock:
return ret;
}
+static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
+{
+ struct old_sa_defrag_extent *old, *tmp;
+
+ if (!new)
+ return;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ kfree(new);
+}
+
static void relink_file_extents(struct new_sa_defrag_extent *new)
{
struct btrfs_path *path;
- struct old_sa_defrag_extent *old, *tmp;
struct sa_defrag_extent_backref *backref;
struct sa_defrag_extent_backref *prev = NULL;
struct inode *inode;
@@ -2413,16 +2426,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
kfree(prev);
btrfs_free_path(path);
-
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out:
+ free_sa_defrag_extent(new);
+
atomic_dec(&root->fs_info->defrag_running);
wake_up(&root->fs_info->transaction_wait);
-
- kfree(new);
}
static struct new_sa_defrag_extent *
@@ -2432,7 +2440,7 @@ record_old_file_extents(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct btrfs_key key;
- struct old_sa_defrag_extent *old, *tmp;
+ struct old_sa_defrag_extent *old;
struct new_sa_defrag_extent *new;
int ret;
@@ -2480,7 +2488,7 @@ record_old_file_extents(struct inode *inode,
if (slot >= btrfs_header_nritems(l)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out_free_list;
+ goto out_free_path;
else if (ret > 0)
break;
continue;
@@ -2509,7 +2517,7 @@ record_old_file_extents(struct inode *inode,
old = kmalloc(sizeof(*old), GFP_NOFS);
if (!old)
- goto out_free_list;
+ goto out_free_path;
offset = max(new->file_pos, key.offset);
end = min(new->file_pos + new->len, key.offset + num_bytes);
@@ -2531,15 +2539,10 @@ next:
return new;
-out_free_list:
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out_free_path:
btrfs_free_path(path);
out_kfree:
- kfree(new);
+ free_sa_defrag_extent(new);
return NULL;
}
@@ -2610,7 +2613,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
EXTENT_DEFRAG, 1, cached_state);
if (ret) {
u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
- if (last_snapshot >= BTRFS_I(inode)->generation)
+ if (0 && last_snapshot >= BTRFS_I(inode)->generation)
/* the inode is shared */
new = record_old_file_extents(inode, ordered_extent);
@@ -2710,8 +2713,14 @@ out:
btrfs_remove_ordered_extent(inode, ordered_extent);
/* for snapshot-aware defrag */
- if (new)
- relink_file_extents(new);
+ if (new) {
+ if (ret) {
+ free_sa_defrag_extent(new);
+ atomic_dec(&root->fs_info->defrag_running);
+ } else {
+ relink_file_extents(new);
+ }
+ }
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
@@ -4345,8 +4354,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
- if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
- inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+ if (newsize != oldsize) {
+ inode_inc_iversion(inode);
+ if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
+ inode->i_ctime = inode->i_mtime =
+ current_fs_time(inode->i_sb);
+ }
if (newsize > oldsize) {
truncate_pagecache(inode, newsize);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9d46f60..669eb53 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1548,6 +1548,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
printk(KERN_INFO "btrfs: Snapshot src from "
"another FS\n");
ret = -EINVAL;
+ } else if (!inode_owner_or_capable(src_inode)) {
+ /*
+ * Subvolume creation is not restricted, but snapshots
+ * are limited to own subvolumes only
+ */
+ ret = -EPERM;
} else {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
@@ -2130,7 +2136,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
if (err == -EINTR)
- goto out;
+ goto out_drop_write;
dentry = lookup_one_len(vol_args->name, parent, namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
@@ -2293,6 +2299,7 @@ out_dput:
dput(dentry);
out_unlock_dir:
mutex_unlock(&dir->i_mutex);
+out_drop_write:
mnt_drop_write_file(file);
out:
kfree(vol_args);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index c702cb6..bda1cd8 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -537,7 +537,9 @@ void btrfs_remove_ordered_extent(struct inode *inode,
*/
if (RB_EMPTY_ROOT(&tree->tree) &&
!mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
+ spin_lock(&root->fs_info->ordered_root_lock);
list_del_init(&BTRFS_I(inode)->ordered_operations);
+ spin_unlock(&root->fs_info->ordered_root_lock);
}
if (!root->nr_ordered_extents) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 4a35572..26450d8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4481,6 +4481,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret;
u64 disk_bytenr;
+ u64 new_bytenr;
LIST_HEAD(list);
ordered = btrfs_lookup_ordered_extent(inode, file_pos);
@@ -4492,13 +4493,24 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
if (ret)
goto out;
- disk_bytenr = ordered->start;
while (!list_empty(&list)) {
sums = list_entry(list.next, struct btrfs_ordered_sum, list);
list_del_init(&sums->list);
- sums->bytenr = disk_bytenr;
- disk_bytenr += sums->len;
+ /*
+ * We need to offset the new_bytenr based on where the csum is.
+ * We need to do this because we will read in entire prealloc
+ * extents but we may have written to say the middle of the
+ * prealloc extent, so we need to make sure the csum goes with
+ * the right disk offset.
+ *
+ * We can do this because the data reloc inode refers strictly
+ * to the on disk bytes, so we don't have to worry about
+ * disk_len vs real len like with real inodes since it's all
+ * disk length.
+ */
+ new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
+ sums->bytenr = new_bytenr;
btrfs_add_ordered_sum(inode, ordered, sums);
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index e46e0ed..741c839 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -121,7 +121,6 @@ struct send_ctx {
struct list_head name_cache_list;
int name_cache_size;
- struct file *cur_inode_filp;
char *read_buf;
};
@@ -2120,77 +2119,6 @@ out:
}
/*
- * Called for regular files when sending extents data. Opens a struct file
- * to read from the file.
- */
-static int open_cur_inode_file(struct send_ctx *sctx)
-{
- int ret = 0;
- struct btrfs_key key;
- struct path path;
- struct inode *inode;
- struct dentry *dentry;
- struct file *filp;
- int new = 0;
-
- if (sctx->cur_inode_filp)
- goto out;
-
- key.objectid = sctx->cur_ino;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- inode = btrfs_iget(sctx->send_root->fs_info->sb, &key, sctx->send_root,
- &new);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
- goto out;
- }
-
- dentry = d_obtain_alias(inode);
- inode = NULL;
- if (IS_ERR(dentry)) {
- ret = PTR_ERR(dentry);
- goto out;
- }
-
- path.mnt = sctx->mnt;
- path.dentry = dentry;
- filp = dentry_open(&path, O_RDONLY | O_LARGEFILE, current_cred());
- dput(dentry);
- dentry = NULL;
- if (IS_ERR(filp)) {
- ret = PTR_ERR(filp);
- goto out;
- }
- sctx->cur_inode_filp = filp;
-
-out:
- /*
- * no xxxput required here as every vfs op
- * does it by itself on failure
- */
- return ret;
-}
-
-/*
- * Closes the struct file that was created in open_cur_inode_file
- */
-static int close_cur_inode_file(struct send_ctx *sctx)
-{
- int ret = 0;
-
- if (!sctx->cur_inode_filp)
- goto out;
-
- ret = filp_close(sctx->cur_inode_filp, NULL);
- sctx->cur_inode_filp = NULL;
-
-out:
- return ret;
-}
-
-/*
* Sends a BTRFS_SEND_C_SUBVOL command/item to userspace
*/
static int send_subvol_begin(struct send_ctx *sctx)
@@ -3622,6 +3550,72 @@ out:
return ret;
}
+static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
+{
+ struct btrfs_root *root = sctx->send_root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct inode *inode;
+ struct page *page;
+ char *addr;
+ struct btrfs_key key;
+ pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t last_index;
+ unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
+ ssize_t ret = 0;
+
+ key.objectid = sctx->cur_ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+
+ if (offset + len > i_size_read(inode)) {
+ if (offset > i_size_read(inode))
+ len = 0;
+ else
+ len = offset - i_size_read(inode);
+ }
+ if (len == 0)
+ goto out;
+
+ last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ while (index <= last_index) {
+ unsigned cur_len = min_t(unsigned, len,
+ PAGE_CACHE_SIZE - pg_offset);
+ page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ if (!page) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ if (!PageUptodate(page)) {
+ btrfs_readpage(NULL, page);
+ lock_page(page);
+ if (!PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ ret = -EIO;
+ break;
+ }
+ }
+
+ addr = kmap(page);
+ memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
+ kunmap(page);
+ unlock_page(page);
+ page_cache_release(page);
+ index++;
+ pg_offset = 0;
+ len -= cur_len;
+ ret += cur_len;
+ }
+out:
+ iput(inode);
+ return ret;
+}
+
/*
* Read some bytes from the current inode/file and send a write command to
* user space.
@@ -3630,35 +3624,20 @@ static int send_write(struct send_ctx *sctx, u64 offset, u32 len)
{
int ret = 0;
struct fs_path *p;
- loff_t pos = offset;
- int num_read = 0;
- mm_segment_t old_fs;
+ ssize_t num_read = 0;
p = fs_path_alloc();
if (!p)
return -ENOMEM;
- /*
- * vfs normally only accepts user space buffers for security reasons.
- * we only read from the file and also only provide the read_buf buffer
- * to vfs. As this buffer does not come from a user space call, it's
- * ok to temporary allow kernel space buffers.
- */
- old_fs = get_fs();
- set_fs(KERNEL_DS);
-
verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
- ret = open_cur_inode_file(sctx);
- if (ret < 0)
- goto out;
-
- ret = vfs_read(sctx->cur_inode_filp, sctx->read_buf, len, &pos);
- if (ret < 0)
- goto out;
- num_read = ret;
- if (!num_read)
+ num_read = fill_read_buf(sctx, offset, len);
+ if (num_read <= 0) {
+ if (num_read < 0)
+ ret = num_read;
goto out;
+ }
ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
if (ret < 0)
@@ -3677,7 +3656,6 @@ verbose_printk("btrfs: send_write offset=%llu, len=%d\n", offset, len);
tlv_put_failure:
out:
fs_path_free(p);
- set_fs(old_fs);
if (ret < 0)
return ret;
return num_read;
@@ -4222,10 +4200,6 @@ static int changed_inode(struct send_ctx *sctx,
u64 left_gen = 0;
u64 right_gen = 0;
- ret = close_cur_inode_file(sctx);
- if (ret < 0)
- goto out;
-
sctx->cur_ino = key->objectid;
sctx->cur_inode_new_gen = 0;
@@ -4686,11 +4660,6 @@ static int send_subvol(struct send_ctx *sctx)
}
out:
- if (!ret)
- ret = close_cur_inode_file(sctx);
- else
- close_cur_inode_file(sctx);
-
free_recorded_refs(sctx);
return ret;
}
@@ -4756,8 +4725,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
}
if (!access_ok(VERIFY_READ, arg->clone_sources,
- sizeof(*arg->clone_sources *
- arg->clone_sources_count))) {
+ sizeof(*arg->clone_sources) *
+ arg->clone_sources_count)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 8c81bdc..b791cfb 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1453,7 +1453,7 @@ static void do_async_commit(struct work_struct *work)
* We've got freeze protection passed with the transaction.
* Tell lockdep about it.
*/
- if (ac->newtrans->type < TRANS_JOIN_NOLOCK)
+ if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_acquire_read(
&ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
0, 1, _THIS_IP_);
@@ -1494,7 +1494,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
* Tell lockdep we've released the freeze rwsem, since the
* async commit thread will be the one to unlock it.
*/
- if (trans->type < TRANS_JOIN_NOLOCK)
+ if (ac->newtrans->type & __TRANS_FREEZABLE)
rwsem_release(
&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1, _THIS_IP_);
@@ -1552,6 +1552,8 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans,
root->fs_info->running_transaction = NULL;
spin_unlock(&root->fs_info->trans_lock);
+ if (trans->type & __TRANS_FREEZABLE)
+ sb_end_intwrite(root->fs_info->sb);
put_transaction(cur_trans);
put_transaction(cur_trans);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 79f057c..e14e1f7 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3375,7 +3375,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_set_token_file_extent_type(leaf, fi,
BTRFS_FILE_EXTENT_REG,
&token);
- if (em->block_start == 0)
+ if (em->block_start == EXTENT_MAP_HOLE)
skip_csum = true;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 043b215..b691f37 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4488,6 +4488,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
"%Lu-%Lu\n", logical, logical+len, em->start,
em->start + em->len);
+ free_extent_map(em);
return 1;
}
@@ -4668,6 +4669,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
"found %Lu-%Lu\n", logical, em->start,
em->start + em->len);
+ free_extent_map(em);
return -EINVAL;
}