From a09a0a705dd6c80bc96b5e6f18dc103d4e1a7d63 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Mon, 11 Mar 2013 09:20:58 +0000
Subject: Btrfs: get better concurrency for snapshot-aware defrag work

Using spinning case instead of blocking will result in better concurrency
overall.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 13ab4de..1f268888 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2312,6 +2312,7 @@ again:
 	key.type = BTRFS_EXTENT_DATA_KEY;
 	key.offset = start;
 
+	path->leave_spinning = 1;
 	if (merge) {
 		struct btrfs_file_extent_item *fi;
 		u64 extent_len;
@@ -2368,6 +2369,7 @@ again:
 
 	btrfs_mark_buffer_dirty(leaf);
 	inode_add_bytes(inode, len);
+	btrfs_release_path(path);
 
 	ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
 			new->disk_len, 0,
@@ -2381,6 +2383,7 @@ again:
 	ret = 1;
 out_free_path:
 	btrfs_release_path(path);
+	path->leave_spinning = 0;
 	btrfs_end_transaction(trans, root);
 out_unlock:
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start, lock_end,
-- 
cgit v0.10.2


From d340d2475c6e394013325f83f499594628a9e558 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Mon, 11 Mar 2013 09:37:45 +0000
Subject: Btrfs: remove btrfs_try_spin_lock

Remove a useless function declaration

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index ca52681..b81e0e9 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -26,7 +26,6 @@
 
 void btrfs_tree_lock(struct extent_buffer *eb);
 void btrfs_tree_unlock(struct extent_buffer *eb);
-int btrfs_try_spin_lock(struct extent_buffer *eb);
 
 void btrfs_tree_read_lock(struct extent_buffer *eb);
 void btrfs_tree_read_unlock(struct extent_buffer *eb);
-- 
cgit v0.10.2


From bc178622d40d87e75abc131007342429c9b03351 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sat, 9 Mar 2013 15:18:39 +0000
Subject: btrfs: use rcu_barrier() to wait for bdev puts at unmount

Doing this would reliably fail with -EBUSY for me:

# mount /dev/sdb2 /mnt/scratch; umount /mnt/scratch; mkfs.btrfs -f /dev/sdb2
...
unable to open /dev/sdb2: Device or resource busy

because mkfs.btrfs tries to open the device O_EXCL, and somebody still has it.

Using systemtap to track bdev gets & puts shows a kworker thread doing a
blkdev put after mkfs attempts a get; this is left over from the unmount
path:

btrfs_close_devices
	__btrfs_close_devices
		call_rcu(&device->rcu, free_device);
			free_device
				INIT_WORK(&device->rcu_work, __free_device);
				schedule_work(&device->rcu_work);

so unmount might complete before __free_device fires & does its blkdev_put.

Adding an rcu_barrier() to btrfs_close_devices() causes unmount to wait
until all blkdev_put()s are done, and the device is truly free once
unmount completes.

Cc: stable@vger.kernel.org
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6b9cff4..5989a92 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -684,6 +684,12 @@ int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 		__btrfs_close_devices(fs_devices);
 		free_fs_devices(fs_devices);
 	}
+	/*
+	 * Wait for rcu kworkers under __btrfs_close_devices
+	 * to finish all blkdev_puts so device is really
+	 * free when umount is done.
+	 */
+	rcu_barrier();
 	return ret;
 }
 
-- 
cgit v0.10.2


From 492104c866cb1b62a11393adccb477f5cd2c7768 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fusionio.com>
Date: Fri, 8 Mar 2013 15:41:02 -0500
Subject: Btrfs: return EIO if we have extent tree corruption

The callers of lookup_inline_extent_info all handle getting an error back
properly, so return an error if we have corruption instead of being a jerk and
panicing.  Still WARN_ON() since this is kind of crucial and I've been seeing it
a bit too much recently for my taste, I think we're doing something wrong
somewhere.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index aaee2b7..350b9b1 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1467,8 +1467,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
 	if (ret && !insert) {
 		err = -ENOENT;
 		goto out;
+	} else if (ret) {
+		err = -EIO;
+		WARN_ON(1);
+		goto out;
 	}
-	BUG_ON(ret); /* Corruption */
 
 	leaf = path->nodes[0];
 	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
-- 
cgit v0.10.2


From 720f1e2060138855b4a1b1e8aa642f9c7feb6750 Mon Sep 17 00:00:00 2001
From: Wang Shilong <wangsl-fnst@cn.fujitsu.com>
Date: Wed, 6 Mar 2013 11:51:47 +0000
Subject: Btrfs: return as soon as possible when edquot happens

If one of qgroup fails to reserve firstly, we should return immediately,
it is unnecessary to continue check.

Signed-off-by: Wang Shilong <wangsl-fnst@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index aee4b1c..5471e47 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1525,21 +1525,23 @@ int btrfs_qgroup_reserve(struct btrfs_root *root, u64 num_bytes)
 
 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_RFER) &&
 		    qg->reserved + qg->rfer + num_bytes >
-		    qg->max_rfer)
+		    qg->max_rfer) {
 			ret = -EDQUOT;
+			goto out;
+		}
 
 		if ((qg->lim_flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) &&
 		    qg->reserved + qg->excl + num_bytes >
-		    qg->max_excl)
+		    qg->max_excl) {
 			ret = -EDQUOT;
+			goto out;
+		}
 
 		list_for_each_entry(glist, &qg->groups, next_group) {
 			ulist_add(ulist, glist->group->qgroupid,
 				  (uintptr_t)glist->group, GFP_ATOMIC);
 		}
 	}
-	if (ret)
-		goto out;
 
 	/*
 	 * no limits exceeded, now record the reservation into all qgroups
-- 
cgit v0.10.2


From 7c2ec3f0730729f4829d01f7c19538d135f86712 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Wed, 13 Mar 2013 07:43:03 -0600
Subject: Btrfs: fix warning when creating snapshots

Creating snapshot passes extent_root to commit its transaction,
but it can lead to the warning of checking root for quota in
the __btrfs_end_transaction() when someone else is committing
the current transaction.  Since we've recorded the needed root
in trans_handle, just use it to get rid of the warning.

Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index fedede1..c4a1531 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -626,14 +626,13 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
 	btrfs_trans_release_metadata(trans, root);
 	trans->block_rsv = NULL;
-	/*
-	 * the same root has to be passed to start_transaction and
-	 * end_transaction. Subvolume quota depends on this.
-	 */
-	WARN_ON(trans->root != root);
 
 	if (trans->qgroup_reserved) {
-		btrfs_qgroup_free(root, trans->qgroup_reserved);
+		/*
+		 * the same root has to be passed here between start_transaction
+		 * and end_transaction. Subvolume quota depends on this.
+		 */
+		btrfs_qgroup_free(trans->root, trans->qgroup_reserved);
 		trans->qgroup_reserved = 0;
 	}
 
-- 
cgit v0.10.2


From 3b2775942d6ccb14342f3aae55f22fbbfea8db14 Mon Sep 17 00:00:00 2001
From: Liu Bo <bo.li.liu@oracle.com>
Date: Fri, 15 Mar 2013 08:46:39 -0600
Subject: Btrfs: fix warning of free_extent_map

Users report that an extent map's list is still linked when it's actually
going to be freed from cache.

The story is that

a) when we're going to drop an extent map and may split this large one into
smaller ems, and if this large one is flagged as EXTENT_FLAG_LOGGING which means
that it's on the list to be logged, then the smaller ems split from it will also
be flagged as EXTENT_FLAG_LOGGING, and this is _not_ expected.

b) we'll keep ems from unlinking the list and freeing when they are flagged with
EXTENT_FLAG_LOGGING, because the log code holds one reference.

The end result is the warning, but the truth is that we set the flag
EXTENT_FLAG_LOGGING only during fsync.

So clear flag EXTENT_FLAG_LOGGING for extent maps split from a large one.

Reported-by: Johannes Hirte <johannes.hirte@fem.tu-ilmenau.de>
Reported-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 83c790d..7bdb47f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -591,6 +591,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 		}
 		compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
 		clear_bit(EXTENT_FLAG_PINNED, &em->flags);
+		clear_bit(EXTENT_FLAG_LOGGING, &flags);
 		remove_extent_mapping(em_tree, em);
 		if (no_splits)
 			goto next;
-- 
cgit v0.10.2