From c64e38ea17a81721da0393584fd807f8434050fa Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 1 Nov 2010 14:32:27 -0400
Subject: xen/blkfront: map REQ_FLUSH into a full barrier

Implement a flush as a full barrier, since we have nothing weaker.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Christoph Hellwig <hch@lst.de>

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 06e2812..3a318d8 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -245,14 +245,11 @@ static int blkif_ioctl(struct block_device *bdev, fmode_t mode,
 }
 
 /*
- * blkif_queue_request
+ * Generate a Xen blkfront IO request from a blk layer request.  Reads
+ * and writes are handled as expected.  Since we lack a loose flush
+ * request, we map flushes into a full ordered barrier.
  *
- * request block io
- *
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- *   virtual address in the guest os.
+ * @req: a request struct
  */
 static int blkif_queue_request(struct request *req)
 {
@@ -289,7 +286,7 @@ static int blkif_queue_request(struct request *req)
 
 	ring_req->operation = rq_data_dir(req) ?
 		BLKIF_OP_WRITE : BLKIF_OP_READ;
-	if (req->cmd_flags & REQ_HARDBARRIER)
+	if (req->cmd_flags & REQ_FLUSH)
 		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
 
 	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -1069,14 +1066,8 @@ static void blkfront_connect(struct blkfront_info *info)
 	 */
 	info->feature_flush = 0;
 
-	/*
-	 * The driver doesn't properly handled empty flushes, so
-	 * lets disable barrier support for now.
-	 */
-#if 0
 	if (!err && barrier)
 		info->feature_flush = REQ_FLUSH;
-#endif
 
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
-- 
cgit v0.10.2


From a945b9801a9bfd4a98bcfd9f6656b5027b254e3f Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Mon, 1 Nov 2010 17:03:14 -0400
Subject: xen/blkfront: change blk_shadow.request to proper pointer

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 3a318d8..31c8a64 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -65,7 +65,7 @@ enum blkif_state {
 
 struct blk_shadow {
 	struct blkif_request req;
-	unsigned long request;
+	struct request *request;
 	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
@@ -136,7 +136,7 @@ static void add_id_to_freelist(struct blkfront_info *info,
 			       unsigned long id)
 {
 	info->shadow[id].req.id  = info->shadow_free;
-	info->shadow[id].request = 0;
+	info->shadow[id].request = NULL;
 	info->shadow_free = id;
 }
 
@@ -278,7 +278,7 @@ static int blkif_queue_request(struct request *req)
 	/* Fill out a communications ring structure. */
 	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
 	id = get_id_from_freelist(info);
-	info->shadow[id].request = (unsigned long)req;
+	info->shadow[id].request = req;
 
 	ring_req->id = id;
 	ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -633,7 +633,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 
 		bret = RING_GET_RESPONSE(&info->ring, i);
 		id   = bret->id;
-		req  = (struct request *)info->shadow[id].request;
+		req  = info->shadow[id].request;
 
 		blkif_completion(&info->shadow[id]);
 
@@ -898,7 +898,7 @@ static int blkif_recover(struct blkfront_info *info)
 	/* Stage 3: Find pending requests and requeue them. */
 	for (i = 0; i < BLK_RING_SIZE; i++) {
 		/* Not in use? */
-		if (copy[i].request == 0)
+		if (!copy[i].request)
 			continue;
 
 		/* Grab a request slot and copy shadow state into it. */
@@ -915,9 +915,7 @@ static int blkif_recover(struct blkfront_info *info)
 				req->seg[j].gref,
 				info->xbdev->otherend_id,
 				pfn_to_mfn(info->shadow[req->id].frame[j]),
-				rq_data_dir(
-					(struct request *)
-					info->shadow[req->id].request));
+				rq_data_dir(info->shadow[req->id].request));
 		info->shadow[req->id].req = *req;
 
 		info->ring.req_prod_pvt++;
-- 
cgit v0.10.2


From be2f8373c188ed1f5d36003c9928e4d695213080 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 2 Nov 2010 10:38:33 -0400
Subject: xen/blkfront: Implement FUA with BLKIF_OP_WRITE_BARRIER

The BLKIF_OP_WRITE_BARRIER is a full ordered barrier, so we can use it
to implement FUA as well as a plain FLUSH.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Acked-by: Christoph Hellwig <hch@lst.de>

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 31c8a64..76b874a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -286,8 +286,18 @@ static int blkif_queue_request(struct request *req)
 
 	ring_req->operation = rq_data_dir(req) ?
 		BLKIF_OP_WRITE : BLKIF_OP_READ;
-	if (req->cmd_flags & REQ_FLUSH)
+
+	if (req->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+		/*
+		 * Ideally we could just do an unordered
+		 * flush-to-disk, but all we have is a full write
+		 * barrier at the moment.  However, a barrier write is
+		 * a superset of FUA, so we can implement it the same
+		 * way.  (It's also a FLUSH+FUA, since it is
+		 * guaranteed ordered WRT previous writes.)
+		 */
 		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
+	}
 
 	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
 	BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST);
@@ -1065,7 +1075,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	info->feature_flush = 0;
 
 	if (!err && barrier)
-		info->feature_flush = REQ_FLUSH;
+		info->feature_flush = REQ_FLUSH | REQ_FUA;
 
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
-- 
cgit v0.10.2


From dcb8baeceaa1c629bbd06f472cea023ad08a0c33 Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Date: Tue, 2 Nov 2010 11:55:58 -0400
Subject: xen/blkfront: cope with backend that fail empty
 BLKIF_OP_WRITE_BARRIER requests

Some(?) Xen block backends fail BLKIF_OP_WRITE_BARRIER requests, which
Linux uses as a cache flush operation.  In that case, disable use
of FLUSH.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Daniel Stodden <daniel.stodden@citrix.com>

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 76b874a..4f9e22f 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -656,6 +656,16 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 				printk(KERN_WARNING "blkfront: %s: write barrier op failed\n",
 				       info->gd->disk_name);
 				error = -EOPNOTSUPP;
+			}
+			if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+				     info->shadow[id].req.nr_segments == 0)) {
+				printk(KERN_WARNING "blkfront: %s: empty write barrier op failed\n",
+				       info->gd->disk_name);
+				error = -EOPNOTSUPP;
+			}
+			if (unlikely(error)) {
+				if (error == -EOPNOTSUPP)
+					error = 0;
 				info->feature_flush = 0;
 				xlvbd_flush(info);
 			}
-- 
cgit v0.10.2


From d69b78ba1deaaa95ffa8dac5a9ca819ce454d52e Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Mon, 15 Nov 2010 10:20:52 +0100
Subject: ioprio: grab rcu_read_lock in sys_ioprio_{set,get}()

Using:
- CONFIG_LOCKUP_DETECTOR=y
- CONFIG_PREEMPT=y
- CONFIG_LOCKDEP=y
- CONFIG_PROVE_LOCKING=y
- CONFIG_PROVE_RCU=y
found a missing rcu lock during boot on a 512 MiB x86_64 ubuntu vm:
  ===================================================
  [ INFO: suspicious rcu_dereference_check() usage. ]
  ---------------------------------------------------
  kernel/pid.c:419 invoked rcu_dereference_check() without protection!

  other info that might help us debug this:

  rcu_scheduler_active = 1, debug_locks = 0
  1 lock held by ureadahead/1355:
   #0:  (tasklist_lock){.+.+..}, at: [<ffffffff8115bc09>] sys_ioprio_set+0x7f/0x29e

  stack backtrace:
  Pid: 1355, comm: ureadahead Not tainted 2.6.37-dbg-DEV #1
  Call Trace:
   [<ffffffff8109c10c>] lockdep_rcu_dereference+0xaa/0xb3
   [<ffffffff81088cbf>] find_task_by_pid_ns+0x44/0x5d
   [<ffffffff81088cfa>] find_task_by_vpid+0x22/0x24
   [<ffffffff8115bc3e>] sys_ioprio_set+0xb4/0x29e
   [<ffffffff8147cf21>] ? trace_hardirqs_off_thunk+0x3a/0x3c
   [<ffffffff8105c409>] sysenter_dispatch+0x7/0x2c
   [<ffffffff8147cee2>] ? trace_hardirqs_on_thunk+0x3a/0x3f

The fix is to:
a) grab rcu lock in sys_ioprio_{set,get}() and
b) avoid grabbing tasklist_lock.
Discussion in: http://marc.info/?l=linux-kernel&m=128951324702889

Signed-off-by: Greg Thelen <gthelen@google.com>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>

Modified by Jens to remove the now redundant inner rcu lock and
unlock since they are now protected by the outer lock.

Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

diff --git a/fs/ioprio.c b/fs/ioprio.c
index 2f7d05c..7da2a06 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -103,22 +103,15 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
 	}
 
 	ret = -ESRCH;
-	/*
-	 * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
-	 * so we can't use rcu_read_lock(). See re-copy of ->ioprio
-	 * in copy_process().
-	 */
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	switch (which) {
 		case IOPRIO_WHO_PROCESS:
-			rcu_read_lock();
 			if (!who)
 				p = current;
 			else
 				p = find_task_by_vpid(who);
 			if (p)
 				ret = set_task_ioprio(p, ioprio);
-			rcu_read_unlock();
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
@@ -141,12 +134,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
 				break;
 
 			do_each_thread(g, p) {
-				int match;
-
-				rcu_read_lock();
-				match = __task_cred(p)->uid == who;
-				rcu_read_unlock();
-				if (!match)
+				if (__task_cred(p)->uid != who)
 					continue;
 				ret = set_task_ioprio(p, ioprio);
 				if (ret)
@@ -160,7 +148,7 @@ free_uid:
 			ret = -EINVAL;
 	}
 
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -204,17 +192,15 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 	int ret = -ESRCH;
 	int tmpio;
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	switch (which) {
 		case IOPRIO_WHO_PROCESS:
-			rcu_read_lock();
 			if (!who)
 				p = current;
 			else
 				p = find_task_by_vpid(who);
 			if (p)
 				ret = get_task_ioprio(p);
-			rcu_read_unlock();
 			break;
 		case IOPRIO_WHO_PGRP:
 			if (!who)
@@ -241,12 +227,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 				break;
 
 			do_each_thread(g, p) {
-				int match;
-
-				rcu_read_lock();
-				match = __task_cred(p)->uid == user->uid;
-				rcu_read_unlock();
-				if (!match)
+				if (__task_cred(p)->uid != user->uid)
 					continue;
 				tmpio = get_task_ioprio(p);
 				if (tmpio < 0)
@@ -264,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 			ret = -EINVAL;
 	}
 
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return ret;
 }
-- 
cgit v0.10.2


From c2f6805d470af369a7337801deeecea800dbfe1c Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Mon, 15 Nov 2010 19:32:42 +0100
Subject: blk-throttle: Fix calculation of max number of WRITES to be
 dispatched

o Currently we try to dispatch more READS and less WRITES (75%, 25%) in one
  dispatch round. ummy pointed out that there is a bug in max_nr_writes
  calculation. This patch fixes it.

Reported-by: ummy y <yummylln@yahoo.com.cn>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 56ad453..004be80 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -645,7 +645,7 @@ static int throtl_dispatch_tg(struct throtl_data *td, struct throtl_grp *tg,
 {
 	unsigned int nr_reads = 0, nr_writes = 0;
 	unsigned int max_nr_reads = throtl_grp_quantum*3/4;
-	unsigned int max_nr_writes = throtl_grp_quantum - nr_reads;
+	unsigned int max_nr_writes = throtl_grp_quantum - max_nr_reads;
 	struct bio *bio;
 
 	/* Try to dispatch 75% READS and 25% WRITES */
-- 
cgit v0.10.2


From 3e9bb2a071614f1d185740f31ac503ecba11d783 Mon Sep 17 00:00:00 2001
From: Vivek Goyal <vgoyal@redhat.com>
Date: Mon, 15 Nov 2010 19:32:43 +0100
Subject: block: fix amiga and atari floppy driver compile warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Geert, my crosstool don't produce warning below. I guess this has to do
something with compiler version.

- Geert noticed following warning during compilation.

  drivers/block/amiflop.c:1344: warning: ‘rq’ may be used uninitialized in
  this function
  drivers/block/ataflop.c:1402: warning: ‘rq’ may be used uninitialized in
  this function

- Initialize rq to NULL to fix the warning. If we can't find a suitable request
  to dispatch, this function should return NULL instead of a possibly garbage
  pointer.

- Cross compile tested only. Don't have hardware to test it.

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index a1725e6..7888501 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1341,7 +1341,7 @@ static struct request *set_next_request(void)
 {
 	struct request_queue *q;
 	int cnt = FD_MAX_UNITS;
-	struct request *rq;
+	struct request *rq = NULL;
 
 	/* Find next queue we can dispatch from */
 	fdc_queue = fdc_queue + 1;
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c
index 4e4cc6c..605a67e 100644
--- a/drivers/block/ataflop.c
+++ b/drivers/block/ataflop.c
@@ -1399,7 +1399,7 @@ static struct request *set_next_request(void)
 {
 	struct request_queue *q;
 	int old_pos = fdc_queue;
-	struct request *rq;
+	struct request *rq = NULL;
 
 	do {
 		q = unit[fdc_queue].disk->queue;
-- 
cgit v0.10.2


From bbe425cd9ae83eacd0c9f09df2bf56dc911a54cd Mon Sep 17 00:00:00 2001
From: Jens Axboe <jaxboe@fusionio.com>
Date: Wed, 17 Nov 2010 11:56:13 +0100
Subject: cciss: fix build for PROC_FS disabled

The recent patch to fix the removal of a non-existing proc
directory introduced this build problem for !CONFIG_PROC_FS:

drivers/block/cciss.c:4929: error: 'proc_cciss' undeclared (first use in this function)

Fix it by moving proc_cciss outside of the CONFIG_PROC_FS scope.

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>

diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index a67d0a6..f291587 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -66,6 +66,7 @@ MODULE_VERSION("3.6.26");
 MODULE_LICENSE("GPL");
 
 static DEFINE_MUTEX(cciss_mutex);
+static struct proc_dir_entry *proc_cciss;
 
 #include "cciss_cmd.h"
 #include "cciss.h"
@@ -363,8 +364,6 @@ static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
 #define ENG_GIG_FACTOR (ENG_GIG/512)
 #define ENGAGE_SCSI	"engage scsi"
 
-static struct proc_dir_entry *proc_cciss;
-
 static void cciss_seq_show_header(struct seq_file *seq)
 {
 	ctlr_info_t *h = seq->private;
-- 
cgit v0.10.2