Diffstat (limited to 'block')
-rw-r--r--  block/blk-barrier.c  |  45
-rw-r--r--  block/blk-core.c     |   9
-rw-r--r--  block/blk-settings.c |  34
-rw-r--r--  block/blk-sysfs.c    |  11
-rw-r--r--  block/blk-tag.c      |   2
-rw-r--r--  block/cfq-iosched.c  | 242
-rw-r--r--  block/compat_ioctl.c |  13
-rw-r--r--  block/elevator.c     |   4
-rw-r--r--  block/ioctl.c        |  17
9 files changed, 241 insertions, 136 deletions
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6593ab3..8873b9b 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -350,6 +350,7 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
if (bio->bi_private)
complete(bio->bi_private);
+ __free_page(bio_page(bio));
bio_put(bio);
}
@@ -372,30 +373,50 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
struct request_queue *q = bdev_get_queue(bdev);
int type = flags & DISCARD_FL_BARRIER ?
DISCARD_BARRIER : DISCARD_NOBARRIER;
+ struct bio *bio;
+ struct page *page;
int ret = 0;
if (!q)
return -ENXIO;
- if (!q->prepare_discard_fn)
+ if (!blk_queue_discard(q))
return -EOPNOTSUPP;
while (nr_sects && !ret) {
- struct bio *bio = bio_alloc(gfp_mask, 0);
- if (!bio)
- return -ENOMEM;
+ unsigned int sector_size = q->limits.logical_block_size;
+ unsigned int max_discard_sectors =
+ min(q->limits.max_discard_sectors, UINT_MAX >> 9);
+ bio = bio_alloc(gfp_mask, 1);
+ if (!bio)
+ goto out;
+ bio->bi_sector = sector;
bio->bi_end_io = blkdev_discard_end_io;
bio->bi_bdev = bdev;
if (flags & DISCARD_FL_WAIT)
bio->bi_private = &wait;
- bio->bi_sector = sector;
+ /*
+ * Add a zeroed one-sector payload as that's what
+ * our current implementations need. If we ever need
+ * more, the interface will need revisiting.
+ */
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ goto out_free_bio;
+ if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size)
+ goto out_free_page;
- if (nr_sects > queue_max_hw_sectors(q)) {
- bio->bi_size = queue_max_hw_sectors(q) << 9;
- nr_sects -= queue_max_hw_sectors(q);
- sector += queue_max_hw_sectors(q);
+ /*
+ * And override the bio size - the way discard works, we
+ * touch many more blocks on disk than the actual payload
+ * length.
+ */
+ if (nr_sects > max_discard_sectors) {
+ bio->bi_size = max_discard_sectors << 9;
+ nr_sects -= max_discard_sectors;
+ sector += max_discard_sectors;
} else {
bio->bi_size = nr_sects << 9;
nr_sects = 0;
@@ -414,5 +435,11 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
bio_put(bio);
}
return ret;
+out_free_page:
+ __free_page(page);
+out_free_bio:
+ bio_put(bio);
+out:
+ return -ENOMEM;
}
EXPORT_SYMBOL(blkdev_issue_discard);
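For context, a minimal caller-side sketch of the reworked interface (illustrative only; bdev, start_sect and nr_sects are placeholders):

	/*
	 * Discard nr_sects sectors starting at start_sect and wait for
	 * completion: DISCARD_FL_WAIT wires the on-stack completion up
	 * through bio->bi_private in the loop above.
	 */
	int ret = blkdev_issue_discard(bdev, start_sect, nr_sects,
				       GFP_KERNEL, DISCARD_FL_WAIT);
	if (ret == -EOPNOTSUPP)
		ret = 0;	/* device has no discard support */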
diff --git a/block/blk-core.c b/block/blk-core.c
index 8135228..ac0fa10 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -34,6 +34,7 @@
#include "blk.h"
EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
static int __make_request(struct request_queue *q, struct bio *bio);
@@ -1029,7 +1030,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
if (now == part->stamp)
return;
- if (part->in_flight) {
+ if (part_in_flight(part)) {
__part_stat_add(cpu, part, time_in_queue,
part_in_flight(part) * (now - part->stamp));
__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
@@ -1124,7 +1125,6 @@ void init_request_from_bio(struct request *req, struct bio *bio)
req->cmd_flags |= REQ_DISCARD;
if (bio_rw_flagged(bio, BIO_RW_BARRIER))
req->cmd_flags |= REQ_SOFTBARRIER;
- req->q->prepare_discard_fn(req->q, req);
} else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
req->cmd_flags |= REQ_HARDBARRIER;
@@ -1437,7 +1437,8 @@ static inline void __generic_make_request(struct bio *bio)
goto end_io;
}
- if (unlikely(nr_sectors > queue_max_hw_sectors(q))) {
+ if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+ nr_sectors > queue_max_hw_sectors(q))) {
printk(KERN_ERR "bio too big device %s (%u > %u)\n",
bdevname(bio->bi_bdev, b),
bio_sectors(bio),
@@ -1470,7 +1471,7 @@ static inline void __generic_make_request(struct bio *bio)
goto end_io;
if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
- !q->prepare_discard_fn) {
+ !blk_queue_discard(q)) {
err = -EOPNOTSUPP;
goto end_io;
}
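The discard exemption in the size check above follows from the payload trick in blkdev_issue_discard(): for discards, bi_size describes the range being touched on disk, not the data carried. Roughly (illustrative figures, 512-byte sectors assumed):

	/*
	 * A single discard bio may have
	 *	bi_size = (UINT_MAX >> 9) << 9		(~4 GiB of disk)
	 * while carrying a single zeroed sector of payload, so testing
	 * bio_sectors() against queue_max_hw_sectors() would spuriously
	 * reject valid discards.
	 */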
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 83413ff..66d4aa8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -34,23 +34,6 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn)
EXPORT_SYMBOL(blk_queue_prep_rq);
/**
- * blk_queue_set_discard - set a discard_sectors function for queue
- * @q: queue
- * @dfn: prepare_discard function
- *
- * It's possible for a queue to register a discard callback which is used
- * to transform a discard request into the appropriate type for the
- * hardware. If none is registered, then discard requests are failed
- * with %EOPNOTSUPP.
- *
- */
-void blk_queue_set_discard(struct request_queue *q, prepare_discard_fn *dfn)
-{
- q->prepare_discard_fn = dfn;
-}
-EXPORT_SYMBOL(blk_queue_set_discard);
-
-/**
* blk_queue_merge_bvec - set a merge_bvec function for queue
* @q: queue
* @mbfn: merge_bvec_fn
@@ -111,7 +94,9 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->max_hw_segments = MAX_HW_SEGMENTS;
lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
lim->max_segment_size = MAX_SEGMENT_SIZE;
- lim->max_sectors = lim->max_hw_sectors = SAFE_MAX_SECTORS;
+ lim->max_sectors = BLK_DEF_MAX_SECTORS;
+ lim->max_hw_sectors = INT_MAX;
+ lim->max_discard_sectors = SAFE_MAX_SECTORS;
lim->logical_block_size = lim->physical_block_size = lim->io_min = 512;
lim->bounce_pfn = (unsigned long)(BLK_BOUNCE_ANY >> PAGE_SHIFT);
lim->alignment_offset = 0;
@@ -164,6 +149,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
q->unplug_timer.data = (unsigned long)q;
blk_set_default_limits(&q->limits);
+ blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
/*
* If the caller didn't supply a lock, fall back to our embedded
@@ -254,6 +240,18 @@ void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
/**
+ * blk_queue_max_discard_sectors - set max sectors for a single discard
+ * @q: the request queue for the device
+ * @max_discard_sectors: maximum number of sectors to discard
+ **/
+void blk_queue_max_discard_sectors(struct request_queue *q,
+ unsigned int max_discard_sectors)
+{
+ q->limits.max_discard_sectors = max_discard_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_discard_sectors);
+
+/**
* blk_queue_max_phys_segments - set max phys segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
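With blk_queue_set_discard() gone, a driver advertises discard support via the queue flag that blk_queue_discard() tests and caps discard size with the new helper. A hedged sketch of the driver-side setup (illustrative; max_discard is a placeholder value in sectors):

	/* Hypothetical driver queue initialization. */
	blk_queue_max_discard_sectors(q, max_discard);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q);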
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b78c9c3..8a6d81a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -452,6 +452,7 @@ int blk_register_queue(struct gendisk *disk)
if (ret) {
kobject_uevent(&q->kobj, KOBJ_REMOVE);
kobject_del(&q->kobj);
+ blk_trace_remove_sysfs(disk_to_dev(disk));
return ret;
}
@@ -465,11 +466,11 @@ void blk_unregister_queue(struct gendisk *disk)
if (WARN_ON(!q))
return;
- if (q->request_fn) {
+ if (q->request_fn)
elv_unregister_queue(q);
- kobject_uevent(&q->kobj, KOBJ_REMOVE);
- kobject_del(&q->kobj);
- kobject_put(&disk_to_dev(disk)->kobj);
- }
+ kobject_uevent(&q->kobj, KOBJ_REMOVE);
+ kobject_del(&q->kobj);
+ blk_trace_remove_sysfs(disk_to_dev(disk));
+ kobject_put(&disk_to_dev(disk)->kobj);
}
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 2e5cfeb..6b0f52c 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -359,7 +359,7 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
max_depth -= 2;
if (!max_depth)
max_depth = 1;
- if (q->in_flight[0] > max_depth)
+ if (q->in_flight[BLK_RW_ASYNC] > max_depth)
return 1;
}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1ca813b..069a610 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -173,6 +173,7 @@ struct cfq_data {
unsigned int cfq_slice[2];
unsigned int cfq_slice_async_rq;
unsigned int cfq_slice_idle;
+ unsigned int cfq_latency;
struct list_head cic_list;
@@ -180,6 +181,8 @@ struct cfq_data {
* Fallback dummy cfqq for extreme OOM conditions
*/
struct cfq_queue oom_cfqq;
+
+ unsigned long last_end_sync_rq;
};
enum cfqq_state_flags {
@@ -227,7 +230,7 @@ CFQ_CFQQ_FNS(coop);
blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
static void cfq_dispatch_insert(struct request_queue *, struct request *);
-static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
+static struct cfq_queue *cfq_get_queue(struct cfq_data *, bool,
struct io_context *, gfp_t);
static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
struct io_context *);
@@ -238,27 +241,24 @@ static inline int rq_in_driver(struct cfq_data *cfqd)
}
static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
- int is_sync)
+ bool is_sync)
{
- return cic->cfqq[!!is_sync];
+ return cic->cfqq[is_sync];
}
static inline void cic_set_cfqq(struct cfq_io_context *cic,
- struct cfq_queue *cfqq, int is_sync)
+ struct cfq_queue *cfqq, bool is_sync)
{
- cic->cfqq[!!is_sync] = cfqq;
+ cic->cfqq[is_sync] = cfqq;
}
/*
* We regard a request as SYNC, if it's either a read or has the SYNC bit
* set (in which case it could also be direct WRITE).
*/
-static inline int cfq_bio_sync(struct bio *bio)
+static inline bool cfq_bio_sync(struct bio *bio)
{
- if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO))
- return 1;
-
- return 0;
+ return bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO);
}
/*
@@ -285,7 +285,7 @@ static int cfq_queue_empty(struct request_queue *q)
* if a queue is marked sync and has sync io queued. A sync queue with async
* io only should not get full sync slice length.
*/
-static inline int cfq_prio_slice(struct cfq_data *cfqd, int sync,
+static inline int cfq_prio_slice(struct cfq_data *cfqd, bool sync,
unsigned short prio)
{
const int base_slice = cfqd->cfq_slice[sync];
@@ -313,7 +313,7 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
* isn't valid until the first request from the dispatch is activated
* and the slice time set.
*/
-static inline int cfq_slice_used(struct cfq_queue *cfqq)
+static inline bool cfq_slice_used(struct cfq_queue *cfqq)
{
if (cfq_cfqq_slice_new(cfqq))
return 0;
@@ -488,7 +488,7 @@ static unsigned long cfq_slice_offset(struct cfq_data *cfqd,
* we will service the queues.
*/
static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- int add_front)
+ bool add_front)
{
struct rb_node **p, *parent;
struct cfq_queue *__cfqq;
@@ -504,11 +504,20 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
} else
rb_key += jiffies;
} else if (!add_front) {
+ /*
+ * Get our rb key offset. Subtract any residual slice
+ * value carried from last service. A negative resid
+ * count indicates slice overrun, and this should position
+ * the next service time further away in the tree.
+ */
rb_key = cfq_slice_offset(cfqd, cfqq) + jiffies;
- rb_key += cfqq->slice_resid;
+ rb_key -= cfqq->slice_resid;
cfqq->slice_resid = 0;
- } else
- rb_key = 0;
+ } else {
+ rb_key = -HZ;
+ __cfqq = cfq_rb_first(&cfqd->service_tree);
+ rb_key += __cfqq ? __cfqq->rb_key : jiffies;
+ }
if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
/*
@@ -542,7 +551,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
n = &(*p)->rb_left;
else if (cfq_class_idle(cfqq) > cfq_class_idle(__cfqq))
n = &(*p)->rb_right;
- else if (rb_key < __cfqq->rb_key)
+ else if (time_before(rb_key, __cfqq->rb_key))
n = &(*p)->rb_left;
else
n = &(*p)->rb_right;
@@ -822,8 +831,10 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
* reposition in fifo if next is older than rq
*/
if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) &&
- time_before(next->start_time, rq->start_time))
+ time_before(rq_fifo_time(next), rq_fifo_time(rq))) {
list_move(&rq->queuelist, &next->queuelist);
+ rq_set_fifo_time(rq, rq_fifo_time(next));
+ }
cfq_remove_request(next);
}
@@ -839,7 +850,7 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
* Disallow merge of a sync bio into an async request.
*/
if (cfq_bio_sync(bio) && !rq_is_sync(rq))
- return 0;
+ return false;
/*
* Lookup the cfqq that this bio will be queued with. Allow
@@ -847,13 +858,10 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
*/
cic = cfq_cic_lookup(cfqd, current->io_context);
if (!cic)
- return 0;
+ return false;
cfqq = cic_to_cfqq(cic, cfq_bio_sync(bio));
- if (cfqq == RQ_CFQQ(rq))
- return 1;
-
- return 0;
+ return cfqq == RQ_CFQQ(rq);
}
static void __cfq_set_active_queue(struct cfq_data *cfqd,
@@ -881,7 +889,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
*/
static void
__cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- int timed_out)
+ bool timed_out)
{
cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
@@ -909,7 +917,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
}
}
-static inline void cfq_slice_expired(struct cfq_data *cfqd, int timed_out)
+static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
{
struct cfq_queue *cfqq = cfqd->active_queue;
@@ -1021,7 +1029,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd,
*/
static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd,
struct cfq_queue *cur_cfqq,
- int probe)
+ bool probe)
{
struct cfq_queue *cfqq;
@@ -1085,6 +1093,15 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
if (!cic || !atomic_read(&cic->ioc->nr_tasks))
return;
+ /*
+ * If our average think time is larger than the remaining time
+ * slice, then don't idle. This avoids overrunning the allotted
+ * time slice.
+ */
+ if (sample_valid(cic->ttime_samples) &&
+ (cfqq->slice_end - jiffies < cic->ttime_mean))
+ return;
+
cfq_mark_cfqq_wait_request(cfqq);
/*
@@ -1124,9 +1141,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
*/
static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
{
- struct cfq_data *cfqd = cfqq->cfqd;
- struct request *rq;
- int fifo;
+ struct request *rq = NULL;
if (cfq_cfqq_fifo_expire(cfqq))
return NULL;
@@ -1136,13 +1151,11 @@ static struct request *cfq_check_fifo(struct cfq_queue *cfqq)
if (list_empty(&cfqq->fifo))
return NULL;
- fifo = cfq_cfqq_sync(cfqq);
rq = rq_entry_fifo(cfqq->fifo.next);
-
- if (time_before(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo]))
+ if (time_before(jiffies, rq_fifo_time(rq)))
rq = NULL;
- cfq_log_cfqq(cfqd, cfqq, "fifo=%p", rq);
+ cfq_log_cfqq(cfqq->cfqd, cfqq, "fifo=%p", rq);
return rq;
}
@@ -1243,16 +1256,83 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd)
return dispatched;
}
+static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+ unsigned int max_dispatch;
+
+ /*
+ * Drain async requests before we start sync IO
+ */
+ if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+ return false;
+
+ /*
+ * If this is an async queue and we have sync IO in flight, let it wait
+ */
+ if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
+ return false;
+
+ max_dispatch = cfqd->cfq_quantum;
+ if (cfq_class_idle(cfqq))
+ max_dispatch = 1;
+
+ /*
+ * Does this cfqq already have too much IO in flight?
+ */
+ if (cfqq->dispatched >= max_dispatch) {
+ /*
+ * idle queue must always only have a single IO in flight
+ */
+ if (cfq_class_idle(cfqq))
+ return false;
+
+ /*
+ * We have other queues, don't allow more IO from this one
+ */
+ if (cfqd->busy_queues > 1)
+ return false;
+
+ /*
+ * Sole queue user, allow bigger slice
+ */
+ max_dispatch *= 4;
+ }
+
+ /*
+ * Async queues must wait a bit before being allowed dispatch.
+ * We also ramp up the dispatch depth gradually for async IO,
+ * based on the last sync IO we serviced
+ */
+ if (!cfq_cfqq_sync(cfqq) && cfqd->cfq_latency) {
+ unsigned long last_sync = jiffies - cfqd->last_end_sync_rq;
+ unsigned int depth;
+
+ depth = last_sync / cfqd->cfq_slice[1];
+ if (!depth && !cfqq->dispatched)
+ depth = 1;
+ if (depth < max_dispatch)
+ max_dispatch = depth;
+ }
+
+ /*
+ * If we're below the current max, allow a dispatch
+ */
+ return cfqq->dispatched < max_dispatch;
+}
+
/*
* Dispatch a request from cfqq, moving it to the request queue
* dispatch list.
*/
-static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+static bool cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
struct request *rq;
BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list));
+ if (!cfq_may_dispatch(cfqd, cfqq))
+ return false;
+
/*
* follow expired path, else get first next available
*/
@@ -1271,6 +1351,8 @@ static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq)
atomic_long_inc(&cic->ioc->refcount);
cfqd->active_cic = cic;
}
+
+ return true;
}
/*
@@ -1281,7 +1363,6 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_queue *cfqq;
- unsigned int max_dispatch;
if (!cfqd->busy_queues)
return 0;
@@ -1294,48 +1375,11 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
return 0;
/*
- * Drain async requests before we start sync IO
+ * Dispatch a request from this cfqq, if it is allowed
*/
- if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+ if (!cfq_dispatch_request(cfqd, cfqq))
return 0;
- /*
- * If this is an async queue and we have sync IO in flight, let it wait
- */
- if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
- return 0;
-
- max_dispatch = cfqd->cfq_quantum;
- if (cfq_class_idle(cfqq))
- max_dispatch = 1;
-
- /*
- * Does this cfqq already have too much IO in flight?
- */
- if (cfqq->dispatched >= max_dispatch) {
- /*
- * idle queue must always only have a single IO in flight
- */
- if (cfq_class_idle(cfqq))
- return 0;
-
- /*
- * We have other queues, don't allow more IO from this one
- */
- if (cfqd->busy_queues > 1)
- return 0;
-
- /*
- * we are the only queue, allow up to 4 times of 'quantum'
- */
- if (cfqq->dispatched >= 4 * max_dispatch)
- return 0;
- }
-
- /*
- * Dispatch a request from this cfqq
- */
- cfq_dispatch_request(cfqd, cfqq);
cfqq->slice_dispatch++;
cfq_clear_cfqq_must_dispatch(cfqq);
@@ -1635,7 +1679,7 @@ static void cfq_ioc_set_ioprio(struct io_context *ioc)
}
static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
- pid_t pid, int is_sync)
+ pid_t pid, bool is_sync)
{
RB_CLEAR_NODE(&cfqq->rb_node);
RB_CLEAR_NODE(&cfqq->p_node);
@@ -1655,7 +1699,7 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
}
static struct cfq_queue *
-cfq_find_alloc_queue(struct cfq_data *cfqd, int is_sync,
+cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
struct io_context *ioc, gfp_t gfp_mask)
{
struct cfq_queue *cfqq, *new_cfqq = NULL;
@@ -1719,7 +1763,7 @@ cfq_async_queue_prio(struct cfq_data *cfqd, int ioprio_class, int ioprio)
}
static struct cfq_queue *
-cfq_get_queue(struct cfq_data *cfqd, int is_sync, struct io_context *ioc,
+cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct io_context *ioc,
gfp_t gfp_mask)
{
const int ioprio = task_ioprio(ioc);
@@ -1951,10 +1995,13 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
enable_idle = old_idle = cfq_cfqq_idle_window(cfqq);
if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle ||
- (cfqd->hw_tag && CIC_SEEKY(cic)))
+ (!cfqd->cfq_latency && cfqd->hw_tag && CIC_SEEKY(cic)))
enable_idle = 0;
else if (sample_valid(cic->ttime_samples)) {
- if (cic->ttime_mean > cfqd->cfq_slice_idle)
+ unsigned int slice_idle = cfqd->cfq_slice_idle;
+ if (sample_valid(cic->seek_samples) && CIC_SEEKY(cic))
+ slice_idle = msecs_to_jiffies(CFQ_MIN_TT);
+ if (cic->ttime_mean > slice_idle)
enable_idle = 0;
else
enable_idle = 1;
@@ -1973,7 +2020,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq,
* Check if new_cfqq should preempt the currently active queue. Return false
* for no or if we aren't sure; true will cause a preempt.
*/
-static int
+static bool
cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
struct request *rq)
{
@@ -1981,48 +2028,48 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
cfqq = cfqd->active_queue;
if (!cfqq)
- return 0;
+ return false;
if (cfq_slice_used(cfqq))
- return 1;
+ return true;
if (cfq_class_idle(new_cfqq))
- return 0;
+ return false;
if (cfq_class_idle(cfqq))
- return 1;
+ return true;
/*
* if the new request is sync, but the currently running queue is
* not, let the sync request have priority.
*/
if (rq_is_sync(rq) && !cfq_cfqq_sync(cfqq))
- return 1;
+ return true;
/*
* So both queues are sync. Let the new request get disk time if
* it's a metadata request and the current queue is doing regular IO.
*/
if (rq_is_meta(rq) && !cfqq->meta_pending)
- return 1;
+ return true;
/*
* Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
*/
if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
- return 1;
+ return true;
if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
- return 0;
+ return false;
/*
* if this request is as-good as one we would expect from the
* current cfqq, let it preempt
*/
if (cfq_rq_close(cfqd, rq))
- return 1;
+ return true;
- return 0;
+ return false;
}
/*
@@ -2107,6 +2154,7 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
cfq_add_rq_rb(rq);
+ rq_set_fifo_time(rq, jiffies + cfqd->cfq_fifo_expire[rq_is_sync(rq)]);
list_add_tail(&rq->queuelist, &cfqq->fifo);
cfq_rq_enqueued(cfqd, cfqq, rq);
@@ -2157,8 +2205,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
if (cfq_cfqq_sync(cfqq))
cfqd->sync_flight--;
- if (sync)
+ if (sync) {
RQ_CIC(rq)->last_end_request = now;
+ cfqd->last_end_sync_rq = now;
+ }
/*
* If this is the active queue, check if it needs to be expired,
@@ -2284,7 +2334,7 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
struct cfq_data *cfqd = q->elevator->elevator_data;
struct cfq_io_context *cic;
const int rw = rq_data_dir(rq);
- const int is_sync = rq_is_sync(rq);
+ const bool is_sync = rq_is_sync(rq);
struct cfq_queue *cfqq;
unsigned long flags;
@@ -2480,8 +2530,9 @@ static void *cfq_init_queue(struct request_queue *q)
cfqd->cfq_slice[1] = cfq_slice_sync;
cfqd->cfq_slice_async_rq = cfq_slice_async_rq;
cfqd->cfq_slice_idle = cfq_slice_idle;
+ cfqd->cfq_latency = 1;
cfqd->hw_tag = 1;
-
+ cfqd->last_end_sync_rq = jiffies;
return cfqd;
}
@@ -2549,6 +2600,7 @@ SHOW_FUNCTION(cfq_slice_idle_show, cfqd->cfq_slice_idle, 1);
SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1);
SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1);
SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0);
+SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0);
#undef SHOW_FUNCTION
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \
@@ -2580,6 +2632,7 @@ STORE_FUNCTION(cfq_slice_sync_store, &cfqd->cfq_slice[1], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1);
STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1,
UINT_MAX, 0);
+STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0);
#undef STORE_FUNCTION
#define CFQ_ATTR(name) \
@@ -2595,6 +2648,7 @@ static struct elv_fs_entry cfq_attrs[] = {
CFQ_ATTR(slice_async),
CFQ_ATTR(slice_async_rq),
CFQ_ATTR(slice_idle),
+ CFQ_ATTR(low_latency),
__ATTR_NULL
};
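A worked example of the low_latency ramp in cfq_may_dispatch() above, assuming the default 100ms sync slice and a cfq_quantum of 4, where depth = (jiffies - last_end_sync_rq) / cfq_slice[1]:

	/*
	 * Time since last sync completion -> async dispatch cap:
	 *	  0-100ms: 0 (bumped to 1 only if nothing is in flight)
	 *	100-200ms: 1
	 *	200-300ms: 2
	 *	300-400ms: 3
	 *	   >400ms: max_dispatch (cfq_quantum)
	 * Writing 0 to the new low_latency sysfs attribute disables
	 * this throttle entirely.
	 */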
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index 7865a34..9bd086c 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -21,6 +21,11 @@ static int compat_put_int(unsigned long arg, int val)
return put_user(val, (compat_int_t __user *)compat_ptr(arg));
}
+static int compat_put_uint(unsigned long arg, unsigned int val)
+{
+ return put_user(val, (compat_uint_t __user *)compat_ptr(arg));
+}
+
static int compat_put_long(unsigned long arg, long val)
{
return put_user(val, (compat_long_t __user *)compat_ptr(arg));
@@ -734,6 +739,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg)
switch (cmd) {
case HDIO_GETGEO:
return compat_hdio_getgeo(disk, bdev, compat_ptr(arg));
+ case BLKPBSZGET:
+ return compat_put_uint(arg, bdev_physical_block_size(bdev));
+ case BLKIOMIN:
+ return compat_put_uint(arg, bdev_io_min(bdev));
+ case BLKIOOPT:
+ return compat_put_uint(arg, bdev_io_opt(bdev));
+ case BLKALIGNOFF:
+ return compat_put_int(arg, bdev_alignment_offset(bdev));
case BLKFLSBUF:
case BLKROSET:
case BLKDISCARD:
diff --git a/block/elevator.c b/block/elevator.c
index 1975b61..a847046 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1059,9 +1059,7 @@ ssize_t elv_iosched_store(struct request_queue *q, const char *name,
return count;
strlcpy(elevator_name, name, sizeof(elevator_name));
- strstrip(elevator_name);
-
- e = elevator_get(elevator_name);
+ e = elevator_get(strstrip(elevator_name));
if (!e) {
printk(KERN_ERR "elevator: type %s not found\n", elevator_name);
return -EINVAL;
diff --git a/block/ioctl.c b/block/ioctl.c
index d3e6b58..1f4d1de 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -138,6 +138,11 @@ static int put_int(unsigned long arg, int val)
return put_user(val, (int __user *)arg);
}
+static int put_uint(unsigned long arg, unsigned int val)
+{
+ return put_user(val, (unsigned int __user *)arg);
+}
+
static int put_long(unsigned long arg, long val)
{
return put_user(val, (long __user *)arg);
@@ -263,10 +268,18 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512);
case BLKROGET:
return put_int(arg, bdev_read_only(bdev) != 0);
- case BLKBSZGET: /* get the logical block size (cf. BLKSSZGET) */
+ case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */
return put_int(arg, block_size(bdev));
- case BLKSSZGET: /* get block device hardware sector size */
+ case BLKSSZGET: /* get block device logical block size */
return put_int(arg, bdev_logical_block_size(bdev));
+ case BLKPBSZGET: /* get block device physical block size */
+ return put_uint(arg, bdev_physical_block_size(bdev));
+ case BLKIOMIN:
+ return put_uint(arg, bdev_io_min(bdev));
+ case BLKIOOPT:
+ return put_uint(arg, bdev_io_opt(bdev));
+ case BLKALIGNOFF:
+ return put_int(arg, bdev_alignment_offset(bdev));
case BLKSECTGET:
return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev)));
case BLKRASET:
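Finally, a hedged sketch of the userspace side of the new topology ioctls; the native and compat paths above copy out the same values. The ioctl numbers are assumed to come from <linux/fs.h>:

	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>	/* BLKPBSZGET, BLKIOMIN, BLKIOOPT, BLKALIGNOFF */

	int main(int argc, char **argv)
	{
		unsigned int pbsz, iomin, ioopt;
		int aoff, fd;

		if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
			return 1;
		/* Each call copies one value out via put_uint()/put_int(). */
		if (!ioctl(fd, BLKPBSZGET, &pbsz) && !ioctl(fd, BLKIOMIN, &iomin) &&
		    !ioctl(fd, BLKIOOPT, &ioopt) && !ioctl(fd, BLKALIGNOFF, &aoff))
			printf("pbsz=%u iomin=%u ioopt=%u align=%d\n",
			       pbsz, iomin, ioopt, aoff);
		close(fd);
		return 0;
	}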