From ef9be1d336378de279d4e37779f1b83cebadbcc0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Nov 2005 14:27:09 +0100 Subject: [BLOCK] as-iosched: update alias handling Unlike other ioscheds, as-iosched handles aliases by chaining them using rq->queuelist. As aliased requests are very rare in the first place, this complicates merge/dispatch handling without meaningful performance improvement. This patch updates as-iosched to dump aliased requests into the dispatch queue as other ioscheds do. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/block/as-iosched.c b/block/as-iosched.c index 43fa204..8da3cf6 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -182,6 +182,9 @@ struct as_rq { static kmem_cache_t *arq_pool; +static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); +static void as_antic_stop(struct as_data *ad); + /* * IO Context helper functions */ @@ -370,7 +373,7 @@ static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) * existing request against the same sector), which can happen when using * direct IO, then return the alias. */ -static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) { struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; struct rb_node *parent = NULL; @@ -397,6 +400,16 @@ static struct as_rq *as_add_arq_rb(struct as_data *ad, struct as_rq *arq) return NULL; } +static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +{ + struct as_rq *alias; + + while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { + as_move_to_dispatch(ad, alias); + as_antic_stop(ad); + } +} + static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) { if (!ON_RB(&arq->rb_node)) { @@ -1133,23 +1146,6 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) /* * take it off the sort and fifo list, add to dispatch queue */ - while (!list_empty(&rq->queuelist)) { - struct request *__rq = list_entry_rq(rq->queuelist.next); - struct as_rq *__arq = RQ_DATA(__rq); - - list_del(&__rq->queuelist); - - elv_dispatch_add_tail(ad->q, __rq); - - if (__arq->io_context && __arq->io_context->aic) - atomic_inc(&__arq->io_context->aic->nr_dispatched); - - WARN_ON(__arq->state != AS_RQ_QUEUED); - __arq->state = AS_RQ_DISPATCHED; - - ad->nr_dispatched++; - } - as_remove_queued_request(ad->q, rq); WARN_ON(arq->state != AS_RQ_QUEUED); @@ -1326,49 +1322,12 @@ fifo_expired: } /* - * Add arq to a list behind alias - */ -static inline void -as_add_aliased_request(struct as_data *ad, struct as_rq *arq, - struct as_rq *alias) -{ - struct request *req = arq->request; - struct list_head *insert = alias->request->queuelist.prev; - - /* - * Transfer list of aliases - */ - while (!list_empty(&req->queuelist)) { - struct request *__rq = list_entry_rq(req->queuelist.next); - struct as_rq *__arq = RQ_DATA(__rq); - - list_move_tail(&__rq->queuelist, &alias->request->queuelist); - - WARN_ON(__arq->state != AS_RQ_QUEUED); - } - - /* - * Another request with the same start sector on the rbtree. - * Link this request to that sector. They are untangled in - * as_move_to_dispatch - */ - list_add(&arq->request->queuelist, insert); - - /* - * Don't want to have to handle merges.
- */ - as_del_arq_hash(arq); - arq->request->flags |= REQ_NOMERGE; -} - -/* * add arq to rbtree and fifo */ static void as_add_request(request_queue_t *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); - struct as_rq *alias; int data_dir; arq->state = AS_RQ_NEW; @@ -1387,33 +1346,17 @@ static void as_add_request(request_queue_t *q, struct request *rq) atomic_inc(&arq->io_context->aic->nr_queued); } - alias = as_add_arq_rb(ad, arq); - if (!alias) { - /* - * set expire time (only used for reads) and add to fifo list - */ - arq->expires = jiffies + ad->fifo_expire[data_dir]; - list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); + as_add_arq_rb(ad, arq); + if (rq_mergeable(arq->request)) + as_add_arq_hash(ad, arq); - if (rq_mergeable(arq->request)) - as_add_arq_hash(ad, arq); - as_update_arq(ad, arq); /* keep state machine up to date */ - - } else { - as_add_aliased_request(ad, arq, alias); - - /* - * have we been anticipating this request? - * or does it come from the same process as the one we are - * anticipating for? - */ - if (ad->antic_status == ANTIC_WAIT_REQ - || ad->antic_status == ANTIC_WAIT_NEXT) { - if (as_can_break_anticipation(ad, arq)) - as_antic_stop(ad); - } - } + /* + * set expire time (only used for reads) and add to fifo list + */ + arq->expires = jiffies + ad->fifo_expire[data_dir]; + list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); + as_update_arq(ad, arq); /* keep state machine up to date */ arq->state = AS_RQ_QUEUED; } @@ -1536,23 +1479,8 @@ static void as_merged_request(request_queue_t *q, struct request *req) * if the merge was a front merge, we need to reposition request */ if (rq_rb_key(req) != arq->rb_key) { - struct as_rq *alias, *next_arq = NULL; - - if (ad->next_arq[arq->is_sync] == arq) - next_arq = as_find_next_arq(ad, arq); - - /* - * Note! We should really be moving any old aliased requests - * off this request and try to insert them into the rbtree. We - * currently don't bother. Ditto the next function. - */ as_del_arq_rb(ad, arq); - if ((alias = as_add_arq_rb(ad, arq))) { - list_del_init(&arq->fifo); - as_add_aliased_request(ad, arq, alias); - if (next_arq) - ad->next_arq[arq->is_sync] = next_arq; - } + as_add_arq_rb(ad, arq); /* * Note! 
At this stage of this and the next function, our next * request may not be optimal - eg the request may have "grown" @@ -1579,18 +1507,8 @@ static void as_merged_requests(request_queue_t *q, struct request *req, as_add_arq_hash(ad, arq); if (rq_rb_key(req) != arq->rb_key) { - struct as_rq *alias, *next_arq = NULL; - - if (ad->next_arq[arq->is_sync] == arq) - next_arq = as_find_next_arq(ad, arq); - as_del_arq_rb(ad, arq); - if ((alias = as_add_arq_rb(ad, arq))) { - list_del_init(&arq->fifo); - as_add_aliased_request(ad, arq, alias); - if (next_arq) - ad->next_arq[arq->is_sync] = next_arq; - } + as_add_arq_rb(ad, arq); } /* @@ -1610,18 +1528,6 @@ static void as_merged_requests(request_queue_t *q, struct request *req, } /* - * Transfer list of aliases - */ - while (!list_empty(&next->queuelist)) { - struct request *__rq = list_entry_rq(next->queuelist.next); - struct as_rq *__arq = RQ_DATA(__rq); - - list_move_tail(&__rq->queuelist, &req->queuelist); - - WARN_ON(__arq->state != AS_RQ_QUEUED); - } - - /* * kill knowledge of next, this one is a goner */ as_remove_queued_request(q, next); -- cgit v0.10.2 From 88ee5ef157202624de2b43b3512fdcb54fda1ab5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 12 Nov 2005 11:09:12 +0100 Subject: [BLOCK] ll_rw_blk: fastpath get_request() Originally from: Nick Piggin Move current_io_context out of the get_request fastpath. Also try to streamline a few other things in this area. Signed-off-by: Jens Axboe diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index d4beb9a..97f4e7e 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1908,40 +1908,40 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, { struct request *rq = NULL; struct request_list *rl = &q->rq; - struct io_context *ioc = current_io_context(GFP_ATOMIC); - int priv; + struct io_context *ioc = NULL; + int may_queue, priv; - if (rl->count[rw]+1 >= q->nr_requests) { - /* - * The queue will fill after this allocation, so set it as - * full, and mark this process as "batching". This process - * will be allowed to complete a batch of requests, others - * will be blocked. - */ - if (!blk_queue_full(q, rw)) { - ioc_set_batching(q, ioc); - blk_set_queue_full(q, rw); - } - } + may_queue = elv_may_queue(q, rw, bio); + if (may_queue == ELV_MQUEUE_NO) + goto rq_starved; - switch (elv_may_queue(q, rw, bio)) { - case ELV_MQUEUE_NO: - goto rq_starved; - case ELV_MQUEUE_MAY: - break; - case ELV_MQUEUE_MUST: - goto get_rq; - } - - if (blk_queue_full(q, rw) && !ioc_batching(q, ioc)) { - /* - * The queue is full and the allocating process is not a - * "batcher", and not exempted by the IO scheduler - */ - goto out; + if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { + if (rl->count[rw]+1 >= q->nr_requests) { + ioc = current_io_context(GFP_ATOMIC); + /* + * The queue will fill after this allocation, so set + * it as full, and mark this process as "batching". + * This process will be allowed to complete a batch of + * requests, others will be blocked.
+ */ + if (!blk_queue_full(q, rw)) { + ioc_set_batching(q, ioc); + blk_set_queue_full(q, rw); + } else { + if (may_queue != ELV_MQUEUE_MUST + && !ioc_batching(q, ioc)) { + /* + * The queue is full and the allocating + * process is not a "batcher", and not + * exempted by the IO scheduler + */ + goto out; + } + } + } + set_queue_congested(q, rw); } -get_rq: /* * Only allow batching queuers to allocate up to 50% over the defined * limit of requests, otherwise we could have thousands of requests @@ -1952,8 +1952,6 @@ get_rq: rl->count[rw]++; rl->starved[rw] = 0; - if (rl->count[rw] >= queue_congestion_on_threshold(q)) - set_queue_congested(q, rw); priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); if (priv) @@ -1962,7 +1960,7 @@ get_rq: spin_unlock_irq(q->queue_lock); rq = blk_alloc_request(q, rw, bio, priv, gfp_mask); - if (!rq) { + if (unlikely(!rq)) { /* * Allocation failed presumably due to memory. Undo anything * we might have messed up. @@ -1987,6 +1985,12 @@ rq_starved: goto out; } + /* + * ioc may be NULL here, and ioc_batching will be false. That's + * OK, if the queue is under the request limit then requests need + * not count toward the nr_batch_requests limit. There will always + * be some limit enforced by BLK_BATCH_TIME. + */ if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; -- cgit v0.10.2 From 80cfd548eed68cf90c5ae9cfcd6b02230cece756 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Jan 2006 09:43:28 +0100 Subject: [BLOCK] bio: check for same page merge possibilities in __bio_add_page() For filesystems with a blocksize < page size, we can merge same page calls into the bio_vec at the end of the bio. This saves segments on systems with a page size > the "normal" 4kb fs block size. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe diff --git a/fs/bio.c b/fs/bio.c index 38d3e80..dfe242a 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -325,10 +325,31 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page if (unlikely(bio_flagged(bio, BIO_CLONED))) return 0; - if (bio->bi_vcnt >= bio->bi_max_vecs) + if (((bio->bi_size + len) >> 9) > max_sectors) return 0; - if (((bio->bi_size + len) >> 9) > max_sectors) + /* + * For filesystems with a blocksize smaller than the pagesize + * we will often be called with the same page as last time and + * a consecutive offset. Optimize this special case. + */ + if (bio->bi_vcnt > 0) { + struct bio_vec *prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; + + if (page == prev->bv_page && + offset == prev->bv_offset + prev->bv_len) { + prev->bv_len += len; + if (q->merge_bvec_fn && + q->merge_bvec_fn(q, bio, prev) < len) { + prev->bv_len -= len; + return 0; + } + + goto done; + } + } + + if (bio->bi_vcnt >= bio->bi_max_vecs) return 0; /* @@ -382,6 +403,7 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page bio->bi_vcnt++; bio->bi_phys_segments++; bio->bi_hw_segments++; + done: bio->bi_size += len; return len; } -- cgit v0.10.2 From 64100099ed22f71cce656c5c2caecf5c9cf255dc Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Fri, 6 Jan 2006 09:46:02 +0100 Subject: [BLOCK] mark some block/ variables const The patch below marks various read-only variables in block/* as const, so that gcc can optimize the use of them; e.g. gcc will replace the use by the value directly now and will even remove the memory usage of these.
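As a minimal sketch of the effect (not part of the patch; batch_exhausted() is a made-up helper, the tunable name matches deadline-iosched.c after this change):

/*
 * With const, gcc can propagate the value into every use site and
 * drop the variable's storage from the object file entirely.
 */
static const int fifo_batch = 16;

static int batch_exhausted(int nr_dispatched)
{
	/* compiles as if written "return nr_dispatched >= 16;" - no memory load */
	return nr_dispatched >= fifo_batch;
}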
Signed-off-by: Arjan van de Ven Signed-off-by: Jens Axboe diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ee0bb41..74fae2d 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -25,15 +25,15 @@ /* * tunables */ -static int cfq_quantum = 4; /* max queue in one round of service */ -static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ -static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; -static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ -static int cfq_back_penalty = 2; /* penalty of a backwards seek */ +static const int cfq_quantum = 4; /* max queue in one round of service */ +static const int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ +static const int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; +static const int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ +static const int cfq_back_penalty = 2; /* penalty of a backwards seek */ -static int cfq_slice_sync = HZ / 10; +static const int cfq_slice_sync = HZ / 10; static int cfq_slice_async = HZ / 25; -static int cfq_slice_async_rq = 2; +static const int cfq_slice_async_rq = 2; static int cfq_slice_idle = HZ / 100; #define CFQ_IDLE_GRACE (HZ / 10) @@ -45,7 +45,7 @@ static int cfq_slice_idle = HZ / 100; /* * disable queueing at the driver/hardware level */ -static int cfq_max_depth = 2; +static const int cfq_max_depth = 2; /* * for the hash of cfqq inside the cfqd diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 9cbec09..27e494b 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -19,10 +19,10 @@ /* * See Documentation/block/deadline-iosched.txt */ -static int read_expire = HZ / 2; /* max time before a read is submitted. */ -static int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ -static int writes_starved = 2; /* max times reads can starve a write */ -static int fifo_batch = 16; /* # of sequential requests treated as one +static const int read_expire = HZ / 2; /* max time before a read is submitted. */ +static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */ +static const int writes_starved = 2; /* max times reads can starve a write */ +static const int fifo_batch = 16; /* # of sequential requests treated as one by the above parameters. For throughput. */ static const int deadline_hash_shift = 5; diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 97f4e7e..e02c88c 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1039,7 +1039,7 @@ void blk_queue_invalidate_tags(request_queue_t *q) EXPORT_SYMBOL(blk_queue_invalidate_tags); -static char *rq_flags[] = { +static const char * const rq_flags[] = { "REQ_RW", "REQ_FAILFAST", "REQ_SORTED", diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 1d8852f..c2ac36d 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL(scsi_command_size); static int sg_get_version(int __user *p) { - static int sg_version_num = 30527; + static const int sg_version_num = 30527; return put_user(sg_version_num, p); } -- cgit v0.10.2 From 8ffdc6550c47f75ca4e6c9f30a2a89063e035cf2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:49:03 +0100 Subject: [BLOCK] add @uptodate to end_that_request_last() and @error to rq_end_io_fn() add @uptodate argument to end_that_request_last() and @error to rq_end_io_fn(). 
There's no generic way to pass error code to request completion function, making generic error handling of non-fs request difficult (rq->errors is driver-specific and each driver uses it differently). This patch adds @uptodate to end_that_request_last() and @error to rq_end_io_fn(). For fs requests, this doesn't really matter, so just using the same uptodate argument used in the last call to end_that_request_first() should suffice. IMHO, this can also help the generic command-carrying request Jens is working on. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/block/elevator.c b/block/elevator.c index 6c3fc8a..85a11ce 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -498,7 +498,7 @@ struct request *elv_next_request(request_queue_t *q) blkdev_dequeue_request(rq); rq->flags |= REQ_QUIET; end_that_request_chunk(rq, 0, nr_bytes); - end_that_request_last(rq); + end_that_request_last(rq, 0); } else { printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__, ret); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index e02c88c..8b1ae69 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -344,7 +344,7 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn); /* * Cache flushing for ordered writes handling */ -static void blk_pre_flush_end_io(struct request *flush_rq) +static void blk_pre_flush_end_io(struct request *flush_rq, int error) { struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; @@ -362,7 +362,7 @@ static void blk_pre_flush_end_io(struct request *flush_rq) } } -static void blk_post_flush_end_io(struct request *flush_rq) +static void blk_post_flush_end_io(struct request *flush_rq, int error) { struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; @@ -2317,7 +2317,7 @@ EXPORT_SYMBOL(blk_rq_map_kern); */ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk, struct request *rq, int at_head, - void (*done)(struct request *)) + rq_end_io_fn *done) { int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK; @@ -2521,7 +2521,7 @@ EXPORT_SYMBOL(blk_put_request); * blk_end_sync_rq - executes a completion event on a request * @rq: request to complete */ -void blk_end_sync_rq(struct request *rq) +void blk_end_sync_rq(struct request *rq, int error) { struct completion *waiting = rq->waiting; @@ -3183,9 +3183,17 @@ EXPORT_SYMBOL(end_that_request_chunk); /* * queue lock must be held */ -void end_that_request_last(struct request *req) +void end_that_request_last(struct request *req, int uptodate) { struct gendisk *disk = req->rq_disk; + int error; + + /* + * extend uptodate bool to allow < 0 value to be direct io error + */ + error = 0; + if (end_io_error(uptodate)) + error = !uptodate ?
-EIO : uptodate; if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); @@ -3200,7 +3208,7 @@ void end_that_request_last(struct request *req) disk->in_flight--; } if (req->end_io) - req->end_io(req); + req->end_io(req, error); else __blk_put_request(req->q, req); } @@ -3212,7 +3220,7 @@ void end_request(struct request *req, int uptodate) if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) { add_disk_randomness(req->rq_disk); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, uptodate); } } diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c index 70eaa5c..21097a3 100644 --- a/drivers/block/DAC960.c +++ b/drivers/block/DAC960.c @@ -3471,7 +3471,7 @@ static inline boolean DAC960_ProcessCompletedRequest(DAC960_Command_T *Command, if (!end_that_request_first(Request, UpToDate, Command->BlockCount)) { - end_that_request_last(Request); + end_that_request_last(Request, UpToDate); if (Command->Completion) { complete(Command->Completion); diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index c3441b3..d2815b7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c @@ -2310,7 +2310,7 @@ static inline void complete_command( ctlr_info_t *h, CommandList_struct *cmd, printk("Done with %p\n", cmd->rq); #endif /* CCISS_DEBUG */ - end_that_request_last(cmd->rq); + end_that_request_last(cmd->rq, status ? 1 : -EIO); cmd_free(h,cmd,1); } diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c index cf1822a..9bddb68 100644 --- a/drivers/block/cpqarray.c +++ b/drivers/block/cpqarray.c @@ -1036,7 +1036,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout) complete_buffers(cmd->rq->bio, ok); DBGPX(printk("Done with %p\n", cmd->rq);); - end_that_request_last(cmd->rq); + end_that_request_last(cmd->rq, ok ? 
1 : -EIO); } /* diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index f7e765a..a5b857c 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -2301,7 +2301,7 @@ static void floppy_end_request(struct request *req, int uptodate) add_disk_randomness(req->rq_disk); floppy_off((long)req->rq_disk->private_data); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, uptodate); /* We're done with the request */ current_req = NULL; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 9e268dd..485345c 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -136,7 +136,7 @@ static void nbd_end_request(struct request *req) spin_lock_irqsave(q->queue_lock, flags); if (!end_that_request_first(req, uptodate, req->nr_sectors)) { - end_that_request_last(req); + end_that_request_last(req, uptodate); } spin_unlock_irqrestore(q->queue_lock, flags); } diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c index 1ded3b4..9251f41 100644 --- a/drivers/block/sx8.c +++ b/drivers/block/sx8.c @@ -770,7 +770,7 @@ static inline void carm_end_request_queued(struct carm_host *host, rc = end_that_request_first(req, uptodate, req->hard_nr_sectors); assert(rc == 0); - end_that_request_last(req); + end_that_request_last(req, uptodate); rc = carm_put_request(host, crq); assert(rc == 0); diff --git a/drivers/block/ub.c b/drivers/block/ub.c index 10740a0..a05fe58 100644 --- a/drivers/block/ub.c +++ b/drivers/block/ub.c @@ -951,7 +951,7 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd) static void ub_end_rq(struct request *rq, int uptodate) { end_that_request_first(rq, uptodate, rq->hard_nr_sectors); - end_that_request_last(rq); + end_that_request_last(rq, uptodate); } static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun, diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c index 2d518aa..063f030 100644 --- a/drivers/block/viodasd.c +++ b/drivers/block/viodasd.c @@ -305,7 +305,7 @@ static void viodasd_end_request(struct request *req, int uptodate, if (end_that_request_first(req, uptodate, num_sectors)) return; add_disk_randomness(req->rq_disk); - end_that_request_last(req); + end_that_request_last(req, uptodate); } /* diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c index ac96de1..378e88d 100644 --- a/drivers/cdrom/cdu31a.c +++ b/drivers/cdrom/cdu31a.c @@ -1402,7 +1402,7 @@ static void do_cdu31a_request(request_queue_t * q) if (!end_that_request_first(req, 1, nblock)) { spin_lock_irq(q->queue_lock); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 1); spin_unlock_irq(q->queue_lock); } continue; diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 70aeb3a..d31117e 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -614,7 +614,7 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate) */ spin_lock_irqsave(&ide_lock, flags); end_that_request_chunk(failed, 0, failed->data_len); - end_that_request_last(failed); + end_that_request_last(failed, 0); spin_unlock_irqrestore(&ide_lock, flags); } @@ -1735,7 +1735,7 @@ end_request: spin_lock_irqsave(&ide_lock, flags); blkdev_dequeue_request(rq); - end_that_request_last(rq); + end_that_request_last(rq, 1); HWGROUP(drive)->rq = NULL; spin_unlock_irqrestore(&ide_lock, flags); return ide_stopped; diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index ecfafcd..8435b44 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -89,7 +89,7 @@ int __ide_end_request(ide_drive_t *drive, 
struct request *rq, int uptodate, blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; - end_that_request_last(rq); + end_that_request_last(rq, uptodate); ret = 0; } return ret; @@ -247,7 +247,7 @@ static void ide_complete_pm_request (ide_drive_t *drive, struct request *rq) } blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; - end_that_request_last(rq); + end_that_request_last(rq, 1); spin_unlock_irqrestore(&ide_lock, flags); } @@ -379,7 +379,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err) blkdev_dequeue_request(rq); HWGROUP(drive)->rq = NULL; rq->errors = err; - end_that_request_last(rq); + end_that_request_last(rq, !rq->errors); spin_unlock_irqrestore(&ide_lock, flags); } diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c index f283b5b..4f52252 100644 --- a/drivers/message/i2o/i2o_block.c +++ b/drivers/message/i2o/i2o_block.c @@ -466,7 +466,7 @@ static void i2o_block_end_request(struct request *req, int uptodate, spin_lock_irqsave(q->queue_lock, flags); - end_that_request_last(req); + end_that_request_last(req, uptodate); if (likely(dev)) { dev->open_queue_depth--; diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c index abcf191..8e380c1 100644 --- a/drivers/mmc/mmc_block.c +++ b/drivers/mmc/mmc_block.c @@ -263,7 +263,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) */ add_disk_randomness(req->rq_disk); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 1); } spin_unlock_irq(&md->lock); } while (ret); @@ -289,7 +289,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) add_disk_randomness(req->rq_disk); blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 0); spin_unlock_irq(&md->lock); return 0; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 7008d32..fdb6138 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1035,7 +1035,7 @@ dasd_end_request(struct request *req, int uptodate) if (end_that_request_first(req, uptodate, req->hard_nr_sectors)) BUG(); add_disk_randomness(req->rq_disk); - end_that_request_last(req); + end_that_request_last(req, uptodate); } /* diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c index 1efc9f2..559d514 100644 --- a/drivers/s390/char/tape_block.c +++ b/drivers/s390/char/tape_block.c @@ -78,7 +78,7 @@ tapeblock_end_request(struct request *req, int uptodate) { if (end_that_request_first(req, uptodate, req->hard_nr_sectors)) BUG(); - end_that_request_last(req); + end_that_request_last(req, uptodate); } static void diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c index 4cb1f3e..3c688ef 100644 --- a/drivers/scsi/ide-scsi.c +++ b/drivers/scsi/ide-scsi.c @@ -1046,7 +1046,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd) /* kill current request */ blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 0); if (req->flags & REQ_SENSE) kfree(scsi->pc->buffer); kfree(scsi->pc); @@ -1056,7 +1056,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd) /* now nuke the drive queue */ while ((req = elv_next_request(drive->queue))) { blkdev_dequeue_request(req); - end_that_request_last(req); + end_that_request_last(req, 0); } HWGROUP(drive)->rq = NULL; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index a7f3f0c..53551f1 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -791,7 +791,7 @@ static struct scsi_cmnd 
*scsi_end_request(struct scsi_cmnd *cmd, int uptodate, spin_lock_irqsave(q->queue_lock, flags); if (blk_rq_tagged(req)) blk_queue_end_tag(q, req); - end_that_request_last(req); + end_that_request_last(req, uptodate); spin_unlock_irqrestore(q->queue_lock, flags); /* diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3d3ad7d..d651150 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -748,7 +748,7 @@ static void sd_end_flush(request_queue_t *q, struct request *flush_rq) * force journal abort of barriers */ end_that_request_first(rq, -EOPNOTSUPP, rq->hard_nr_sectors); - end_that_request_last(rq); + end_that_request_last(rq, -EOPNOTSUPP); } } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a18500d..a0ce8c5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -102,7 +102,7 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc); void swap_io_context(struct io_context **ioc1, struct io_context **ioc2); struct request; -typedef void (rq_end_io_fn)(struct request *); +typedef void (rq_end_io_fn)(struct request *, int); struct request_list { int count[2]; @@ -560,7 +560,7 @@ extern void register_disk(struct gendisk *dev); extern void generic_make_request(struct bio *bio); extern void blk_put_request(struct request *); extern void __blk_put_request(request_queue_t *, struct request *); -extern void blk_end_sync_rq(struct request *rq); +extern void blk_end_sync_rq(struct request *rq, int error); extern void blk_attempt_remerge(request_queue_t *, struct request *); extern struct request *blk_get_request(request_queue_t *, int, gfp_t); extern void blk_insert_request(request_queue_t *, struct request *, int, void *); @@ -614,7 +614,7 @@ static inline void blk_run_address_space(struct address_space *mapping) */ extern int end_that_request_first(struct request *, int, int); extern int end_that_request_chunk(struct request *, int, int); -extern void end_that_request_last(struct request *); +extern void end_that_request_last(struct request *, int); extern void end_request(struct request *req, int uptodate); /* -- cgit v0.10.2 From 52d9e675361261a1eb1716b02222ec6177ec342b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:49:58 +0100 Subject: [BLOCK] ll_rw_blk: separate out bio init part from __make_request Separate out bio initialization part from __make_request. It will be used by the following blk_ordered_reimpl. 
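In rough terms, the field-copying code moves into a helper so that a later patch can build a request from a bio outside __make_request. A simplified sketch of the resulting call pattern (not from the patch; example_submit() is a made-up name, and queue locking and error handling are omitted):

static void example_submit(request_queue_t *q, struct bio *bio)
{
	/* allocate a request, then let the helper fill it in from the bio */
	struct request *req = get_request_wait(q, bio_data_dir(bio), bio);

	init_request_from_bio(req, bio);	/* sector, segment counts, flags, ... */
	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);	/* queue_lock must be held */
}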
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 8b1ae69..65c4efc 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -36,6 +36,8 @@ static void blk_unplug_work(void *data); static void blk_unplug_timeout(unsigned long data); static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io); +static void init_request_from_bio(struct request *req, struct bio *bio); +static int __make_request(request_queue_t *q, struct bio *bio); /* * For the allocated request tables @@ -1667,8 +1669,6 @@ static int blk_init_free_list(request_queue_t *q) return 0; } -static int __make_request(request_queue_t *, struct bio *); - request_queue_t *blk_alloc_queue(gfp_t gfp_mask) { return blk_alloc_queue_node(gfp_mask, -1); @@ -2659,6 +2659,36 @@ void blk_attempt_remerge(request_queue_t *q, struct request *rq) EXPORT_SYMBOL(blk_attempt_remerge); +static void init_request_from_bio(struct request *req, struct bio *bio) +{ + req->flags |= REQ_CMD; + + /* + * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) + */ + if (bio_rw_ahead(bio) || bio_failfast(bio)) + req->flags |= REQ_FAILFAST; + + /* + * REQ_BARRIER implies no merging, but lets make it explicit + */ + if (unlikely(bio_barrier(bio))) + req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); + + req->errors = 0; + req->hard_sector = req->sector = bio->bi_sector; + req->hard_nr_sectors = req->nr_sectors = bio_sectors(bio); + req->current_nr_sectors = req->hard_cur_sectors = bio_cur_sectors(bio); + req->nr_phys_segments = bio_phys_segments(req->q, bio); + req->nr_hw_segments = bio_hw_segments(req->q, bio); + req->buffer = bio_data(bio); /* see ->buffer comment above */ + req->waiting = NULL; + req->bio = req->biotail = bio; + req->ioprio = bio_prio(bio); + req->rq_disk = bio->bi_bdev->bd_disk; + req->start_time = jiffies; +} + static int __make_request(request_queue_t *q, struct bio *bio) { struct request *req; @@ -2754,33 +2784,7 @@ get_rq: * We don't worry about that case for efficiency. It won't happen * often, and the elevators are able to handle it. */ - - req->flags |= REQ_CMD; - - /* - * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) - */ - if (bio_rw_ahead(bio) || bio_failfast(bio)) - req->flags |= REQ_FAILFAST; - - /* - * REQ_BARRIER implies no merging, but lets make it explicit - */ - if (unlikely(barrier)) - req->flags |= (REQ_HARDBARRIER | REQ_NOMERGE); - - req->errors = 0; - req->hard_sector = req->sector = sector; - req->hard_nr_sectors = req->nr_sectors = nr_sectors; - req->current_nr_sectors = req->hard_cur_sectors = cur_nr_sectors; - req->nr_phys_segments = bio_phys_segments(q, bio); - req->nr_hw_segments = bio_hw_segments(q, bio); - req->buffer = bio_data(bio); /* see ->buffer comment above */ - req->waiting = NULL; - req->bio = req->biotail = bio; - req->ioprio = prio; - req->rq_disk = bio->bi_bdev->bd_disk; - req->start_time = jiffies; + init_request_from_bio(req, bio); spin_lock_irq(q->queue_lock); if (elv_queue_empty(q)) -- cgit v0.10.2 From 797e7dbbee0a91fa1349192f18ad5c454997d876 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:51:03 +0100 Subject: [BLOCK] reimplement handling of barrier request Reimplement handling of barrier requests. * Flexible handling to deal with various capabilities of target devices. * Retry support for falling back. * Tagged queues which don't support ordered tag can do ordered. 
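From a driver's point of view, the new scheme reduces to declaring which ordering mode the device supports. A hedged sketch, modeled on the sd.c conversion later in this series (my_prepare_flush() and my_setup_ordered() are placeholder names):

static void my_prepare_flush(request_queue_t *q, struct request *rq)
{
	/* turn the pre/post flush request into a cache flush command */
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->flags |= REQ_BLOCK_PC;
	rq->cmd[0] = SYNCHRONIZE_CACHE;
	rq->cmd_len = 10;
}

static void my_setup_ordered(request_queue_t *q, int write_cache)
{
	if (write_cache)
		/* write-back cache: drain, flush before the barrier and after */
		blk_queue_ordered(q, QUEUE_ORDERED_DRAIN_FLUSH, my_prepare_flush);
	else
		/* write-through: draining the queue alone is enough */
		blk_queue_ordered(q, QUEUE_ORDERED_DRAIN, NULL);
}

A NULL prepare_flush_fn is accepted for the plain DRAIN mode since no flush requests are ever issued; blk_queue_ordered() rejects NULL only when a pre- or post-flush is requested.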
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/block/elevator.c b/block/elevator.c index 85a11ce..39dcccc 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -304,15 +304,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq) rq->flags &= ~REQ_STARTED; - /* - * if this is the flush, requeue the original instead and drop the flush - */ - if (rq->flags & REQ_BAR_FLUSH) { - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); - rq = rq->end_io_data; - } - - __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); + __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0); } static void elv_drain_elevator(request_queue_t *q) @@ -332,8 +324,19 @@ static void elv_drain_elevator(request_queue_t *q) void __elv_add_request(request_queue_t *q, struct request *rq, int where, int plug) { + struct list_head *pos; + unsigned ordseq; + + if (q->ordcolor) + rq->flags |= REQ_ORDERED_COLOR; + if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { /* + * toggle ordered color + */ + q->ordcolor ^= 1; + + /* * barriers implicitly indicate back insertion */ if (where == ELEVATOR_INSERT_SORT) @@ -393,6 +396,30 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where, q->elevator->ops->elevator_add_req_fn(q, rq); break; + case ELEVATOR_INSERT_REQUEUE: + /* + * If ordered flush isn't in progress, we do front + * insertion; otherwise, requests should be requeued + * in ordseq order. + */ + rq->flags |= REQ_SOFTBARRIER; + + if (q->ordseq == 0) { + list_add(&rq->queuelist, &q->queue_head); + break; + } + + ordseq = blk_ordered_req_seq(rq); + + list_for_each(pos, &q->queue_head) { + struct request *pos_rq = list_entry_rq(pos); + if (ordseq <= blk_ordered_req_seq(pos_rq)) + break; + } + + list_add_tail(&rq->queuelist, pos); + break; + default: printk(KERN_ERR "%s: bad insertion point %d\n", __FUNCTION__, where); @@ -422,25 +449,16 @@ static inline struct request *__elv_next_request(request_queue_t *q) { struct request *rq; - if (unlikely(list_empty(&q->queue_head) && - !q->elevator->ops->elevator_dispatch_fn(q, 0))) - return NULL; - - rq = list_entry_rq(q->queue_head.next); - - /* - * if this is a barrier write and the device has to issue a - * flush sequence to support it, check how far we are - */ - if (blk_fs_request(rq) && blk_barrier_rq(rq)) { - BUG_ON(q->ordered == QUEUE_ORDERED_NONE); + while (1) { + while (!list_empty(&q->queue_head)) { + rq = list_entry_rq(q->queue_head.next); + if (blk_do_ordered(q, &rq)) + return rq; + } - if (q->ordered == QUEUE_ORDERED_FLUSH && - !blk_barrier_preflush(rq)) - rq = blk_start_pre_flush(q, rq); + if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) + return NULL; } - - return rq; } struct request *elv_next_request(request_queue_t *q) @@ -593,7 +611,21 @@ void elv_completed_request(request_queue_t *q, struct request *rq) * request is released from the driver, io must be done */ if (blk_account_rq(rq)) { + struct request *first_rq = list_entry_rq(q->queue_head.next); + q->in_flight--; + + /* + * Check if the queue is waiting for fs requests to be + * drained for flush sequence. 
+ */ + if (q->ordseq && q->in_flight == 0 && + blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && + blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) { + blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); + q->request_fn(q); + } + if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 65c4efc..91d3b48 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -290,8 +290,8 @@ static inline void rq_init(request_queue_t *q, struct request *rq) /** * blk_queue_ordered - does this queue support ordered writes - * @q: the request queue - * @flag: see below + * @q: the request queue + * @ordered: one of QUEUE_ORDERED_* * * Description: * For journalled file systems, doing ordered writes on a commit @@ -300,28 +300,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq) * feature should call this function and indicate so. * **/ -void blk_queue_ordered(request_queue_t *q, int flag) -{ - switch (flag) { - case QUEUE_ORDERED_NONE: - if (q->flush_rq) - kmem_cache_free(request_cachep, q->flush_rq); - q->flush_rq = NULL; - q->ordered = flag; - break; - case QUEUE_ORDERED_TAG: - q->ordered = flag; - break; - case QUEUE_ORDERED_FLUSH: - q->ordered = flag; - if (!q->flush_rq) - q->flush_rq = kmem_cache_alloc(request_cachep, - GFP_KERNEL); - break; - default: - printk("blk_queue_ordered: bad value %d\n", flag); - break; +int blk_queue_ordered(request_queue_t *q, unsigned ordered, + prepare_flush_fn *prepare_flush_fn) +{ + if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) && + prepare_flush_fn == NULL) { + printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n"); + return -EINVAL; + } + + if (ordered != QUEUE_ORDERED_NONE && + ordered != QUEUE_ORDERED_DRAIN && + ordered != QUEUE_ORDERED_DRAIN_FLUSH && + ordered != QUEUE_ORDERED_DRAIN_FUA && + ordered != QUEUE_ORDERED_TAG && + ordered != QUEUE_ORDERED_TAG_FLUSH && + ordered != QUEUE_ORDERED_TAG_FUA) { + printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); + return -EINVAL; } + + q->next_ordered = ordered; + q->prepare_flush_fn = prepare_flush_fn; + + return 0; } EXPORT_SYMBOL(blk_queue_ordered); @@ -346,167 +348,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn); /* * Cache flushing for ordered writes handling */ -static void blk_pre_flush_end_io(struct request *flush_rq, int error) +inline unsigned blk_ordered_cur_seq(request_queue_t *q) { - struct request *rq = flush_rq->end_io_data; - request_queue_t *q = rq->q; - - elv_completed_request(q, flush_rq); - - rq->flags |= REQ_BAR_PREFLUSH; - - if (!flush_rq->errors) - elv_requeue_request(q, rq); - else { - q->end_flush_fn(q, flush_rq); - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); - q->request_fn(q); - } + if (!q->ordseq) + return 0; + return 1 << ffz(q->ordseq); } -static void blk_post_flush_end_io(struct request *flush_rq, int error) +unsigned blk_ordered_req_seq(struct request *rq) { - struct request *rq = flush_rq->end_io_data; request_queue_t *q = rq->q; - elv_completed_request(q, flush_rq); + BUG_ON(q->ordseq == 0); - rq->flags |= REQ_BAR_POSTFLUSH; + if (rq == &q->pre_flush_rq) + return QUEUE_ORDSEQ_PREFLUSH; + if (rq == &q->bar_rq) + return QUEUE_ORDSEQ_BAR; + if (rq == &q->post_flush_rq) + return QUEUE_ORDSEQ_POSTFLUSH; - q->end_flush_fn(q, flush_rq); - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); - q->request_fn(q); + if ((rq->flags & REQ_ORDERED_COLOR) == + (q->orig_bar_rq->flags & REQ_ORDERED_COLOR)) + return QUEUE_ORDSEQ_DRAIN; + else + return 
QUEUE_ORDSEQ_DONE; } -struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq) +void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error) { - struct request *flush_rq = q->flush_rq; - - BUG_ON(!blk_barrier_rq(rq)); + struct request *rq; + int uptodate; - if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags)) - return NULL; + if (error && !q->orderr) + q->orderr = error; - rq_init(q, flush_rq); - flush_rq->elevator_private = NULL; - flush_rq->flags = REQ_BAR_FLUSH; - flush_rq->rq_disk = rq->rq_disk; - flush_rq->rl = NULL; + BUG_ON(q->ordseq & seq); + q->ordseq |= seq; - /* - * prepare_flush returns 0 if no flush is needed, just mark both - * pre and post flush as done in that case - */ - if (!q->prepare_flush_fn(q, flush_rq)) { - rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH; - clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags); - return rq; - } + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) + return; /* - * some drivers dequeue requests right away, some only after io - * completion. make sure the request is dequeued. + * Okay, sequence complete. */ - if (!list_empty(&rq->queuelist)) - blkdev_dequeue_request(rq); + rq = q->orig_bar_rq; + uptodate = q->orderr ? q->orderr : 1; - flush_rq->end_io_data = rq; - flush_rq->end_io = blk_pre_flush_end_io; + q->ordseq = 0; - __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); - return flush_rq; + end_that_request_first(rq, uptodate, rq->hard_nr_sectors); + end_that_request_last(rq, uptodate); } -static void blk_start_post_flush(request_queue_t *q, struct request *rq) +static void pre_flush_end_io(struct request *rq, int error) { - struct request *flush_rq = q->flush_rq; + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); +} - BUG_ON(!blk_barrier_rq(rq)); +static void bar_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); +} - rq_init(q, flush_rq); - flush_rq->elevator_private = NULL; - flush_rq->flags = REQ_BAR_FLUSH; - flush_rq->rq_disk = rq->rq_disk; - flush_rq->rl = NULL; +static void post_flush_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); +} - if (q->prepare_flush_fn(q, flush_rq)) { - flush_rq->end_io_data = rq; - flush_rq->end_io = blk_post_flush_end_io; +static void queue_flush(request_queue_t *q, unsigned which) +{ + struct request *rq; + rq_end_io_fn *end_io; - __elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0); - q->request_fn(q); + if (which == QUEUE_ORDERED_PREFLUSH) { + rq = &q->pre_flush_rq; + end_io = pre_flush_end_io; + } else { + rq = &q->post_flush_rq; + end_io = post_flush_end_io; } + + rq_init(q, rq); + rq->flags = REQ_HARDBARRIER; + rq->elevator_private = NULL; + rq->rq_disk = q->bar_rq.rq_disk; + rq->rl = NULL; + rq->end_io = end_io; + q->prepare_flush_fn(q, rq); + + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); } -static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq, - int sectors) +static inline struct request *start_ordered(request_queue_t *q, + struct request *rq) { - if (sectors > rq->nr_sectors) - sectors = rq->nr_sectors; + q->bi_size = 0; + q->orderr = 0; + q->ordered = q->next_ordered; + q->ordseq |= QUEUE_ORDSEQ_STARTED; + + /* + * Prep proxy barrier request. 
+ */ + blkdev_dequeue_request(rq); + q->orig_bar_rq = rq; + rq = &q->bar_rq; + rq_init(q, rq); + rq->flags = bio_data_dir(q->orig_bar_rq->bio); + rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0; + rq->elevator_private = NULL; + rq->rl = NULL; + init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->end_io = bar_end_io; + + /* + * Queue ordered sequence. As we stack them at the head, we + * need to queue in reverse order. Note that we rely on that + * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs + * request gets inbetween ordered sequence. + */ + if (q->ordered & QUEUE_ORDERED_POSTFLUSH) + queue_flush(q, QUEUE_ORDERED_POSTFLUSH); + else + q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH; + + __elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0); + + if (q->ordered & QUEUE_ORDERED_PREFLUSH) { + queue_flush(q, QUEUE_ORDERED_PREFLUSH); + rq = &q->pre_flush_rq; + } else + q->ordseq |= QUEUE_ORDSEQ_PREFLUSH; - rq->nr_sectors -= sectors; - return rq->nr_sectors; + if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0) + q->ordseq |= QUEUE_ORDSEQ_DRAIN; + else + rq = NULL; + + return rq; } -static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq, - int sectors, int queue_locked) +int blk_do_ordered(request_queue_t *q, struct request **rqp) { - if (q->ordered != QUEUE_ORDERED_FLUSH) - return 0; - if (!blk_fs_request(rq) || !blk_barrier_rq(rq)) - return 0; - if (blk_barrier_postflush(rq)) - return 0; + struct request *rq = *rqp, *allowed_rq; + int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); - if (!blk_check_end_barrier(q, rq, sectors)) { - unsigned long flags = 0; + if (!q->ordseq) { + if (!is_barrier) + return 1; - if (!queue_locked) - spin_lock_irqsave(q->queue_lock, flags); + if (q->next_ordered != QUEUE_ORDERED_NONE) { + *rqp = start_ordered(q, rq); + return 1; + } else { + /* + * This can happen when the queue switches to + * ORDERED_NONE while this request is on it. + */ + blkdev_dequeue_request(rq); + end_that_request_first(rq, -EOPNOTSUPP, + rq->hard_nr_sectors); + end_that_request_last(rq, -EOPNOTSUPP); + *rqp = NULL; + return 0; + } + } - blk_start_post_flush(q, rq); + if (q->ordered & QUEUE_ORDERED_TAG) { + if (is_barrier && rq != &q->bar_rq) + *rqp = NULL; + return 1; + } - if (!queue_locked) - spin_unlock_irqrestore(q->queue_lock, flags); + switch (blk_ordered_cur_seq(q)) { + case QUEUE_ORDSEQ_PREFLUSH: + allowed_rq = &q->pre_flush_rq; + break; + case QUEUE_ORDSEQ_BAR: + allowed_rq = &q->bar_rq; + break; + case QUEUE_ORDSEQ_POSTFLUSH: + allowed_rq = &q->post_flush_rq; + break; + default: + allowed_rq = NULL; + break; } + if (rq != allowed_rq && + (blk_fs_request(rq) || rq == &q->pre_flush_rq || + rq == &q->post_flush_rq)) + *rqp = NULL; + return 1; } -/** - * blk_complete_barrier_rq - complete possible barrier request - * @q: the request queue for the device - * @rq: the request - * @sectors: number of sectors to complete - * - * Description: - * Used in driver end_io handling to determine whether to postpone - * completion of a barrier request until a post flush has been done. This - * is the unlocked variant, used if the caller doesn't already hold the - * queue lock. - **/ -int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors) +static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error) { - return __blk_complete_barrier_rq(q, rq, sectors, 0); + request_queue_t *q = bio->bi_private; + struct bio_vec *bvec; + int i; + + /* + * This is dry run, restore bio_sector and size. 
We'll finish + * this request again with the original bi_end_io after an + * error occurs or post flush is complete. + */ + q->bi_size += bytes; + + if (bio->bi_size) + return 1; + + /* Rewind bvec's */ + bio->bi_idx = 0; + bio_for_each_segment(bvec, bio, i) { + bvec->bv_len += bvec->bv_offset; + bvec->bv_offset = 0; + } + + /* Reset bio */ + set_bit(BIO_UPTODATE, &bio->bi_flags); + bio->bi_size = q->bi_size; + bio->bi_sector -= (q->bi_size >> 9); + q->bi_size = 0; + + return 0; } -EXPORT_SYMBOL(blk_complete_barrier_rq); -/** - * blk_complete_barrier_rq_locked - complete possible barrier request - * @q: the request queue for the device - * @rq: the request - * @sectors: number of sectors to complete - * - * Description: - * See blk_complete_barrier_rq(). This variant must be used if the caller - * holds the queue lock. - **/ -int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq, - int sectors) +static inline int ordered_bio_endio(struct request *rq, struct bio *bio, + unsigned int nbytes, int error) { - return __blk_complete_barrier_rq(q, rq, sectors, 1); + request_queue_t *q = rq->q; + bio_end_io_t *endio; + void *private; + + if (&q->bar_rq != rq) + return 0; + + /* + * Okay, this is the barrier request in progress, dry finish it. + */ + if (error && !q->orderr) + q->orderr = error; + + endio = bio->bi_end_io; + private = bio->bi_private; + bio->bi_end_io = flush_dry_bio_endio; + bio->bi_private = q; + + bio_endio(bio, nbytes, error); + + bio->bi_end_io = endio; + bio->bi_private = private; + + return 1; } -EXPORT_SYMBOL(blk_complete_barrier_rq_locked); /** * blk_queue_bounce_limit - set bounce buffer limit for queue @@ -1047,6 +1147,7 @@ static const char * const rq_flags[] = { "REQ_SORTED", "REQ_SOFTBARRIER", "REQ_HARDBARRIER", + "REQ_FUA", "REQ_CMD", "REQ_NOMERGE", "REQ_STARTED", @@ -1066,6 +1167,7 @@ static const char * const rq_flags[] = { "REQ_PM_SUSPEND", "REQ_PM_RESUME", "REQ_PM_SHUTDOWN", + "REQ_ORDERED_COLOR", }; void blk_dump_rq_flags(struct request *rq, char *msg) @@ -1643,8 +1745,6 @@ void blk_cleanup_queue(request_queue_t * q) if (q->queue_tags) __blk_queue_free_tags(q); - blk_queue_ordered(q, QUEUE_ORDERED_NONE); - kmem_cache_free(requestq_cachep, q); } @@ -2714,7 +2814,7 @@ static int __make_request(request_queue_t *q, struct bio *bio) spin_lock_prefetch(q->queue_lock); barrier = bio_barrier(bio); - if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) { + if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) { err = -EOPNOTSUPP; goto end_io; } @@ -3075,7 +3175,8 @@ static int __end_that_request_first(struct request *req, int uptodate, if (nr_bytes >= bio->bi_size) { req->bio = bio->bi_next; nbytes = bio->bi_size; - bio_endio(bio, nbytes, error); + if (!ordered_bio_endio(req, bio, nbytes, error)) + bio_endio(bio, nbytes, error); next_idx = 0; bio_nbytes = 0; } else { @@ -3130,7 +3231,8 @@ static int __end_that_request_first(struct request *req, int uptodate, * if the request wasn't completed, update state */ if (bio_nbytes) { - bio_endio(bio, bio_nbytes, error); + if (!ordered_bio_endio(req, bio, bio_nbytes, error)) + bio_endio(bio, bio_nbytes, error); bio->bi_idx += next_idx; bio_iovec(bio)->bv_offset += nr_bytes; bio_iovec(bio)->bv_len -= nr_bytes; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a0ce8c5..15db0f1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -207,6 +207,7 @@ enum rq_flag_bits { __REQ_SORTED, /* elevator knows about this request */ __REQ_SOFTBARRIER, /* may not be passed by 
ioscheduler */ __REQ_HARDBARRIER, /* may not be passed by drive either */ + __REQ_FUA, /* forced unit access */ __REQ_CMD, /* is a regular fs rw request */ __REQ_NOMERGE, /* don't touch this for merging */ __REQ_STARTED, /* drive already may have started this one */ @@ -230,9 +231,7 @@ enum rq_flag_bits { __REQ_PM_SUSPEND, /* suspend request */ __REQ_PM_RESUME, /* resume request */ __REQ_PM_SHUTDOWN, /* shutdown request */ - __REQ_BAR_PREFLUSH, /* barrier pre-flush done */ - __REQ_BAR_POSTFLUSH, /* barrier post-flush */ - __REQ_BAR_FLUSH, /* rq is the flush request */ + __REQ_ORDERED_COLOR, /* is before or after barrier */ __REQ_NR_BITS, /* stops here */ }; @@ -241,6 +240,7 @@ enum rq_flag_bits { #define REQ_SORTED (1 << __REQ_SORTED) #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) #define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) +#define REQ_FUA (1 << __REQ_FUA) #define REQ_CMD (1 << __REQ_CMD) #define REQ_NOMERGE (1 << __REQ_NOMERGE) #define REQ_STARTED (1 << __REQ_STARTED) @@ -260,9 +260,7 @@ enum rq_flag_bits { #define REQ_PM_SUSPEND (1 << __REQ_PM_SUSPEND) #define REQ_PM_RESUME (1 << __REQ_PM_RESUME) #define REQ_PM_SHUTDOWN (1 << __REQ_PM_SHUTDOWN) -#define REQ_BAR_PREFLUSH (1 << __REQ_BAR_PREFLUSH) -#define REQ_BAR_POSTFLUSH (1 << __REQ_BAR_POSTFLUSH) -#define REQ_BAR_FLUSH (1 << __REQ_BAR_FLUSH) +#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) /* * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME @@ -292,8 +290,7 @@ struct bio_vec; typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *); typedef void (activity_fn) (void *data, int rw); typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *); -typedef int (prepare_flush_fn) (request_queue_t *, struct request *); -typedef void (end_flush_fn) (request_queue_t *, struct request *); +typedef void (prepare_flush_fn) (request_queue_t *, struct request *); enum blk_queue_state { Queue_down, @@ -335,7 +332,6 @@ struct request_queue activity_fn *activity_fn; issue_flush_fn *issue_flush_fn; prepare_flush_fn *prepare_flush_fn; - end_flush_fn *end_flush_fn; /* * Dispatch queue sorting @@ -420,14 +416,11 @@ struct request_queue /* * reserved for flush operations */ - struct request *flush_rq; - unsigned char ordered; -}; - -enum { - QUEUE_ORDERED_NONE, - QUEUE_ORDERED_TAG, - QUEUE_ORDERED_FLUSH, + unsigned int ordered, next_ordered, ordseq; + int orderr, ordcolor; + struct request pre_flush_rq, bar_rq, post_flush_rq; + struct request *orig_bar_rq; + unsigned int bi_size; }; #define RQ_INACTIVE (-1) @@ -445,12 +438,51 @@ enum { #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ #define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ #define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ -#define QUEUE_FLAG_FLUSH 9 /* doing barrier flush sequence */ + +enum { + /* + * Hardbarrier is supported with one of the following methods. 
+ * + * NONE : hardbarrier unsupported + * DRAIN : ordering by draining is enough + * DRAIN_FLUSH : ordering by draining w/ pre and post flushes + * DRAIN_FUA : ordering by draining w/ pre flush and FUA write + * TAG : ordering by tag is enough + * TAG_FLUSH : ordering by tag w/ pre and post flushes + * TAG_FUA : ordering by tag w/ pre flush and FUA write + */ + QUEUE_ORDERED_NONE = 0x00, + QUEUE_ORDERED_DRAIN = 0x01, + QUEUE_ORDERED_TAG = 0x02, + + QUEUE_ORDERED_PREFLUSH = 0x10, + QUEUE_ORDERED_POSTFLUSH = 0x20, + QUEUE_ORDERED_FUA = 0x40, + + QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, + QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, + QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH, + QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | + QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA, + + /* + * Ordered operation sequence + */ + QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ + QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ + QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ + QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ + QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ + QUEUE_ORDSEQ_DONE = 0x20, +}; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) -#define blk_queue_flushing(q) test_bit(QUEUE_FLAG_FLUSH, &(q)->queue_flags) +#define blk_queue_flushing(q) ((q)->ordseq) #define blk_fs_request(rq) ((rq)->flags & REQ_CMD) #define blk_pc_request(rq) ((rq)->flags & REQ_BLOCK_PC) @@ -466,8 +498,7 @@ enum { #define blk_sorted_rq(rq) ((rq)->flags & REQ_SORTED) #define blk_barrier_rq(rq) ((rq)->flags & REQ_HARDBARRIER) -#define blk_barrier_preflush(rq) ((rq)->flags & REQ_BAR_PREFLUSH) -#define blk_barrier_postflush(rq) ((rq)->flags & REQ_BAR_POSTFLUSH) +#define blk_fua_rq(rq) ((rq)->flags & REQ_FUA) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) @@ -665,11 +696,12 @@ extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn); extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *); extern void blk_queue_dma_alignment(request_queue_t *, int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern void blk_queue_ordered(request_queue_t *, int); +extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *); extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *); -extern struct request *blk_start_pre_flush(request_queue_t *,struct request *); -extern int blk_complete_barrier_rq(request_queue_t *, struct request *, int); -extern int blk_complete_barrier_rq_locked(request_queue_t *, struct request *, int); +extern int blk_do_ordered(request_queue_t *, struct request **); +extern unsigned blk_ordered_cur_seq(request_queue_t *); +extern unsigned blk_ordered_req_seq(struct request *); +extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int); extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index a74c27e..fb80fa4 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -130,6 +130,7 @@ extern int 
elv_try_last_merge(request_queue_t *, struct bio *); #define ELEVATOR_INSERT_FRONT 1 #define ELEVATOR_INSERT_BACK 2 #define ELEVATOR_INSERT_SORT 3 +#define ELEVATOR_INSERT_REQUEUE 4 /* * return values from elevator_may_queue_fn -- cgit v0.10.2 From 461d4e90c8cd049718884cd17c955e231140d3be Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:52:55 +0100 Subject: [BLOCK] update SCSI to use new blk_ordered for barriers All ordered request handling is delegated to the HLD. The midlayer now doesn't deal with the ordered setting or the prepare_flush callback. sd.c is updated to handle the blk_queue_ordered setting. Currently, ordered tag isn't used, as the SCSI midlayer cannot guarantee request ordering. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c index 5b9c2c5..66783c8 100644 --- a/drivers/scsi/hosts.c +++ b/drivers/scsi/hosts.c @@ -347,17 +347,8 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize) shost->cmd_per_lun = sht->cmd_per_lun; shost->unchecked_isa_dma = sht->unchecked_isa_dma; shost->use_clustering = sht->use_clustering; - shost->ordered_flush = sht->ordered_flush; shost->ordered_tag = sht->ordered_tag; - /* - * hosts/devices that do queueing must support ordered tags - */ - if (shost->can_queue > 1 && shost->ordered_flush) { - printk(KERN_ERR "scsi: ordered flushes don't support queueing\n"); - shost->ordered_flush = 0; - } - if (sht->max_host_blocked) shost->max_host_blocked = sht->max_host_blocked; else diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 53551f1..7a38b10 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -932,9 +932,6 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes, int sense_valid = 0; int sense_deferred = 0; - if (blk_complete_barrier_rq(q, req, good_bytes >> 9)) - return; - /* * Free up any indirection buffers we allocated for DMA purposes.
* For the case of a READ, we need to copy the data out of the @@ -1199,38 +1196,6 @@ static int scsi_init_io(struct scsi_cmnd *cmd) return BLKPREP_KILL; } -static int scsi_prepare_flush_fn(request_queue_t *q, struct request *rq) -{ - struct scsi_device *sdev = q->queuedata; - struct scsi_driver *drv; - - if (sdev->sdev_state == SDEV_RUNNING) { - drv = *(struct scsi_driver **) rq->rq_disk->private_data; - - if (drv->prepare_flush) - return drv->prepare_flush(q, rq); - } - - return 0; -} - -static void scsi_end_flush_fn(request_queue_t *q, struct request *rq) -{ - struct scsi_device *sdev = q->queuedata; - struct request *flush_rq = rq->end_io_data; - struct scsi_driver *drv; - - if (flush_rq->errors) { - printk("scsi: barrier error, disabling flush support\n"); - blk_queue_ordered(q, QUEUE_ORDERED_NONE); - } - - if (sdev->sdev_state == SDEV_RUNNING) { - drv = *(struct scsi_driver **) rq->rq_disk->private_data; - drv->end_flush(q, rq); - } -} - static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, sector_t *error_sector) { @@ -1703,17 +1668,6 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) blk_queue_segment_boundary(q, shost->dma_boundary); blk_queue_issue_flush_fn(q, scsi_issue_flush_fn); - /* - * ordered tags are superior to flush ordering - */ - if (shost->ordered_tag) - blk_queue_ordered(q, QUEUE_ORDERED_TAG); - else if (shost->ordered_flush) { - blk_queue_ordered(q, QUEUE_ORDERED_FLUSH); - q->prepare_flush_fn = scsi_prepare_flush_fn; - q->end_flush_fn = scsi_end_flush_fn; - } - if (!shost->use_clustering) clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); return q; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index d651150..2eefc9e 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -121,8 +121,7 @@ static void sd_shutdown(struct device *dev); static void sd_rescan(struct device *); static int sd_init_command(struct scsi_cmnd *); static int sd_issue_flush(struct device *, sector_t *); -static void sd_end_flush(request_queue_t *, struct request *); -static int sd_prepare_flush(request_queue_t *, struct request *); +static void sd_prepare_flush(request_queue_t *, struct request *); static void sd_read_capacity(struct scsi_disk *sdkp, char *diskname, unsigned char *buffer); @@ -137,8 +136,6 @@ static struct scsi_driver sd_template = { .rescan = sd_rescan, .init_command = sd_init_command, .issue_flush = sd_issue_flush, - .prepare_flush = sd_prepare_flush, - .end_flush = sd_end_flush, }; /* @@ -729,42 +726,13 @@ static int sd_issue_flush(struct device *dev, sector_t *error_sector) return ret; } -static void sd_end_flush(request_queue_t *q, struct request *flush_rq) +static void sd_prepare_flush(request_queue_t *q, struct request *rq) { - struct request *rq = flush_rq->end_io_data; - struct scsi_cmnd *cmd = rq->special; - unsigned int bytes = rq->hard_nr_sectors << 9; - - if (!flush_rq->errors) { - spin_unlock(q->queue_lock); - scsi_io_completion(cmd, bytes, 0); - spin_lock(q->queue_lock); - } else if (blk_barrier_postflush(rq)) { - spin_unlock(q->queue_lock); - scsi_io_completion(cmd, 0, bytes); - spin_lock(q->queue_lock); - } else { - /* - * force journal abort of barriers - */ - end_that_request_first(rq, -EOPNOTSUPP, rq->hard_nr_sectors); - end_that_request_last(rq, -EOPNOTSUPP); - } -} - -static int sd_prepare_flush(request_queue_t *q, struct request *rq) -{ - struct scsi_device *sdev = q->queuedata; - struct scsi_disk *sdkp = dev_get_drvdata(&sdev->sdev_gendev); - - if (!sdkp || !sdkp->WCE) - return 0; - memset(rq->cmd, 0, 
sizeof(rq->cmd)); - rq->flags |= REQ_BLOCK_PC | REQ_SOFTBARRIER; + rq->flags |= REQ_BLOCK_PC; rq->timeout = SD_TIMEOUT; rq->cmd[0] = SYNCHRONIZE_CACHE; - return 1; + rq->cmd_len = 10; } static void sd_rescan(struct device *dev) @@ -1462,6 +1430,7 @@ static int sd_revalidate_disk(struct gendisk *disk) struct scsi_disk *sdkp = scsi_disk(disk); struct scsi_device *sdp = sdkp->device; unsigned char *buffer; + unsigned ordered; SCSI_LOG_HLQUEUE(3, printk("sd_revalidate_disk: disk=%s\n", disk->disk_name)); @@ -1498,7 +1467,20 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_write_protect_flag(sdkp, disk->disk_name, buffer); sd_read_cache_type(sdkp, disk->disk_name, buffer); } - + + /* + * We now have all cache related info, determine how we deal + * with ordered requests. Note that as the current SCSI + * dispatch function can alter request order, we cannot use + * QUEUE_ORDERED_TAG_* even when ordered tag is supported. + */ + if (sdkp->WCE) + ordered = QUEUE_ORDERED_DRAIN_FLUSH; + else + ordered = QUEUE_ORDERED_DRAIN; + + blk_queue_ordered(sdkp->disk->queue, ordered, sd_prepare_flush); + set_capacity(disk, sdkp->capacity); kfree(buffer); @@ -1598,6 +1580,7 @@ static int sd_probe(struct device *dev) strcpy(gd->devfs_name, sdp->devfs_name); gd->private_data = &sdkp->driver; + gd->queue = sdkp->device->request_queue; sd_revalidate_disk(gd); @@ -1605,7 +1588,6 @@ static int sd_probe(struct device *dev) gd->flags = GENHD_FL_DRIVERFS; if (sdp->removable) gd->flags |= GENHD_FL_REMOVABLE; - gd->queue = sdkp->device->request_queue; dev_set_drvdata(dev, sdkp); add_disk(gd); diff --git a/include/scsi/scsi_driver.h b/include/scsi/scsi_driver.h index 850dfa8..02e26c1 100644 --- a/include/scsi/scsi_driver.h +++ b/include/scsi/scsi_driver.h @@ -15,7 +15,6 @@ struct scsi_driver { void (*rescan)(struct device *); int (*issue_flush)(struct device *, sector_t *); int (*prepare_flush)(struct request_queue *, struct request *); - void (*end_flush)(struct request_queue *, struct request *); }; #define to_scsi_driver(drv) \ container_of((drv), struct scsi_driver, gendrv) diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h index 6cbb198..25f637b 100644 --- a/include/scsi/scsi_host.h +++ b/include/scsi/scsi_host.h @@ -392,7 +392,6 @@ struct scsi_host_template { /* * ordered write support */ - unsigned ordered_flush:1; unsigned ordered_tag:1; /* -- cgit v0.10.2 From 007365ad60387df30f02f01fdc2b6e6432f6c265 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:53:52 +0100 Subject: [BLOCK] scsi: add FUA support to sd Add FUA support for barriers to SCSI disk. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 2eefc9e..32d4d8d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -102,6 +102,7 @@ struct scsi_disk { u8 write_prot; unsigned WCE : 1; /* state of disk WCE bit */ unsigned RCD : 1; /* state of disk RCD bit, unused */ + unsigned DPOFUA : 1; /* state of disk DPOFUA bit */ }; static DEFINE_IDR(sd_index_idr); @@ -343,6 +344,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt) if (block > 0xffffffff) { SCpnt->cmnd[0] += READ_16 - READ_6; + SCpnt->cmnd[1] |= blk_fua_rq(rq) ? 0x8 : 0; SCpnt->cmnd[2] = sizeof(block) > 4 ? (unsigned char) (block >> 56) & 0xff : 0; SCpnt->cmnd[3] = sizeof(block) > 4 ? (unsigned char) (block >> 48) & 0xff : 0; SCpnt->cmnd[4] = sizeof(block) > 4 ? 
(unsigned char) (block >> 40) & 0xff : 0; @@ -362,6 +364,7 @@ static int sd_init_command(struct scsi_cmnd * SCpnt) this_count = 0xffff; SCpnt->cmnd[0] += READ_10 - READ_6; + SCpnt->cmnd[1] |= blk_fua_rq(rq) ? 0x8 : 0; SCpnt->cmnd[2] = (unsigned char) (block >> 24) & 0xff; SCpnt->cmnd[3] = (unsigned char) (block >> 16) & 0xff; SCpnt->cmnd[4] = (unsigned char) (block >> 8) & 0xff; @@ -370,6 +373,17 @@ static int sd_init_command(struct scsi_cmnd * SCpnt) SCpnt->cmnd[7] = (unsigned char) (this_count >> 8) & 0xff; SCpnt->cmnd[8] = (unsigned char) this_count & 0xff; } else { + if (unlikely(blk_fua_rq(rq))) { + /* + * This happens only if this drive failed + * 10byte rw command with ILLEGAL_REQUEST + * during operation and thus turned off + * use_10_for_rw. + */ + printk(KERN_ERR "sd: FUA write on READ/WRITE(6) drive\n"); + return 0; + } + SCpnt->cmnd[1] |= (unsigned char) ((block >> 16) & 0x1f); SCpnt->cmnd[2] = (unsigned char) ((block >> 8) & 0xff); SCpnt->cmnd[3] = (unsigned char) block & 0xff; @@ -1395,10 +1409,18 @@ sd_read_cache_type(struct scsi_disk *sdkp, char *diskname, sdkp->RCD = 0; } + sdkp->DPOFUA = (data.device_specific & 0x10) != 0; + if (sdkp->DPOFUA && !sdkp->device->use_10_for_rw) { + printk(KERN_NOTICE "SCSI device %s: uses " + "READ/WRITE(6), disabling FUA\n", diskname); + sdkp->DPOFUA = 0; + } + ct = sdkp->RCD + 2*sdkp->WCE; - printk(KERN_NOTICE "SCSI device %s: drive cache: %s\n", - diskname, types[ct]); + printk(KERN_NOTICE "SCSI device %s: drive cache: %s%s\n", + diskname, types[ct], + sdkp->DPOFUA ? " w/ FUA" : ""); return; } @@ -1475,7 +1497,8 @@ static int sd_revalidate_disk(struct gendisk *disk) * QUEUE_ORDERED_TAG_* even when ordered tag is supported. */ if (sdkp->WCE) - ordered = QUEUE_ORDERED_DRAIN_FLUSH; + ordered = sdkp->DPOFUA + ? 
QUEUE_ORDERED_DRAIN_FUA : QUEUE_ORDERED_DRAIN_FLUSH; else ordered = QUEUE_ORDERED_DRAIN; -- cgit v0.10.2 From 93c9338713d4e11102cd09b4670ad42a336b06a3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:55:00 +0100 Subject: [BLOCK] update libata to use new blk_ordered for barriers Reflect changes in the SCSI midlayer and update libata to use the new ordered request implementation. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c index 887eaa2..d113290 100644 --- a/drivers/scsi/ahci.c +++ b/drivers/scsi/ahci.c @@ -214,7 +214,6 @@ static struct scsi_host_template ahci_sht = { .dma_boundary = AHCI_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations ahci_ops = { diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c index 0ea2787..4b647ee 100644 --- a/drivers/scsi/ata_piix.c +++ b/drivers/scsi/ata_piix.c @@ -185,7 +185,6 @@ static struct scsi_host_template piix_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations piix_pata_ops = { diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c index b2bf16a..cd54244 100644 --- a/drivers/scsi/sata_mv.c +++ b/drivers/scsi/sata_mv.c @@ -374,7 +374,6 @@ static struct scsi_host_template mv_sht = { .dma_boundary = MV_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations mv5_ops = { diff --git a/drivers/scsi/sata_nv.c b/drivers/scsi/sata_nv.c index 4954896..c0cf52c 100644 --- a/drivers/scsi/sata_nv.c +++ b/drivers/scsi/sata_nv.c @@ -235,7 +235,6 @@ static struct scsi_host_template nv_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations nv_ops = { diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c index da7fa04..3d1ea09 100644 --- a/drivers/scsi/sata_promise.c +++ b/drivers/scsi/sata_promise.c @@ -114,7 +114,6 @@ static struct scsi_host_template pdc_ata_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations pdc_sata_ops = { diff --git a/drivers/scsi/sata_sil.c b/drivers/scsi/sata_sil.c index d205348..b017f85 100644 --- a/drivers/scsi/sata_sil.c +++ b/drivers/scsi/sata_sil.c @@ -147,7 +147,6 @@ static struct scsi_host_template sil_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations sil_ops = { diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c index a0ad3ed..9231301 100644 --- a/drivers/scsi/sata_sil24.c +++ b/drivers/scsi/sata_sil24.c @@ -292,7 +292,6 @@ static struct scsi_host_template sil24_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, /* NCQ not supported yet */ }; static const struct ata_port_operations sil24_ops = { diff --git a/drivers/scsi/sata_sis.c b/drivers/scsi/sata_sis.c index 32e1262..2df8c56 100644 --- a/drivers/scsi/sata_sis.c +++ b/drivers/scsi/sata_sis.c @@ -99,7 +99,6 @@ static struct scsi_host_template sis_sht = { .dma_boundary =
ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations sis_ops = { diff --git a/drivers/scsi/sata_svw.c b/drivers/scsi/sata_svw.c index 6e7f7c8..6683735 100644 --- a/drivers/scsi/sata_svw.c +++ b/drivers/scsi/sata_svw.c @@ -303,7 +303,6 @@ static struct scsi_host_template k2_sata_sht = { .proc_info = k2_sata_proc_info, #endif .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c index 94b253b..bc87c16 100644 --- a/drivers/scsi/sata_sx4.c +++ b/drivers/scsi/sata_sx4.c @@ -194,7 +194,6 @@ static struct scsi_host_template pdc_sata_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations pdc_20621_ops = { diff --git a/drivers/scsi/sata_uli.c b/drivers/scsi/sata_uli.c index b2422a0..9635ca7 100644 --- a/drivers/scsi/sata_uli.c +++ b/drivers/scsi/sata_uli.c @@ -87,7 +87,6 @@ static struct scsi_host_template uli_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations uli_ops = { diff --git a/drivers/scsi/sata_via.c b/drivers/scsi/sata_via.c index c762156..6d5b0a7 100644 --- a/drivers/scsi/sata_via.c +++ b/drivers/scsi/sata_via.c @@ -106,7 +106,6 @@ static struct scsi_host_template svia_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; static const struct ata_port_operations svia_sata_ops = { diff --git a/drivers/scsi/sata_vsc.c b/drivers/scsi/sata_vsc.c index fcfa486..2e2c3b7 100644 --- a/drivers/scsi/sata_vsc.c +++ b/drivers/scsi/sata_vsc.c @@ -235,7 +235,6 @@ static struct scsi_host_template vsc_sata_sht = { .dma_boundary = ATA_DMA_BOUNDARY, .slave_configure = ata_scsi_slave_config, .bios_param = ata_std_bios_param, - .ordered_flush = 1, }; -- cgit v0.10.2 From 9a3dccc42556537a48f39ee9a9e7ab90a933f766 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:56:18 +0100 Subject: [BLOCK] add FUA support to libata Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c index 9ea1025..bdfb0a8 100644 --- a/drivers/scsi/libata-core.c +++ b/drivers/scsi/libata-core.c @@ -562,16 +562,28 @@ static const u8 ata_rw_cmds[] = { ATA_CMD_WRITE_MULTI, ATA_CMD_READ_MULTI_EXT, ATA_CMD_WRITE_MULTI_EXT, + 0, + 0, + 0, + ATA_CMD_WRITE_MULTI_FUA_EXT, /* pio */ ATA_CMD_PIO_READ, ATA_CMD_PIO_WRITE, ATA_CMD_PIO_READ_EXT, ATA_CMD_PIO_WRITE_EXT, + 0, + 0, + 0, + 0, /* dma */ ATA_CMD_READ, ATA_CMD_WRITE, ATA_CMD_READ_EXT, - ATA_CMD_WRITE_EXT + ATA_CMD_WRITE_EXT, + 0, + 0, + 0, + ATA_CMD_WRITE_FUA_EXT }; /** @@ -584,25 +596,32 @@ static const u8 ata_rw_cmds[] = { * LOCKING: * caller. */ -void ata_rwcmd_protocol(struct ata_queued_cmd *qc) +int ata_rwcmd_protocol(struct ata_queued_cmd *qc) { struct ata_taskfile *tf = &qc->tf; struct ata_device *dev = qc->dev; + u8 cmd; - int index, lba48, write; + int index, fua, lba48, write; + fua = (tf->flags & ATA_TFLAG_FUA) ? 4 : 0; lba48 = (tf->flags & ATA_TFLAG_LBA48) ? 2 : 0; write = (tf->flags & ATA_TFLAG_WRITE) ? 1 : 0; if (dev->flags & ATA_DFLAG_PIO) { tf->protocol = ATA_PROT_PIO; - index = dev->multi_count ? 0 : 4; + index = dev->multi_count ? 
0 : 8; } else { tf->protocol = ATA_PROT_DMA; - index = 8; + index = 16; } - tf->command = ata_rw_cmds[index + lba48 + write]; + cmd = ata_rw_cmds[index + fua + lba48 + write]; + if (cmd) { + tf->command = cmd; + return 0; + } + return -1; } static const char * const xfer_mode_str[] = { diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c index e0439be..2c644cb 100644 --- a/drivers/scsi/libata-scsi.c +++ b/drivers/scsi/libata-scsi.c @@ -1080,11 +1080,13 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm scsicmd[0] == WRITE_16) tf->flags |= ATA_TFLAG_WRITE; - /* Calculate the SCSI LBA and transfer length. */ + /* Calculate the SCSI LBA, transfer length and FUA. */ switch (scsicmd[0]) { case READ_10: case WRITE_10: scsi_10_lba_len(scsicmd, &block, &n_block); + if (unlikely(scsicmd[1] & (1 << 3))) + tf->flags |= ATA_TFLAG_FUA; break; case READ_6: case WRITE_6: @@ -1099,6 +1101,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm case READ_16: case WRITE_16: scsi_16_lba_len(scsicmd, &block, &n_block); + if (unlikely(scsicmd[1] & (1 << 3))) + tf->flags |= ATA_TFLAG_FUA; break; default: DPRINTK("no-byte command\n"); @@ -1142,7 +1146,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm tf->device |= (block >> 24) & 0xf; } - ata_rwcmd_protocol(qc); + if (unlikely(ata_rwcmd_protocol(qc) < 0)) + goto invalid_fld; qc->nsect = n_block; tf->nsect = n_block & 0xff; @@ -1160,7 +1165,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm if ((block >> 28) || (n_block > 256)) goto out_of_range; - ata_rwcmd_protocol(qc); + if (unlikely(ata_rwcmd_protocol(qc) < 0)) + goto invalid_fld; /* Convert LBA to CHS */ track = (u32)block / dev->sectors; @@ -1695,6 +1701,7 @@ static unsigned int ata_msense_rw_recovery(u8 **ptr_io, const u8 *last) unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, unsigned int buflen) { + struct ata_device *dev = args->dev; u8 *scsicmd = args->cmd->cmnd, *p, *last; const u8 sat_blk_desc[] = { 0, 0, 0, 0, /* number of blocks: sat unspecified */ @@ -1703,6 +1710,7 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, }; u8 pg, spg; unsigned int ebd, page_control, six_byte, output_len, alloc_len, minlen; + u8 dpofua; VPRINTK("ENTER\n"); @@ -1771,9 +1779,17 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, if (minlen < 1) return 0; + + dpofua = 0; + if (ata_id_has_fua(args->id) && dev->flags & ATA_DFLAG_LBA48 && + (!(dev->flags & ATA_DFLAG_PIO) || dev->multi_count)) + dpofua = 1 << 4; + if (six_byte) { output_len--; rbuf[0] = output_len; + if (minlen > 2) + rbuf[2] |= dpofua; if (ebd) { if (minlen > 3) rbuf[3] = sizeof(sat_blk_desc); @@ -1786,6 +1802,8 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf, rbuf[0] = output_len >> 8; if (minlen > 1) rbuf[1] = output_len; + if (minlen > 3) + rbuf[3] |= dpofua; if (ebd) { if (minlen > 7) rbuf[7] = sizeof(sat_blk_desc); @@ -2446,7 +2464,7 @@ int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) if (xlat_func) ata_scsi_translate(ap, dev, cmd, done, xlat_func); else - ata_scsi_simulate(dev->id, cmd, done); + ata_scsi_simulate(ap, dev, cmd, done); } else ata_scsi_translate(ap, dev, cmd, done, atapi_xlat); @@ -2469,14 +2487,16 @@ out_unlock: * spin_lock_irqsave(host_set lock) */ -void ata_scsi_simulate(u16 *id, +void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev, struct 
scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)) { struct ata_scsi_args args; const u8 *scsicmd = cmd->cmnd; - args.id = id; + args.ap = ap; + args.dev = dev; + args.id = dev->id; args.cmd = cmd; args.done = done; diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h index 251e53b..e03ce48 100644 --- a/drivers/scsi/libata.h +++ b/drivers/scsi/libata.h @@ -32,6 +32,8 @@ #define DRV_VERSION "1.20" /* must be exactly four chars */ struct ata_scsi_args { + struct ata_port *ap; + struct ata_device *dev; u16 *id; struct scsi_cmnd *cmd; void (*done)(struct scsi_cmnd *); @@ -41,7 +43,7 @@ struct ata_scsi_args { extern int atapi_enabled; extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap, struct ata_device *dev); -extern void ata_rwcmd_protocol(struct ata_queued_cmd *qc); +extern int ata_rwcmd_protocol(struct ata_queued_cmd *qc); extern void ata_qc_free(struct ata_queued_cmd *qc); extern int ata_qc_issue(struct ata_queued_cmd *qc); extern int ata_check_atapi_dma(struct ata_queued_cmd *qc); diff --git a/include/linux/ata.h b/include/linux/ata.h index d2873b7..f63dad4 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -129,6 +129,7 @@ enum { ATA_CMD_READ_EXT = 0x25, ATA_CMD_WRITE = 0xCA, ATA_CMD_WRITE_EXT = 0x35, + ATA_CMD_WRITE_FUA_EXT = 0x3D, ATA_CMD_PIO_READ = 0x20, ATA_CMD_PIO_READ_EXT = 0x24, ATA_CMD_PIO_WRITE = 0x30, @@ -137,6 +138,7 @@ enum { ATA_CMD_READ_MULTI_EXT = 0x29, ATA_CMD_WRITE_MULTI = 0xC5, ATA_CMD_WRITE_MULTI_EXT = 0x39, + ATA_CMD_WRITE_MULTI_FUA_EXT = 0xCE, ATA_CMD_SET_FEATURES = 0xEF, ATA_CMD_PACKET = 0xA0, ATA_CMD_VERIFY = 0x40, @@ -192,6 +194,7 @@ enum { ATA_TFLAG_DEVICE = (1 << 2), /* enable r/w to device reg */ ATA_TFLAG_WRITE = (1 << 3), /* data dir: host->dev==1 (write) */ ATA_TFLAG_LBA = (1 << 4), /* enable LBA */ + ATA_TFLAG_FUA = (1 << 5), /* enable FUA */ }; enum ata_tf_protocols { @@ -245,7 +248,8 @@ struct ata_taskfile { #define ata_id_is_sata(id) ((id)[93] == 0) #define ata_id_rahead_enabled(id) ((id)[85] & (1 << 6)) #define ata_id_wcache_enabled(id) ((id)[85] & (1 << 5)) -#define ata_id_has_flush(id) ((id)[83] & (1 << 12)) +#define ata_id_has_fua(id) ((id)[84] & (1 << 6)) +#define ata_id_has_flush(id) ((id)[83] & (1 << 12)) #define ata_id_has_flush_ext(id) ((id)[83] & (1 << 13)) #define ata_id_has_lba48(id) ((id)[83] & (1 << 10)) #define ata_id_has_wcache(id) ((id)[82] & (1 << 5)) diff --git a/include/linux/libata.h b/include/linux/libata.h index e828e17..6db2c08 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -480,7 +480,8 @@ extern u8 ata_bmdma_status(struct ata_port *ap); extern void ata_bmdma_irq_clear(struct ata_port *ap); extern void ata_qc_complete(struct ata_queued_cmd *qc); extern void ata_eng_timeout(struct ata_port *ap); -extern void ata_scsi_simulate(u16 *id, struct scsi_cmnd *cmd, +extern void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev, + struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *)); extern int ata_std_bios_param(struct scsi_device *sdev, struct block_device *bdev, -- cgit v0.10.2 From 3e087b575496b8aa445192f58e7d996b1cdfa121 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:57:31 +0100 Subject: [BLOCK] update IDE to use new blk_ordered for barriers Update IDE to use new blk_ordered. This change makes the following behavior changes. * Partial completion of the barrier request is handled as failure of the whole ordered sequence. No more partial completion for barrier requests. 
* Any failure of pre or post flush request results in failure of the whole ordered sequence. So, a successfully completed ordered sequence guarantees that all requests prior to the barrier made it to the physical medium and, then, the barrier request itself made it to the physical medium. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 4e57679..4b44172 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c @@ -681,50 +681,9 @@ static ide_proc_entry_t idedisk_proc[] = { #endif /* CONFIG_PROC_FS */ -static void idedisk_end_flush(request_queue_t *q, struct request *flush_rq) +static void idedisk_prepare_flush(request_queue_t *q, struct request *rq) { ide_drive_t *drive = q->queuedata; - struct request *rq = flush_rq->end_io_data; - int good_sectors = rq->hard_nr_sectors; - int bad_sectors; - sector_t sector; - - if (flush_rq->errors & ABRT_ERR) { - printk(KERN_ERR "%s: barrier support doesn't work\n", drive->name); - blk_queue_ordered(drive->queue, QUEUE_ORDERED_NONE); - blk_queue_issue_flush_fn(drive->queue, NULL); - good_sectors = 0; - } else if (flush_rq->errors) { - good_sectors = 0; - if (blk_barrier_preflush(rq)) { - sector = ide_get_error_location(drive,flush_rq->buffer); - if ((sector >= rq->hard_sector) && - (sector < rq->hard_sector + rq->hard_nr_sectors)) - good_sectors = sector - rq->hard_sector; - } - } - - if (flush_rq->errors) - printk(KERN_ERR "%s: failed barrier write: " - "sector=%Lx(good=%d/bad=%d)\n", - drive->name, (unsigned long long)rq->sector, - good_sectors, - (int) (rq->hard_nr_sectors-good_sectors)); - - bad_sectors = rq->hard_nr_sectors - good_sectors; - - if (good_sectors) - __ide_end_request(drive, rq, 1, good_sectors); - if (bad_sectors) - __ide_end_request(drive, rq, 0, bad_sectors); -} - -static int idedisk_prepare_flush(request_queue_t *q, struct request *rq) -{ - ide_drive_t *drive = q->queuedata; - - if (!drive->wcache) - return 0; memset(rq->cmd, 0, sizeof(rq->cmd)); @@ -735,9 +694,8 @@ static int idedisk_prepare_flush(request_queue_t *q, struct request *rq) rq->cmd[0] = WIN_FLUSH_CACHE; - rq->flags |= REQ_DRIVE_TASK | REQ_SOFTBARRIER; + rq->flags |= REQ_DRIVE_TASK; rq->buffer = rq->cmd; - return 1; } static int idedisk_issue_flush(request_queue_t *q, struct gendisk *disk, @@ -794,27 +752,64 @@ static int set_nowerr(ide_drive_t *drive, int arg) return 0; } +static void update_ordered(ide_drive_t *drive) +{ + struct hd_driveid *id = drive->id; + unsigned ordered = QUEUE_ORDERED_NONE; + prepare_flush_fn *prep_fn = NULL; + issue_flush_fn *issue_fn = NULL; + + if (drive->wcache) { + unsigned long long capacity; + int barrier; + /* + * We must avoid issuing commands a drive does not + * understand or we may crash it. We check flush cache + * is supported. We also check we have the LBA48 flush + * cache if the drive capacity is too large. By this + * time we have trimmed the drive capacity if LBA48 is + * not available so we don't need to recheck that. + */ + capacity = idedisk_capacity(drive); + barrier = ide_id_has_flush_cache(id) && + (drive->addressing == 0 || capacity <= (1ULL << 28) || + ide_id_has_flush_cache_ext(id)); + + printk(KERN_INFO "%s: cache flushes %ssupported\n", + drive->name, barrier ?
"" : "not"); + + if (barrier) { + ordered = QUEUE_ORDERED_DRAIN_FLUSH; + prep_fn = idedisk_prepare_flush; + issue_fn = idedisk_issue_flush; + } + } else + ordered = QUEUE_ORDERED_DRAIN; + + blk_queue_ordered(drive->queue, ordered, prep_fn); + blk_queue_issue_flush_fn(drive->queue, issue_fn); +} + static int write_cache(ide_drive_t *drive, int arg) { ide_task_t args; - int err; - - if (!ide_id_has_flush_cache(drive->id)) - return 1; + int err = 1; - memset(&args, 0, sizeof(ide_task_t)); - args.tfRegister[IDE_FEATURE_OFFSET] = (arg) ? + if (ide_id_has_flush_cache(drive->id)) { + memset(&args, 0, sizeof(ide_task_t)); + args.tfRegister[IDE_FEATURE_OFFSET] = (arg) ? SETFEATURES_EN_WCACHE : SETFEATURES_DIS_WCACHE; - args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SETFEATURES; - args.command_type = IDE_DRIVE_TASK_NO_DATA; - args.handler = &task_no_data_intr; + args.tfRegister[IDE_COMMAND_OFFSET] = WIN_SETFEATURES; + args.command_type = IDE_DRIVE_TASK_NO_DATA; + args.handler = &task_no_data_intr; + err = ide_raw_taskfile(drive, &args, NULL); + if (err == 0) + drive->wcache = arg; + } - err = ide_raw_taskfile(drive, &args, NULL); - if (err) - return err; + update_ordered(drive); - drive->wcache = arg; - return 0; + return err; } static int do_idedisk_flushcache (ide_drive_t *drive) @@ -888,7 +883,6 @@ static void idedisk_setup (ide_drive_t *drive) { struct hd_driveid *id = drive->id; unsigned long long capacity; - int barrier; idedisk_add_settings(drive); @@ -992,31 +986,6 @@ static void idedisk_setup (ide_drive_t *drive) drive->wcache = 1; write_cache(drive, 1); - - /* - * We must avoid issuing commands a drive does not understand - * or we may crash it. We check flush cache is supported. We also - * check we have the LBA48 flush cache if the drive capacity is - * too large. By this time we have trimmed the drive capacity if - * LBA48 is not available so we don't need to recheck that. - */ - barrier = 0; - if (ide_id_has_flush_cache(id)) - barrier = 1; - if (drive->addressing == 1) { - /* Can't issue the correct flush ? */ - if (capacity > (1ULL << 28) && !ide_id_has_flush_cache_ext(id)) - barrier = 0; - } - - printk(KERN_INFO "%s: cache flushes %ssupported\n", - drive->name, barrier ? "" : "not "); - if (barrier) { - blk_queue_ordered(drive->queue, QUEUE_ORDERED_FLUSH); - drive->queue->prepare_flush_fn = idedisk_prepare_flush; - drive->queue->end_flush_fn = idedisk_end_flush; - blk_queue_issue_flush_fn(drive->queue, idedisk_issue_flush); - } } static void ide_cacheflush_p(ide_drive_t *drive) diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c index 8435b44..b5dc6df 100644 --- a/drivers/ide/ide-io.c +++ b/drivers/ide/ide-io.c @@ -119,10 +119,7 @@ int ide_end_request (ide_drive_t *drive, int uptodate, int nr_sectors) if (!nr_sectors) nr_sectors = rq->hard_cur_sectors; - if (blk_complete_barrier_rq_locked(drive->queue, rq, nr_sectors)) - ret = rq->nr_sectors != 0; - else - ret = __ide_end_request(drive, rq, uptodate, nr_sectors); + ret = __ide_end_request(drive, rq, uptodate, nr_sectors); spin_unlock_irqrestore(&ide_lock, flags); return ret; -- cgit v0.10.2 From ff5b8cf1491330836d75eede4e5632caa32b776a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 Jan 2006 09:58:37 +0100 Subject: [BLOCK] I/O barrier documentation update Update documentation to match new barrier implementation. 
Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 303c57a..8e63831 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -263,14 +263,8 @@ A flag in the bio structure, BIO_BARRIER is used to identify a barrier i/o. The generic i/o scheduler would make sure that it places the barrier request and all other requests coming after it after all the previous requests in the queue. Barriers may be implemented in different ways depending on the -driver. A SCSI driver for example could make use of ordered tags to -preserve the necessary ordering with a lower impact on throughput. For IDE -this might be two sync cache flush: a pre and post flush when encountering -a barrier write. - -There is a provision for queues to indicate what kind of barriers they -can provide. This is as of yet unmerged, details will be added here once it -is in the kernel. +driver. For more details regarding I/O barriers, please read barrier.txt +in this directory. 1.2.2 Request Priority/Latency -- cgit v0.10.2 From 15fc858a0067c800f410a24551a7b461978abf0b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Jan 2006 10:00:50 +0100 Subject: [BLOCK] Correct blk_execute_rq_nowait() prototype diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 15db0f1..fb09853 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -613,8 +613,7 @@ extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_io extern int blk_execute_rq(request_queue_t *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *, - struct request *, int, - void (*done)(struct request *)); + struct request *, int, rq_end_io_fn *); static inline request_queue_t *bdev_get_queue(struct block_device *bdev) { -- cgit v0.10.2 From e650c305ec3178818b317dad37a6d9c7fa8ba28d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Jan 2006 12:38:30 +0100 Subject: [SCSI] scsi_end_async() needs to take an uptodate parameter Signed-off-by: Jens Axboe diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7a38b10..ba93d6e 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -308,7 +308,7 @@ struct scsi_io_context { static kmem_cache_t *scsi_io_context_cache; -static void scsi_end_async(struct request *req) +static void scsi_end_async(struct request *req, int uptodate) { struct scsi_io_context *sioc = req->end_io_data; -- cgit v0.10.2
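
Taken together, the series reduces a low-level driver's barrier support to one registration call plus an optional flush-preparation hook. The following is a minimal sketch of that pattern, not code from the patches: the mydrv_* names are hypothetical, while the QUEUE_ORDERED_* modes, the three-argument blk_queue_ordered() and the request fields are the ones introduced in the blkdev.h and sd.c hunks above.

#include <linux/blkdev.h>
#include <scsi/scsi.h>	/* SYNCHRONIZE_CACHE, as used by sd_prepare_flush() */

/* Turn the block layer's flush request into a device cache-flush command. */
static void mydrv_prepare_flush(request_queue_t *q, struct request *rq)
{
	memset(rq->cmd, 0, sizeof(rq->cmd));
	rq->flags |= REQ_BLOCK_PC;
	rq->cmd[0] = SYNCHRONIZE_CACHE;
	rq->cmd_len = 10;
}

/*
 * Pick the ordered mode from the device's cache behavior, mirroring the
 * decision sd_revalidate_disk() and IDE's update_ordered() make above.
 */
static void mydrv_setup_ordered(request_queue_t *q, int wce, int dpofua)
{
	unsigned ordered;

	if (!wce)
		ordered = QUEUE_ORDERED_DRAIN;		/* write-through: draining is enough */
	else if (dpofua)
		ordered = QUEUE_ORDERED_DRAIN_FUA;	/* pre flush + FUA barrier write */
	else
		ordered = QUEUE_ORDERED_DRAIN_FLUSH;	/* pre and post flushes */

	blk_queue_ordered(q, ordered, wce ? mydrv_prepare_flush : NULL);
}

The drain variants are deliberate: as the comment added to sd_revalidate_disk() notes, QUEUE_ORDERED_TAG_* is reserved for queues whose dispatch path preserves request order, which neither sd nor IDE can guarantee here.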
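
On the libata side, the FUA patch works by widening ata_rw_cmds[] from blocks of four entries to blocks of eight (multi commands at 0, PIO at 8, DMA at 16), so the opcode can be picked arithmetically. A small standalone sketch of just the index math (the table itself is not reproduced, and rw_cmd_index() is a hypothetical name) shows why the zero-filled slots matter:

#include <stdio.h>

/*
 * Offset of a command within the widened ata_rw_cmds[] table: base
 * selects the block (multi=0, PIO=8, DMA=16), then FUA adds 4, LBA48
 * adds 2 and write adds 1 -- the same sum ata_rwcmd_protocol() computes.
 */
static int rw_cmd_index(int base, int fua, int lba48, int write)
{
	return base + (fua ? 4 : 0) + (lba48 ? 2 : 0) + (write ? 1 : 0);
}

int main(void)
{
	/* DMA FUA write: 16+4+2+1 = 23, the ATA_CMD_WRITE_FUA_EXT slot */
	printf("dma fua write -> %d\n", rw_cmd_index(16, 1, 1, 1));

	/*
	 * PIO (non-multi) FUA write: 8+4+2+1 = 15, a zero slot -- no such
	 * ATA command exists, so ata_rwcmd_protocol() now returns -1 and
	 * the translation fails with an illegal-field result instead of
	 * silently dropping FUA.
	 */
	printf("pio fua write -> %d\n", rw_cmd_index(8, 1, 1, 1));
	return 0;
}

This is also why sd_read_cache_type() disables DPOFUA when a device falls back to READ/WRITE(6), and why ata_scsiop_mode_sense() only advertises the DPOFUA bit when LBA48 is present and PIO-without-multi is not in use.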