author      Steve French <sfrench@us.ibm.com>    2009-02-03 15:19:23 (GMT)
committer   Steve French <sfrench@us.ibm.com>    2009-02-03 15:19:23 (GMT)
commit      e1f81c8a417be466e85a38b61679aa6860ec7331 (patch)
tree        749815f74bfad330e83560a10fd5da5fc4d8c765 /block
parent      0e2bedaa394f74fa9f75ee937488c33d90039b5a (diff)
parent      b1792e367053968f2ddb48bc911d314143ce6242 (diff)
download    linux-e1f81c8a417be466e85a38b61679aa6860ec7331.tar.xz
Merge branch 'master' of /pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'block')
-rw-r--r--   block/blk-barrier.c    |   2
-rw-r--r--   block/blk-core.c       | 100
-rw-r--r--   block/blk-integrity.c  |  25
-rw-r--r--   block/blk-sysfs.c      |  58
-rw-r--r--   block/blk.h            |   8
-rw-r--r--   block/blktrace.c       |  72
-rw-r--r--   block/cfq-iosched.c    |  39
7 files changed, 202 insertions, 102 deletions
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 8eba4e4..f7dae57 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -302,7 +302,7 @@ static void bio_end_empty_barrier(struct bio *bio, int err)
  * Description:
  *    Issue a flush for the block device in question. Caller can supply
  *    room for storing the error offset in case of a flush error, if they
- *    wish to.  Caller must run wait_for_completion() on its own.
+ *    wish to.
  */
 int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
 {
diff --git a/block/blk-core.c b/block/blk-core.c
index a824e49..29bcfac 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -64,11 +64,12 @@ static struct workqueue_struct *kblockd_workqueue;
 
 static void drive_stat_acct(struct request *rq, int new_io)
 {
+	struct gendisk *disk = rq->rq_disk;
 	struct hd_struct *part;
 	int rw = rq_data_dir(rq);
 	int cpu;
 
-	if (!blk_fs_request(rq) || !rq->rq_disk)
+	if (!blk_fs_request(rq) || !disk || !blk_do_io_stat(disk->queue))
 		return;
 
 	cpu = part_stat_lock();
@@ -599,8 +600,7 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	q->request_fn = rfn;
 	q->prep_rq_fn = NULL;
 	q->unplug_fn = generic_unplug_device;
-	q->queue_flags = (1 << QUEUE_FLAG_CLUSTER |
-			   1 << QUEUE_FLAG_STACKABLE);
+	q->queue_flags = QUEUE_FLAG_DEFAULT;
 	q->queue_lock = lock;
 
 	blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK);
@@ -1125,6 +1125,8 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 
 	if (bio_sync(bio))
 		req->cmd_flags |= REQ_RW_SYNC;
+	if (bio_unplug(bio))
+		req->cmd_flags |= REQ_UNPLUG;
 	if (bio_rw_meta(bio))
 		req->cmd_flags |= REQ_RW_META;
 
@@ -1141,6 +1143,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	int el_ret, nr_sectors;
 	const unsigned short prio = bio_prio(bio);
 	const int sync = bio_sync(bio);
+	const int unplug = bio_unplug(bio);
 	int rw_flags;
 
 	nr_sectors = bio_sectors(bio);
@@ -1244,7 +1247,7 @@ get_rq:
 		blk_plug_device(q);
 	add_request(q, req);
 out:
-	if (sync || blk_queue_nonrot(q))
+	if (unplug || blk_queue_nonrot(q))
 		__generic_unplug_device(q);
 	spin_unlock_irq(q->queue_lock);
 	return 0;
@@ -1448,6 +1451,11 @@ static inline void __generic_make_request(struct bio *bio)
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
+		if (bio_barrier(bio) && bio_has_data(bio) &&
+		    (q->next_ordered == QUEUE_ORDERED_NONE)) {
+			err = -EOPNOTSUPP;
+			goto end_io;
+		}
 
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
@@ -1655,6 +1663,55 @@ void blkdev_dequeue_request(struct request *req)
 }
 EXPORT_SYMBOL(blkdev_dequeue_request);
 
+static void blk_account_io_completion(struct request *req, unsigned int bytes)
+{
+	struct gendisk *disk = req->rq_disk;
+
+	if (!disk || !blk_do_io_stat(disk->queue))
+		return;
+
+	if (blk_fs_request(req)) {
+		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(req->rq_disk, req->sector);
+		part_stat_add(cpu, part, sectors[rw], bytes >> 9);
+		part_stat_unlock();
+	}
+}
+
+static void blk_account_io_done(struct request *req)
+{
+	struct gendisk *disk = req->rq_disk;
+
+	if (!disk || !blk_do_io_stat(disk->queue))
+		return;
+
+	/*
+	 * Account IO completion. bar_rq isn't accounted as a normal
+	 * IO on queueing nor completion. Accounting the containing
+	 * request is enough.
+	 */
+	if (blk_fs_request(req) && req != &req->q->bar_rq) {
+		unsigned long duration = jiffies - req->start_time;
+		const int rw = rq_data_dir(req);
+		struct hd_struct *part;
+		int cpu;
+
+		cpu = part_stat_lock();
+		part = disk_map_sector_rcu(disk, req->sector);
+
+		part_stat_inc(cpu, part, ios[rw]);
+		part_stat_add(cpu, part, ticks[rw], duration);
+		part_round_stats(cpu, part);
+		part_dec_in_flight(part);
+
+		part_stat_unlock();
+	}
+}
+
 /**
  * __end_that_request_first - end I/O on a request
  * @req: the request being processed
@@ -1690,16 +1747,7 @@ static int __end_that_request_first(struct request *req, int error,
 				(unsigned long long)req->sector);
 	}
 
-	if (blk_fs_request(req) && req->rq_disk) {
-		const int rw = rq_data_dir(req);
-		struct hd_struct *part;
-		int cpu;
-
-		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(req->rq_disk, req->sector);
-		part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
-		part_stat_unlock();
-	}
+	blk_account_io_completion(req, nr_bytes);
 
 	total_bytes = bio_nbytes = 0;
 	while ((bio = req->bio) != NULL) {
@@ -1779,8 +1827,6 @@ static int __end_that_request_first(struct request *req, int error,
  */
 static void end_that_request_last(struct request *req, int error)
 {
-	struct gendisk *disk = req->rq_disk;
-
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
@@ -1792,27 +1838,7 @@ static void end_that_request_last(struct request *req, int error)
 
 	blk_delete_timer(req);
 
-	/*
-	 * Account IO completion. bar_rq isn't accounted as a normal
-	 * IO on queueing nor completion. Accounting the containing
-	 * request is enough.
-	 */
-	if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
-		unsigned long duration = jiffies - req->start_time;
-		const int rw = rq_data_dir(req);
-		struct hd_struct *part;
-		int cpu;
-
-		cpu = part_stat_lock();
-		part = disk_map_sector_rcu(disk, req->sector);
-
-		part_stat_inc(cpu, part, ios[rw]);
-		part_stat_add(cpu, part, ticks[rw], duration);
-		part_round_stats(cpu, part);
-		part_dec_in_flight(part);
-
-		part_stat_unlock();
-	}
+	blk_account_io_done(req);
 
 	if (req->end_io)
 		req->end_io(req, error);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 61a8e2f..91fa8e0 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -309,24 +309,24 @@ static struct kobj_type integrity_ktype = {
 /**
  * blk_integrity_register - Register a gendisk as being integrity-capable
  * @disk:	struct gendisk pointer to make integrity-aware
- * @template:	integrity profile
+ * @template:	optional integrity profile to register
  *
  * Description: When a device needs to advertise itself as being able
  * to send/receive integrity metadata it must use this function to
  * register the capability with the block layer. The template is a
  * blk_integrity struct with values appropriate for the underlying
- * hardware. See Documentation/block/data-integrity.txt.
+ * hardware. If template is NULL the new profile is allocated but
+ * not filled out. See Documentation/block/data-integrity.txt.
  */
 int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 {
 	struct blk_integrity *bi;
 
 	BUG_ON(disk == NULL);
-	BUG_ON(template == NULL);
 
 	if (disk->integrity == NULL) {
 		bi = kmem_cache_alloc(integrity_cachep,
-						GFP_KERNEL | __GFP_ZERO);
+					  GFP_KERNEL | __GFP_ZERO);
 		if (!bi)
 			return -1;
 
@@ -346,13 +346,16 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
 		bi = disk->integrity;
 
 	/* Use the provided profile as template */
-	bi->name = template->name;
-	bi->generate_fn = template->generate_fn;
-	bi->verify_fn = template->verify_fn;
-	bi->tuple_size = template->tuple_size;
-	bi->set_tag_fn = template->set_tag_fn;
-	bi->get_tag_fn = template->get_tag_fn;
-	bi->tag_size = template->tag_size;
+	if (template != NULL) {
+		bi->name = template->name;
+		bi->generate_fn = template->generate_fn;
+		bi->verify_fn = template->verify_fn;
+		bi->tuple_size = template->tuple_size;
+		bi->set_tag_fn = template->set_tag_fn;
+		bi->get_tag_fn = template->get_tag_fn;
+		bi->tag_size = template->tag_size;
+	} else
+		bi->name = "unsupported";
 
 	return 0;
 }
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index a29cb78..e29ddfc 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -130,6 +130,27 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
 	return queue_var_show(max_hw_sectors_kb, (page));
 }
 
+static ssize_t queue_nonrot_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(!blk_queue_nonrot(q), page);
+}
+
+static ssize_t queue_nonrot_store(struct request_queue *q, const char *page,
+				  size_t count)
+{
+	unsigned long nm;
+	ssize_t ret = queue_var_store(&nm, page, count);
+
+	spin_lock_irq(q->queue_lock);
+	if (nm)
+		queue_flag_clear(QUEUE_FLAG_NONROT, q);
+	else
+		queue_flag_set(QUEUE_FLAG_NONROT, q);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
 static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
 {
 	return queue_var_show(blk_queue_nomerges(q), page);
@@ -146,8 +167,8 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
 		queue_flag_set(QUEUE_FLAG_NOMERGES, q);
 	else
 		queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
-
 	spin_unlock_irq(q->queue_lock);
+
 	return ret;
 }
 
@@ -176,6 +197,27 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
 	return ret;
 }
 
+static ssize_t queue_iostats_show(struct request_queue *q, char *page)
+{
+	return queue_var_show(blk_queue_io_stat(q), page);
+}
+
+static ssize_t queue_iostats_store(struct request_queue *q, const char *page,
+				   size_t count)
+{
+	unsigned long stats;
+	ssize_t ret = queue_var_store(&stats, page, count);
+
+	spin_lock_irq(q->queue_lock);
+	if (stats)
+		queue_flag_set(QUEUE_FLAG_IO_STAT, q);
+	else
+		queue_flag_clear(QUEUE_FLAG_IO_STAT, q);
+	spin_unlock_irq(q->queue_lock);
+
+	return ret;
+}
+
 static struct queue_sysfs_entry queue_requests_entry = {
 	.attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_requests_show,
@@ -210,6 +252,12 @@ static struct queue_sysfs_entry queue_hw_sector_size_entry = {
 	.show = queue_hw_sector_size_show,
 };
 
+static struct queue_sysfs_entry queue_nonrot_entry = {
+	.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_nonrot_show,
+	.store = queue_nonrot_store,
+};
+
 static struct queue_sysfs_entry queue_nomerges_entry = {
 	.attr = {.name = "nomerges", .mode = S_IRUGO | S_IWUSR },
 	.show = queue_nomerges_show,
@@ -222,6 +270,12 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = {
 	.store = queue_rq_affinity_store,
 };
 
+static struct queue_sysfs_entry queue_iostats_entry = {
+	.attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR },
+	.show = queue_iostats_show,
+	.store = queue_iostats_store,
+};
+
 static struct attribute *default_attrs[] = {
 	&queue_requests_entry.attr,
 	&queue_ra_entry.attr,
@@ -229,8 +283,10 @@ static struct attribute *default_attrs[] = {
 	&queue_max_sectors_entry.attr,
 	&queue_iosched_entry.attr,
 	&queue_hw_sector_size_entry.attr,
+	&queue_nonrot_entry.attr,
 	&queue_nomerges_entry.attr,
 	&queue_rq_affinity_entry.attr,
+	&queue_iostats_entry.attr,
 	NULL,
 };
 
diff --git a/block/blk.h b/block/blk.h
index 6e1ed40..0dce92c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -108,4 +108,12 @@ static inline int blk_cpu_to_group(int cpu)
 #endif
 }
 
+static inline int blk_do_io_stat(struct request_queue *q)
+{
+	if (q)
+		return blk_queue_io_stat(q);
+
+	return 0;
+}
+
 #endif
diff --git a/block/blktrace.c b/block/blktrace.c
index b0a2cae..39cc3bf 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -187,59 +187,12 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
 
 static struct dentry *blk_tree_root;
 static DEFINE_MUTEX(blk_tree_mutex);
-static unsigned int root_users;
-
-static inline void blk_remove_root(void)
-{
-	if (blk_tree_root) {
-		debugfs_remove(blk_tree_root);
-		blk_tree_root = NULL;
-	}
-}
-
-static void blk_remove_tree(struct dentry *dir)
-{
-	mutex_lock(&blk_tree_mutex);
-	debugfs_remove(dir);
-	if (--root_users == 0)
-		blk_remove_root();
-	mutex_unlock(&blk_tree_mutex);
-}
-
-static struct dentry *blk_create_tree(const char *blk_name)
-{
-	struct dentry *dir = NULL;
-	int created = 0;
-
-	mutex_lock(&blk_tree_mutex);
-
-	if (!blk_tree_root) {
-		blk_tree_root = debugfs_create_dir("block", NULL);
-		if (!blk_tree_root)
-			goto err;
-		created = 1;
-	}
-
-	dir = debugfs_create_dir(blk_name, blk_tree_root);
-	if (dir)
-		root_users++;
-	else {
-		/* Delete root only if we created it */
-		if (created)
-			blk_remove_root();
-	}
-
-err:
-	mutex_unlock(&blk_tree_mutex);
-	return dir;
-}
 
 static void blk_trace_cleanup(struct blk_trace *bt)
 {
-	relay_close(bt->rchan);
 	debugfs_remove(bt->msg_file);
 	debugfs_remove(bt->dropped_file);
-	blk_remove_tree(bt->dir);
+	relay_close(bt->rchan);
 	free_percpu(bt->sequence);
 	free_percpu(bt->msg_data);
 	kfree(bt);
@@ -346,7 +299,18 @@ static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
 
 static int blk_remove_buf_file_callback(struct dentry *dentry)
 {
+	struct dentry *parent = dentry->d_parent;
 	debugfs_remove(dentry);
+
+	/*
+	 * this will fail for all but the last file, but that is ok. what we
+	 * care about is the top level buts->name directory going away, when
+	 * the last trace file is gone. Then we don't have to rmdir() that
+	 * manually on trace stop, so it nicely solves the issue with
+	 * force killing of running traces.
+	 */
+
+	debugfs_remove(parent);
 	return 0;
 }
 
@@ -404,7 +368,15 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 		goto err;
 
 	ret = -ENOENT;
-	dir = blk_create_tree(buts->name);
+
+	if (!blk_tree_root) {
+		blk_tree_root = debugfs_create_dir("block", NULL);
+		if (!blk_tree_root)
+			return -ENOMEM;
+	}
+
+	dir = debugfs_create_dir(buts->name, blk_tree_root);
+
 	if (!dir)
 		goto err;
 
@@ -458,8 +430,6 @@ probe_err:
 	atomic_dec(&blk_probes_ref);
 	mutex_unlock(&blk_probe_mutex);
 err:
-	if (dir)
-		blk_remove_tree(dir);
 	if (bt) {
 		if (bt->msg_file)
 			debugfs_remove(bt->msg_file);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index e8525fa..664ebfd 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -84,6 +84,11 @@ struct cfq_data {
 	 */
 	struct cfq_rb_root service_tree;
 	unsigned int busy_queues;
+	/*
+	 * Used to track any pending rt requests so we can pre-empt current
+	 * non-RT cfqq in service when this value is non-zero.
+	 */
+	unsigned int busy_rt_queues;
 
 	int rq_in_driver;
 	int sync_flight;
@@ -562,6 +567,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
+	if (cfq_class_rt(cfqq))
+		cfqd->busy_rt_queues++;
 
 	cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -581,6 +588,8 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
+	if (cfq_class_rt(cfqq))
+		cfqd->busy_rt_queues--;
 }
 
 /*
@@ -1005,6 +1014,20 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 		goto expire;
 
 	/*
+	 * If we have a RT cfqq waiting, then we pre-empt the current non-rt
+	 * cfqq.
+	 */
+	if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) {
+		/*
+		 * We simulate this as cfqq timed out so that it gets to bank
+		 * the remaining of its time slice.
+		 */
+		cfq_log_cfqq(cfqd, cfqq, "preempt");
+		cfq_slice_expired(cfqd, 1);
+		goto new_queue;
+	}
+
+	/*
 	 * The active queue has requests and isn't expired, allow it to
 	 * dispatch.
 	 */
@@ -1067,6 +1090,13 @@ __cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		if (RB_EMPTY_ROOT(&cfqq->sort_list))
 			break;
 
+		/*
+		 * If there is a non-empty RT cfqq waiting for current
+		 * cfqq's timeslice to complete, pre-empt this cfqq
+		 */
+		if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues)
+			break;
+
 	} while (dispatched < max_dispatch);
 
 	/*
@@ -1801,6 +1831,12 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
 	if (rq_is_meta(rq) && !cfqq->meta_pending)
 		return 1;
 
+	/*
+	 * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice.
+	 */
+	if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq))
+		return 1;
+
 	if (!cfqd->active_cic || !cfq_cfqq_wait_request(cfqq))
 		return 0;
 
@@ -1870,7 +1906,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
 		/*
 		 * not the active queue - expire current slice if it is
 		 * idle and has expired it's mean thinktime or this new queue
-		 * has some old slice time left and is of higher priority
+		 * has some old slice time left and is of higher priority or
+		 * this new queue is RT and the current one is BE
 		 */
 		cfq_preempt_queue(cfqd, cfqq);
 		cfq_mark_cfqq_must_dispatch(cfqq);