summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blk-core.c42
-rw-r--r--block/blk-settings.c8
-rw-r--r--block/blktrace.c31
-rw-r--r--block/cfq-iosched.c36
-rw-r--r--block/compat_ioctl.c2
-rw-r--r--block/elevator.c2
-rw-r--r--block/genhd.c11
7 files changed, 93 insertions, 39 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 2987fe4..1905aab 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -482,6 +482,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
kobject_init(&q->kobj, &blk_queue_ktype);
mutex_init(&q->sysfs_lock);
+ spin_lock_init(&q->__queue_lock);
return q;
}
@@ -544,10 +545,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
* if caller didn't supply a lock, they get per-queue locking with
* our embedded lock
*/
- if (!lock) {
- spin_lock_init(&q->__queue_lock);
+ if (!lock)
lock = &q->__queue_lock;
- }
q->request_fn = rfn;
q->prep_rq_fn = NULL;
@@ -807,35 +806,32 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags,
rq = get_request(q, rw_flags, bio, GFP_NOIO);
while (!rq) {
DEFINE_WAIT(wait);
+ struct io_context *ioc;
struct request_list *rl = &q->rq;
prepare_to_wait_exclusive(&rl->wait[rw], &wait,
TASK_UNINTERRUPTIBLE);
- rq = get_request(q, rw_flags, bio, GFP_NOIO);
-
- if (!rq) {
- struct io_context *ioc;
-
- blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
+ blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ);
- __generic_unplug_device(q);
- spin_unlock_irq(q->queue_lock);
- io_schedule();
+ __generic_unplug_device(q);
+ spin_unlock_irq(q->queue_lock);
+ io_schedule();
- /*
- * After sleeping, we become a "batching" process and
- * will be able to allocate at least one request, and
- * up to a big batch of them for a small period time.
- * See ioc_batching, ioc_set_batching
- */
- ioc = current_io_context(GFP_NOIO, q->node);
- ioc_set_batching(q, ioc);
+ /*
+ * After sleeping, we become a "batching" process and
+ * will be able to allocate at least one request, and
+ * up to a big batch of them for a small period time.
+ * See ioc_batching, ioc_set_batching
+ */
+ ioc = current_io_context(GFP_NOIO, q->node);
+ ioc_set_batching(q, ioc);
- spin_lock_irq(q->queue_lock);
- }
+ spin_lock_irq(q->queue_lock);
finish_wait(&rl->wait[rw], &wait);
- }
+
+ rq = get_request(q, rw_flags, bio, GFP_NOIO);
+ };
return rq;
}
diff --git a/block/blk-settings.c b/block/blk-settings.c
index bb93d4c..8dd8641 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -286,8 +286,14 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
- if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags))
+ if (!t->queue_lock)
+ WARN_ON_ONCE(1);
+ else if (!test_bit(QUEUE_FLAG_CLUSTER, &b->queue_flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(t->queue_lock, flags);
queue_flag_clear(QUEUE_FLAG_CLUSTER, t);
+ spin_unlock_irqrestore(t->queue_lock, flags);
+ }
}
EXPORT_SYMBOL(blk_queue_stack_limits);
diff --git a/block/blktrace.c b/block/blktrace.c
index 568588c..8d3a277 100644
--- a/block/blktrace.c
+++ b/block/blktrace.c
@@ -75,6 +75,24 @@ static void trace_note_time(struct blk_trace *bt)
local_irq_restore(flags);
}
+void __trace_note_message(struct blk_trace *bt, const char *fmt, ...)
+{
+ int n;
+ va_list args;
+ unsigned long flags;
+ char *buf;
+
+ local_irq_save(flags);
+ buf = per_cpu_ptr(bt->msg_data, smp_processor_id());
+ va_start(args, fmt);
+ n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args);
+ va_end(args);
+
+ trace_note(bt, 0, BLK_TN_MESSAGE, buf, n);
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(__trace_note_message);
+
static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector,
pid_t pid)
{
@@ -141,10 +159,7 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes,
/*
* A word about the locking here - we disable interrupts to reserve
* some space in the relay per-cpu buffer, to prevent an irq
- * from coming in and stepping on our toes. Once reserved, it's
- * enough to get preemption disabled to prevent read of this data
- * before we are through filling it. get_cpu()/put_cpu() does this
- * for us
+ * from coming in and stepping on our toes.
*/
local_irq_save(flags);
@@ -232,6 +247,7 @@ static void blk_trace_cleanup(struct blk_trace *bt)
debugfs_remove(bt->dropped_file);
blk_remove_tree(bt->dir);
free_percpu(bt->sequence);
+ free_percpu(bt->msg_data);
kfree(bt);
}
@@ -346,6 +362,10 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
if (!bt->sequence)
goto err;
+ bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG);
+ if (!bt->msg_data)
+ goto err;
+
ret = -ENOENT;
dir = blk_create_tree(buts->name);
if (!dir)
@@ -392,6 +412,7 @@ err:
if (bt->dropped_file)
debugfs_remove(bt->dropped_file);
free_percpu(bt->sequence);
+ free_percpu(bt->msg_data);
if (bt->rchan)
relay_close(bt->rchan);
kfree(bt);
@@ -476,7 +497,7 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
switch (cmd) {
case BLKTRACESETUP:
- strcpy(b, bdevname(bdev, b));
+ bdevname(bdev, b);
ret = blk_trace_setup(q, b, bdev->bd_dev, arg);
break;
case BLKTRACESTART:
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index b399c62..d01b411 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -124,6 +124,8 @@ struct cfq_data {
struct cfq_queue {
/* reference count */
atomic_t ref;
+ /* various state flags, see below */
+ unsigned int flags;
/* parent cfq_data */
struct cfq_data *cfqd;
/* service_tree member */
@@ -138,14 +140,14 @@ struct cfq_queue {
int queued[2];
/* currently allocated requests */
int allocated[2];
- /* pending metadata requests */
- int meta_pending;
/* fifo list of requests in sort_list */
struct list_head fifo;
unsigned long slice_end;
long slice_resid;
+ /* pending metadata requests */
+ int meta_pending;
/* number of requests that are on the dispatch list or inside driver */
int dispatched;
@@ -153,8 +155,6 @@ struct cfq_queue {
unsigned short ioprio, org_ioprio;
unsigned short ioprio_class, org_ioprio_class;
- /* various state flags, see below */
- unsigned int flags;
};
enum cfqq_state_flags {
@@ -1142,6 +1142,9 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
kmem_cache_free(cfq_pool, cfqq);
}
+/*
+ * Must always be called with the rcu_read_lock() held
+ */
static void
__call_for_each_cic(struct io_context *ioc,
void (*func)(struct io_context *, struct cfq_io_context *))
@@ -1197,6 +1200,11 @@ static void cic_free_func(struct io_context *ioc, struct cfq_io_context *cic)
cfq_cic_free(cic);
}
+/*
+ * Must be called with rcu_read_lock() held or preemption otherwise disabled.
+ * Only two callers of this - ->dtor() which is called with the rcu_read_lock(),
+ * and ->trim() which is called with the task lock held
+ */
static void cfq_free_io_context(struct io_context *ioc)
{
/*
@@ -1502,20 +1510,24 @@ static struct cfq_io_context *
cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
{
struct cfq_io_context *cic;
+ unsigned long flags;
void *k;
if (unlikely(!ioc))
return NULL;
+ rcu_read_lock();
+
/*
* we maintain a last-hit cache, to avoid browsing over the tree
*/
cic = rcu_dereference(ioc->ioc_data);
- if (cic && cic->key == cfqd)
+ if (cic && cic->key == cfqd) {
+ rcu_read_unlock();
return cic;
+ }
do {
- rcu_read_lock();
cic = radix_tree_lookup(&ioc->radix_root, (unsigned long) cfqd);
rcu_read_unlock();
if (!cic)
@@ -1524,10 +1536,13 @@ cfq_cic_lookup(struct cfq_data *cfqd, struct io_context *ioc)
k = cic->key;
if (unlikely(!k)) {
cfq_drop_dead_cic(cfqd, ioc, cic);
+ rcu_read_lock();
continue;
}
+ spin_lock_irqsave(&ioc->lock, flags);
rcu_assign_pointer(ioc->ioc_data, cic);
+ spin_unlock_irqrestore(&ioc->lock, flags);
break;
} while (1);
@@ -2134,6 +2149,10 @@ static void *cfq_init_queue(struct request_queue *q)
static void cfq_slab_kill(void)
{
+ /*
+ * Caller already ensured that pending RCU callbacks are completed,
+ * so we should have no busy allocations at this point.
+ */
if (cfq_pool)
kmem_cache_destroy(cfq_pool);
if (cfq_ioc_pool)
@@ -2292,6 +2311,11 @@ static void __exit cfq_exit(void)
ioc_gone = &all_gone;
/* ioc_gone's update must be visible before reading ioc_count */
smp_wmb();
+
+ /*
+ * this also protects us from entering cfq_slab_kill() with
+ * pending RCU callbacks
+ */
if (elv_ioc_count_read(ioc_count))
wait_for_completion(ioc_gone);
cfq_slab_kill();
diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c
index c70d0b6..c23177e 100644
--- a/block/compat_ioctl.c
+++ b/block/compat_ioctl.c
@@ -555,7 +555,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg)
if (copy_from_user(&cbuts, arg, sizeof(cbuts)))
return -EFAULT;
- strcpy(b, bdevname(bdev, b));
+ bdevname(bdev, b);
buts = (struct blk_user_trace_setup) {
.act_mask = cbuts.act_mask,
diff --git a/block/elevator.c b/block/elevator.c
index 980f8ae..902dd13 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -1110,6 +1110,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q);
spin_unlock_irq(q->queue_lock);
+ blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name);
+
return 1;
fail_register:
diff --git a/block/genhd.c b/block/genhd.c
index fda9c7a..b922d48 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -653,15 +653,21 @@ void genhd_media_change_notify(struct gendisk *disk)
EXPORT_SYMBOL_GPL(genhd_media_change_notify);
#endif /* 0 */
-dev_t blk_lookup_devt(const char *name)
+dev_t blk_lookup_devt(const char *name, int part)
{
struct device *dev;
dev_t devt = MKDEV(0, 0);
mutex_lock(&block_class_lock);
list_for_each_entry(dev, &block_class.devices, node) {
+ if (dev->type != &disk_type)
+ continue;
if (strcmp(dev->bus_id, name) == 0) {
- devt = dev->devt;
+ struct gendisk *disk = dev_to_disk(dev);
+
+ if (part < disk->minors)
+ devt = MKDEV(MAJOR(dev->devt),
+ MINOR(dev->devt) + part);
break;
}
}
@@ -669,7 +675,6 @@ dev_t blk_lookup_devt(const char *name)
return devt;
}
-
EXPORT_SYMBOL(blk_lookup_devt);
struct gendisk *alloc_disk(int minors)