From dbf9782c1078c537831201c73ac60c9623ae9370 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 16 Dec 2014 18:44:36 -0500 Subject: dm: remove exports for request-based interfaces without external callers Remove exports for dm_dispatch_request, dm_requeue_unmapped_request, and dm_kill_unmapped_request. Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 71e6b73..5920a9a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1062,7 +1062,7 @@ static void dm_unprep_request(struct request *rq) /* * Requeue the original request of a clone. */ -void dm_requeue_unmapped_request(struct request *clone) +static void dm_requeue_unmapped_request(struct request *clone) { int rw = rq_data_dir(clone); struct dm_rq_target_io *tio = clone->end_io_data; @@ -1079,7 +1079,6 @@ void dm_requeue_unmapped_request(struct request *clone) rq_completed(md, rw, 0); } -EXPORT_SYMBOL_GPL(dm_requeue_unmapped_request); static void __stop_queue(struct request_queue *q) { @@ -1177,7 +1176,7 @@ static void dm_complete_request(struct request *clone, int error) * Target's rq_end_io() function isn't called. * This may be used when the target's map_rq() function fails. */ -void dm_kill_unmapped_request(struct request *clone, int error) +static void dm_kill_unmapped_request(struct request *clone, int error) { struct dm_rq_target_io *tio = clone->end_io_data; struct request *rq = tio->orig; @@ -1185,7 +1184,6 @@ void dm_kill_unmapped_request(struct request *clone, int error) rq->cmd_flags |= REQ_FAILED; dm_complete_request(clone, error); } -EXPORT_SYMBOL_GPL(dm_kill_unmapped_request); /* * Called with the queue lock held @@ -1686,7 +1684,7 @@ static void dm_request(struct request_queue *q, struct bio *bio) _dm_request(q, bio); } -void dm_dispatch_request(struct request *rq) +static void dm_dispatch_request(struct request *rq) { int r; @@ -1698,7 +1696,6 @@ void dm_dispatch_request(struct request *rq) if (r) dm_complete_request(rq, r); } -EXPORT_SYMBOL_GPL(dm_dispatch_request); static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, void *data) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index ca6d2ac..19296fb 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -600,9 +600,6 @@ static inline unsigned long to_bytes(sector_t n) /*----------------------------------------------------------------- * Helper for block layer and dm core operations *---------------------------------------------------------------*/ -void dm_dispatch_request(struct request *rq); -void dm_requeue_unmapped_request(struct request *rq); -void dm_kill_unmapped_request(struct request *rq, int error); int dm_underlying_device_busy(struct request_queue *q); #endif /* _LINUX_DEVICE_MAPPER_H */ -- cgit v0.10.2 From 1ae49ea2cf3ef097d4496981261a400f1f988b84 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 5 Dec 2014 17:11:05 -0500 Subject: dm: split request structure out from dm_rq_target_io structure Request-based DM support for blk-mq devices requires that dm_rq_target_io structures not be allocated with an embedded request structure. The request-based DM target (e.g. dm-multipath) must allocate the request from the blk-mq devices' request_queue using blk_get_request(). The unfortunate side-effect of this change is old-style request-based DM support will no longer use contiguous memory for the dm_rq_target_io and request structures for each clone. Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5920a9a..9a857e3 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -78,7 +78,7 @@ struct dm_io { struct dm_rq_target_io { struct mapped_device *md; struct dm_target *ti; - struct request *orig, clone; + struct request *orig, *clone; int error; union map_info info; }; @@ -179,6 +179,7 @@ struct mapped_device { * io objects are allocated from here. */ mempool_t *io_pool; + mempool_t *rq_pool; struct bio_set *bs; @@ -214,6 +215,7 @@ struct mapped_device { */ struct dm_md_mempools { mempool_t *io_pool; + mempool_t *rq_pool; struct bio_set *bs; }; @@ -228,6 +230,7 @@ struct table_device { #define RESERVED_MAX_IOS 1024 static struct kmem_cache *_io_cache; static struct kmem_cache *_rq_tio_cache; +static struct kmem_cache *_rq_cache; /* * Bio-based DM's mempools' reserved IOs set by the user. @@ -285,9 +288,14 @@ static int __init local_init(void) if (!_rq_tio_cache) goto out_free_io_cache; + _rq_cache = kmem_cache_create("dm_clone_request", sizeof(struct request), + __alignof__(struct request), 0, NULL); + if (!_rq_cache) + goto out_free_rq_tio_cache; + r = dm_uevent_init(); if (r) - goto out_free_rq_tio_cache; + goto out_free_rq_cache; deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1); if (!deferred_remove_workqueue) { @@ -309,6 +317,8 @@ out_free_workqueue: destroy_workqueue(deferred_remove_workqueue); out_uevent_exit: dm_uevent_exit(); +out_free_rq_cache: + kmem_cache_destroy(_rq_cache); out_free_rq_tio_cache: kmem_cache_destroy(_rq_tio_cache); out_free_io_cache: @@ -322,6 +332,7 @@ static void local_exit(void) flush_scheduled_work(); destroy_workqueue(deferred_remove_workqueue); + kmem_cache_destroy(_rq_cache); kmem_cache_destroy(_rq_tio_cache); kmem_cache_destroy(_io_cache); unregister_blkdev(_major, _name); @@ -574,6 +585,17 @@ static void free_rq_tio(struct dm_rq_target_io *tio) mempool_free(tio, tio->md->io_pool); } +static struct request *alloc_clone_request(struct mapped_device *md, + gfp_t gfp_mask) +{ + return mempool_alloc(md->rq_pool, gfp_mask); +} + +static void free_clone_request(struct mapped_device *md, struct request *rq) +{ + mempool_free(rq, md->rq_pool); +} + static int md_in_flight(struct mapped_device *md) { return atomic_read(&md->pending[READ]) + @@ -1017,6 +1039,7 @@ static void free_rq_clone(struct request *clone) struct dm_rq_target_io *tio = clone->end_io_data; blk_rq_unprep_clone(clone); + free_clone_request(tio->md, clone); free_rq_tio(tio); } @@ -1712,12 +1735,11 @@ static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, } static int setup_clone(struct request *clone, struct request *rq, - struct dm_rq_target_io *tio) + struct dm_rq_target_io *tio, gfp_t gfp_mask) { int r; - blk_rq_init(NULL, clone); - r = blk_rq_prep_clone(clone, rq, tio->md->bs, GFP_ATOMIC, + r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask, dm_rq_bio_constructor, tio); if (r) return r; @@ -1728,9 +1750,29 @@ static int setup_clone(struct request *clone, struct request *rq, clone->end_io = end_clone_request; clone->end_io_data = tio; + tio->clone = clone; + return 0; } +static struct request *__clone_rq(struct request *rq, struct mapped_device *md, + struct dm_rq_target_io *tio, gfp_t gfp_mask) +{ + struct request *clone = alloc_clone_request(md, gfp_mask); + + if (!clone) + return NULL; + + blk_rq_init(NULL, clone); + if (setup_clone(clone, rq, tio, gfp_mask)) { + /* -ENOMEM */ + free_clone_request(md, clone); + return NULL; + } + + return clone; +} + static struct request *clone_rq(struct request *rq, struct mapped_device *md, gfp_t gfp_mask) { @@ -1743,13 +1785,13 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, tio->md = md; tio->ti = NULL; + tio->clone = NULL; tio->orig = rq; tio->error = 0; memset(&tio->info, 0, sizeof(tio->info)); - clone = &tio->clone; - if (setup_clone(clone, rq, tio)) { - /* -ENOMEM */ + clone = __clone_rq(rq, md, tio, GFP_ATOMIC); + if (!clone) { free_rq_tio(tio); return NULL; } @@ -2149,6 +2191,8 @@ static void free_dev(struct mapped_device *md) destroy_workqueue(md->wq); if (md->io_pool) mempool_destroy(md->io_pool); + if (md->rq_pool) + mempool_destroy(md->rq_pool); if (md->bs) bioset_free(md->bs); blk_integrity_unregister(md->disk); @@ -2195,10 +2239,12 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) goto out; } - BUG_ON(!p || md->io_pool || md->bs); + BUG_ON(!p || md->io_pool || md->rq_pool || md->bs); md->io_pool = p->io_pool; p->io_pool = NULL; + md->rq_pool = p->rq_pool; + p->rq_pool = NULL; md->bs = p->bs; p->bs = NULL; @@ -3129,6 +3175,9 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u } else if (type == DM_TYPE_REQUEST_BASED) { cachep = _rq_tio_cache; pool_size = dm_get_reserved_rq_based_ios(); + pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); + if (!pools->rq_pool) + goto out; front_pad = offsetof(struct dm_rq_clone_bio_info, clone); /* per_bio_data_size is not used. See __bind_mempools(). */ WARN_ON(per_bio_data_size != 0); @@ -3162,6 +3211,9 @@ void dm_free_md_mempools(struct dm_md_mempools *pools) if (pools->io_pool) mempool_destroy(pools->io_pool); + if (pools->rq_pool) + mempool_destroy(pools->rq_pool); + if (pools->bs) bioset_free(pools->bs); -- cgit v0.10.2 From 2eb6e1e3aa873f2bb62075bebe17fa108ee07374 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 17 Oct 2014 17:46:36 -0600 Subject: dm: submit stacked requests in irq enabled context Switch to having request-based DM enqueue all prep'ed requests into work processed by another thread. This allows request-based DM to invoke block APIs that assume interrupt enabled context (e.g. blk_get_request) and is a prerequisite for adding blk-mq support to request-based DM. The new kernel thread is only initialized for request-based DM devices. multipath_map() is now always in irq enabled context so change multipath spinlock (m->lock) locking to always disable interrupts. Signed-off-by: Keith Busch Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 7b6b0f0..2552b88 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -384,12 +384,11 @@ static int multipath_map(struct dm_target *ti, struct request *clone, struct multipath *m = (struct multipath *) ti->private; int r = DM_MAPIO_REQUEUE; size_t nr_bytes = blk_rq_bytes(clone); - unsigned long flags; struct pgpath *pgpath; struct block_device *bdev; struct dm_mpath_io *mpio; - spin_lock_irqsave(&m->lock, flags); + spin_lock_irq(&m->lock); /* Do we need to select a new pgpath? */ if (!m->current_pgpath || @@ -411,21 +410,26 @@ static int multipath_map(struct dm_target *ti, struct request *clone, /* ENOMEM, requeue */ goto out_unlock; + mpio = map_context->ptr; + mpio->pgpath = pgpath; + mpio->nr_bytes = nr_bytes; + bdev = pgpath->path.dev->bdev; + clone->q = bdev_get_queue(bdev); clone->rq_disk = bdev->bd_disk; clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; - mpio = map_context->ptr; - mpio->pgpath = pgpath; - mpio->nr_bytes = nr_bytes; + + spin_unlock_irq(&m->lock); + if (pgpath->pg->ps.type->start_io) pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path, nr_bytes); - r = DM_MAPIO_REMAPPED; + return DM_MAPIO_REMAPPED; out_unlock: - spin_unlock_irqrestore(&m->lock, flags); + spin_unlock_irq(&m->lock); return r; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9a857e3..f0e3407 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -20,6 +20,7 @@ #include #include #include +#include #include @@ -79,6 +80,7 @@ struct dm_rq_target_io { struct mapped_device *md; struct dm_target *ti; struct request *orig, *clone; + struct kthread_work work; int error; union map_info info; }; @@ -208,6 +210,9 @@ struct mapped_device { struct bio flush_bio; struct dm_stats stats; + + struct kthread_worker kworker; + struct task_struct *kworker_task; }; /* @@ -1773,6 +1778,8 @@ static struct request *__clone_rq(struct request *rq, struct mapped_device *md, return clone; } +static void map_tio_request(struct kthread_work *work); + static struct request *clone_rq(struct request *rq, struct mapped_device *md, gfp_t gfp_mask) { @@ -1789,6 +1796,7 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, tio->orig = rq; tio->error = 0; memset(&tio->info, 0, sizeof(tio->info)); + init_kthread_work(&tio->work, map_tio_request); clone = __clone_rq(rq, md, tio, GFP_ATOMIC); if (!clone) { @@ -1833,7 +1841,6 @@ static int map_request(struct dm_target *ti, struct request *clone, int r, requeued = 0; struct dm_rq_target_io *tio = clone->end_io_data; - tio->ti = ti; r = ti->type->map_rq(ti, clone, &tio->info); switch (r) { case DM_MAPIO_SUBMITTED: @@ -1864,6 +1871,13 @@ static int map_request(struct dm_target *ti, struct request *clone, return requeued; } +static void map_tio_request(struct kthread_work *work) +{ + struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); + + map_request(tio->ti, tio->clone, tio->md); +} + static struct request *dm_start_request(struct mapped_device *md, struct request *orig) { struct request *clone; @@ -1895,6 +1909,7 @@ static void dm_request_fn(struct request_queue *q) struct dm_table *map = dm_get_live_table(md, &srcu_idx); struct dm_target *ti; struct request *rq, *clone; + struct dm_rq_target_io *tio; sector_t pos; /* @@ -1930,20 +1945,15 @@ static void dm_request_fn(struct request_queue *q) clone = dm_start_request(md, rq); - spin_unlock(q->queue_lock); - if (map_request(ti, clone, md)) - goto requeued; - + tio = rq->special; + /* Establish tio->ti before queuing work (map_tio_request) */ + tio->ti = ti; + queue_kthread_work(&md->kworker, &tio->work); BUG_ON(!irqs_disabled()); - spin_lock(q->queue_lock); } goto out; -requeued: - BUG_ON(!irqs_disabled()); - spin_lock(q->queue_lock); - delay_and_out: blk_delay_queue(q, HZ / 10); out: @@ -2129,6 +2139,7 @@ static struct mapped_device *alloc_dev(int minor) INIT_WORK(&md->work, dm_wq_work); init_waitqueue_head(&md->eventq); init_completion(&md->kobj_holder.completion); + md->kworker_task = NULL; md->disk->major = _major; md->disk->first_minor = minor; @@ -2189,6 +2200,9 @@ static void free_dev(struct mapped_device *md) unlock_fs(md); bdput(md->bdev); destroy_workqueue(md->wq); + + if (md->kworker_task) + kthread_stop(md->kworker_task); if (md->io_pool) mempool_destroy(md->io_pool); if (md->rq_pool) @@ -2484,6 +2498,11 @@ static int dm_init_request_based_queue(struct mapped_device *md) blk_queue_prep_rq(md->queue, dm_prep_fn); blk_queue_lld_busy(md->queue, dm_lld_busy); + /* Also initialize the request-based DM worker thread */ + init_kthread_worker(&md->kworker); + md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, + "kdmwork-%s", dm_device_name(md)); + elv_register_queue(md->queue); return 1; @@ -2574,6 +2593,9 @@ static void __dm_destroy(struct mapped_device *md, bool wait) set_bit(DMF_FREEING, &md->flags); spin_unlock(&_minor_lock); + if (dm_request_based(md)) + flush_kthread_worker(&md->kworker); + if (!dm_suspended_md(md)) { dm_table_presuspend_targets(map); dm_table_postsuspend_targets(map); @@ -2817,8 +2839,10 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, * Stop md->queue before flushing md->wq in case request-based * dm defers requests to md->wq from md->queue. */ - if (dm_request_based(md)) + if (dm_request_based(md)) { stop_queue(md->queue); + flush_kthread_worker(&md->kworker); + } flush_workqueue(md->wq); -- cgit v0.10.2 From 466d89a6bcd500f64896b514f78b32e8d0b0303a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 17 Oct 2014 17:46:37 -0600 Subject: dm: prepare for allocating blk-mq clone requests in target For blk-mq request-based DM the responsibility of allocating a cloned request will be transfered from DM core to the target type. To prepare for conditionally using this new model the original request's 'special' now points to the dm_rq_target_io because the clone is allocated later in the block layer rather than in DM core. Signed-off-by: Keith Busch Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f0e3407..ae12198 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1016,7 +1016,7 @@ static void end_clone_bio(struct bio *clone, int error) * the md may be freed in dm_put() at the end of this function. * Or do dm_get() before calling this function and dm_put() later. */ -static void rq_completed(struct mapped_device *md, int rw, int run_queue) +static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { atomic_dec(&md->pending[rw]); @@ -1050,7 +1050,8 @@ static void free_rq_clone(struct request *clone) /* * Complete the clone and the original request. - * Must be called without queue lock. + * Must be called without clone's queue lock held, + * see end_clone_request() for more details. */ static void dm_end_request(struct request *clone, int error) { @@ -1079,7 +1080,8 @@ static void dm_end_request(struct request *clone, int error) static void dm_unprep_request(struct request *rq) { - struct request *clone = rq->special; + struct dm_rq_target_io *tio = rq->special; + struct request *clone = tio->clone; rq->special = NULL; rq->cmd_flags &= ~REQ_DONTPREP; @@ -1090,12 +1092,10 @@ static void dm_unprep_request(struct request *rq) /* * Requeue the original request of a clone. */ -static void dm_requeue_unmapped_request(struct request *clone) +static void dm_requeue_unmapped_original_request(struct mapped_device *md, + struct request *rq) { - int rw = rq_data_dir(clone); - struct dm_rq_target_io *tio = clone->end_io_data; - struct mapped_device *md = tio->md; - struct request *rq = tio->orig; + int rw = rq_data_dir(rq); struct request_queue *q = rq->q; unsigned long flags; @@ -1105,7 +1105,14 @@ static void dm_requeue_unmapped_request(struct request *clone) blk_requeue_request(q, rq); spin_unlock_irqrestore(q->queue_lock, flags); - rq_completed(md, rw, 0); + rq_completed(md, rw, false); +} + +static void dm_requeue_unmapped_request(struct request *clone) +{ + struct dm_rq_target_io *tio = clone->end_io_data; + + dm_requeue_unmapped_original_request(tio->md, tio->orig); } static void __stop_queue(struct request_queue *q) @@ -1175,8 +1182,8 @@ static void dm_done(struct request *clone, int error, bool mapped) static void dm_softirq_done(struct request *rq) { bool mapped = true; - struct request *clone = rq->completion_data; - struct dm_rq_target_io *tio = clone->end_io_data; + struct dm_rq_target_io *tio = rq->special; + struct request *clone = tio->clone; if (rq->cmd_flags & REQ_FAILED) mapped = false; @@ -1188,13 +1195,11 @@ static void dm_softirq_done(struct request *rq) * Complete the clone and the original request with the error status * through softirq context. */ -static void dm_complete_request(struct request *clone, int error) +static void dm_complete_request(struct request *rq, int error) { - struct dm_rq_target_io *tio = clone->end_io_data; - struct request *rq = tio->orig; + struct dm_rq_target_io *tio = rq->special; tio->error = error; - rq->completion_data = clone; blk_complete_request(rq); } @@ -1204,20 +1209,19 @@ static void dm_complete_request(struct request *clone, int error) * Target's rq_end_io() function isn't called. * This may be used when the target's map_rq() function fails. */ -static void dm_kill_unmapped_request(struct request *clone, int error) +static void dm_kill_unmapped_request(struct request *rq, int error) { - struct dm_rq_target_io *tio = clone->end_io_data; - struct request *rq = tio->orig; - rq->cmd_flags |= REQ_FAILED; - dm_complete_request(clone, error); + dm_complete_request(rq, error); } /* - * Called with the queue lock held + * Called with the clone's queue lock held */ static void end_clone_request(struct request *clone, int error) { + struct dm_rq_target_io *tio = clone->end_io_data; + /* * For just cleaning up the information of the queue in which * the clone was dispatched. @@ -1228,13 +1232,13 @@ static void end_clone_request(struct request *clone, int error) /* * Actual request completion is done in a softirq context which doesn't - * hold the queue lock. Otherwise, deadlock could occur because: + * hold the clone's queue lock. Otherwise, deadlock could occur because: * - another request may be submitted by the upper level driver * of the stacking during the completion * - the submission which requires queue lock may be done - * against this queue + * against this clone's queue */ - dm_complete_request(clone, error); + dm_complete_request(tio->orig, error); } /* @@ -1712,16 +1716,17 @@ static void dm_request(struct request_queue *q, struct bio *bio) _dm_request(q, bio); } -static void dm_dispatch_request(struct request *rq) +static void dm_dispatch_clone_request(struct request *clone, struct request *rq) { int r; - if (blk_queue_io_stat(rq->q)) - rq->cmd_flags |= REQ_IO_STAT; + if (blk_queue_io_stat(clone->q)) + clone->cmd_flags |= REQ_IO_STAT; - rq->start_time = jiffies; - r = blk_insert_cloned_request(rq->q, rq); + clone->start_time = jiffies; + r = blk_insert_cloned_request(clone->q, clone); if (r) + /* must complete clone in terms of original request */ dm_complete_request(rq, r); } @@ -1760,8 +1765,8 @@ static int setup_clone(struct request *clone, struct request *rq, return 0; } -static struct request *__clone_rq(struct request *rq, struct mapped_device *md, - struct dm_rq_target_io *tio, gfp_t gfp_mask) +static struct request *clone_rq(struct request *rq, struct mapped_device *md, + struct dm_rq_target_io *tio, gfp_t gfp_mask) { struct request *clone = alloc_clone_request(md, gfp_mask); @@ -1780,10 +1785,9 @@ static struct request *__clone_rq(struct request *rq, struct mapped_device *md, static void map_tio_request(struct kthread_work *work); -static struct request *clone_rq(struct request *rq, struct mapped_device *md, - gfp_t gfp_mask) +static struct dm_rq_target_io *prep_tio(struct request *rq, + struct mapped_device *md, gfp_t gfp_mask) { - struct request *clone; struct dm_rq_target_io *tio; tio = alloc_rq_tio(md, gfp_mask); @@ -1798,13 +1802,12 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, memset(&tio->info, 0, sizeof(tio->info)); init_kthread_work(&tio->work, map_tio_request); - clone = __clone_rq(rq, md, tio, GFP_ATOMIC); - if (!clone) { + if (!clone_rq(rq, md, tio, gfp_mask)) { free_rq_tio(tio); return NULL; } - return clone; + return tio; } /* @@ -1813,18 +1816,18 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md, static int dm_prep_fn(struct request_queue *q, struct request *rq) { struct mapped_device *md = q->queuedata; - struct request *clone; + struct dm_rq_target_io *tio; if (unlikely(rq->special)) { DMWARN("Already has something in rq->special."); return BLKPREP_KILL; } - clone = clone_rq(rq, md, GFP_ATOMIC); - if (!clone) + tio = prep_tio(rq, md, GFP_ATOMIC); + if (!tio) return BLKPREP_DEFER; - rq->special = clone; + rq->special = tio; rq->cmd_flags |= REQ_DONTPREP; return BLKPREP_OK; @@ -1835,11 +1838,12 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) * 0 : the request has been processed (not requeued) * !0 : the request has been requeued */ -static int map_request(struct dm_target *ti, struct request *clone, +static int map_request(struct dm_target *ti, struct request *rq, struct mapped_device *md) { int r, requeued = 0; - struct dm_rq_target_io *tio = clone->end_io_data; + struct dm_rq_target_io *tio = rq->special; + struct request *clone = tio->clone; r = ti->type->map_rq(ti, clone, &tio->info); switch (r) { @@ -1849,8 +1853,8 @@ static int map_request(struct dm_target *ti, struct request *clone, case DM_MAPIO_REMAPPED: /* The target has remapped the I/O so dispatch it */ trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), - blk_rq_pos(tio->orig)); - dm_dispatch_request(clone); + blk_rq_pos(rq)); + dm_dispatch_clone_request(clone, rq); break; case DM_MAPIO_REQUEUE: /* The target wants to requeue the I/O */ @@ -1864,7 +1868,7 @@ static int map_request(struct dm_target *ti, struct request *clone, } /* The target wants to complete the I/O */ - dm_kill_unmapped_request(clone, r); + dm_kill_unmapped_request(rq, r); break; } @@ -1875,16 +1879,13 @@ static void map_tio_request(struct kthread_work *work) { struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); - map_request(tio->ti, tio->clone, tio->md); + map_request(tio->ti, tio->orig, tio->md); } -static struct request *dm_start_request(struct mapped_device *md, struct request *orig) +static void dm_start_request(struct mapped_device *md, struct request *orig) { - struct request *clone; - blk_start_request(orig); - clone = orig->special; - atomic_inc(&md->pending[rq_data_dir(clone)]); + atomic_inc(&md->pending[rq_data_dir(orig)]); /* * Hold the md reference here for the in-flight I/O. @@ -1894,8 +1895,6 @@ static struct request *dm_start_request(struct mapped_device *md, struct request * See the comment in rq_completed() too. */ dm_get(md); - - return clone; } /* @@ -1908,7 +1907,7 @@ static void dm_request_fn(struct request_queue *q) int srcu_idx; struct dm_table *map = dm_get_live_table(md, &srcu_idx); struct dm_target *ti; - struct request *rq, *clone; + struct request *rq; struct dm_rq_target_io *tio; sector_t pos; @@ -1931,19 +1930,19 @@ static void dm_request_fn(struct request_queue *q) ti = dm_table_find_target(map, pos); if (!dm_target_is_valid(ti)) { /* - * Must perform setup, that dm_done() requires, + * Must perform setup, that rq_completed() requires, * before calling dm_kill_unmapped_request */ DMERR_LIMIT("request attempted access beyond the end of device"); - clone = dm_start_request(md, rq); - dm_kill_unmapped_request(clone, -EIO); + dm_start_request(md, rq); + dm_kill_unmapped_request(rq, -EIO); continue; } if (ti->type->busy && ti->type->busy(ti)) goto delay_and_out; - clone = dm_start_request(md, rq); + dm_start_request(md, rq); tio = rq->special; /* Establish tio->ti before queuing work (map_tio_request) */ @@ -2240,16 +2239,15 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t) bioset_free(md->bs); md->bs = p->bs; p->bs = NULL; - } else if (dm_table_get_type(t) == DM_TYPE_REQUEST_BASED) { - /* - * There's no need to reload with request-based dm - * because the size of front_pad doesn't change. - * Note for future: If you are to reload bioset, - * prep-ed requests in the queue may refer - * to bio from the old bioset, so you must walk - * through the queue to unprep. - */ } + /* + * There's no need to reload with request-based dm + * because the size of front_pad doesn't change. + * Note for future: If you are to reload bioset, + * prep-ed requests in the queue may refer + * to bio from the old bioset, so you must walk + * through the queue to unprep. + */ goto out; } -- cgit v0.10.2 From e5863d9ad754926e7d3f38b43ac8bd48ef73b097 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 17 Dec 2014 21:08:12 -0500 Subject: dm: allocate requests in target when stacking on blk-mq devices For blk-mq request-based DM the responsibility of allocating a cloned request is transfered from DM core to the target type. Doing so enables the cloned request to be allocated from the appropriate blk-mq request_queue's pool (only the DM target, e.g. multipath, can know which block device to send a given cloned request to). Care was taken to preserve compatibility with old-style block request completion that requires request-based DM _not_ acquire the clone request's queue lock in the completion path. As such, there are now 2 different request-based DM target_type interfaces: 1) the original .map_rq() interface will continue to be used for non-blk-mq devices -- the preallocated clone request is passed in from DM core. 2) a new .clone_and_map_rq() and .release_clone_rq() will be used for blk-mq devices -- blk_get_request() and blk_put_request() are used respectively from these hooks. dm_table_set_type() was updated to detect if the request-based target is being stacked on blk-mq devices, if so DM_TYPE_MQ_REQUEST_BASED is set. DM core disallows switching the DM table's type after it is set. This means that there is no mixing of non-blk-mq and blk-mq devices within the same request-based DM table. [This patch was started by Keith and later heavily modified by Mike] Tested-by: Bart Van Assche Signed-off-by: Keith Busch Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 2552b88..863fc8c 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -11,6 +11,7 @@ #include "dm-path-selector.h" #include "dm-uevent.h" +#include #include #include #include @@ -378,12 +379,13 @@ static int __must_push_back(struct multipath *m) /* * Map cloned requests */ -static int multipath_map(struct dm_target *ti, struct request *clone, - union map_info *map_context) +static int __multipath_map(struct dm_target *ti, struct request *clone, + union map_info *map_context, + struct request *rq, struct request **__clone) { struct multipath *m = (struct multipath *) ti->private; int r = DM_MAPIO_REQUEUE; - size_t nr_bytes = blk_rq_bytes(clone); + size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq); struct pgpath *pgpath; struct block_device *bdev; struct dm_mpath_io *mpio; @@ -416,12 +418,25 @@ static int multipath_map(struct dm_target *ti, struct request *clone, bdev = pgpath->path.dev->bdev; - clone->q = bdev_get_queue(bdev); - clone->rq_disk = bdev->bd_disk; - clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; - spin_unlock_irq(&m->lock); + if (clone) { + /* Old request-based interface: allocated clone is passed in */ + clone->q = bdev_get_queue(bdev); + clone->rq_disk = bdev->bd_disk; + clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; + } else { + /* blk-mq request-based interface */ + *__clone = blk_get_request(bdev_get_queue(bdev), + rq_data_dir(rq), GFP_KERNEL); + if (IS_ERR(*__clone)) + /* ENOMEM, requeue */ + return r; + (*__clone)->bio = (*__clone)->biotail = NULL; + (*__clone)->rq_disk = bdev->bd_disk; + (*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT; + } + if (pgpath->pg->ps.type->start_io) pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path, @@ -434,6 +449,24 @@ out_unlock: return r; } +static int multipath_map(struct dm_target *ti, struct request *clone, + union map_info *map_context) +{ + return __multipath_map(ti, clone, map_context, NULL, NULL); +} + +static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, + union map_info *map_context, + struct request **clone) +{ + return __multipath_map(ti, NULL, map_context, rq, clone); +} + +static void multipath_release_clone(struct request *clone) +{ + blk_put_request(clone); +} + /* * If we run out of usable paths, should we queue I/O or error it? */ @@ -1670,11 +1703,13 @@ out: *---------------------------------------------------------------*/ static struct target_type multipath_target = { .name = "multipath", - .version = {1, 7, 0}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = multipath_ctr, .dtr = multipath_dtr, .map_rq = multipath_map, + .clone_and_map_rq = multipath_clone_and_map, + .release_clone_rq = multipath_release_clone, .rq_end_io = multipath_end_io, .presuspend = multipath_presuspend, .postsuspend = multipath_postsuspend, diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 3afae9e..2d7e373 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -827,6 +827,7 @@ static int dm_table_set_type(struct dm_table *t) { unsigned i; unsigned bio_based = 0, request_based = 0, hybrid = 0; + bool use_blk_mq = false; struct dm_target *tgt; struct dm_dev_internal *dd; struct list_head *devices; @@ -872,11 +873,26 @@ static int dm_table_set_type(struct dm_table *t) /* Non-request-stackable devices can't be used for request-based dm */ devices = dm_table_get_devices(t); list_for_each_entry(dd, devices, list) { - if (!blk_queue_stackable(bdev_get_queue(dd->dm_dev->bdev))) { - DMWARN("table load rejected: including" - " non-request-stackable devices"); + struct request_queue *q = bdev_get_queue(dd->dm_dev->bdev); + + if (!blk_queue_stackable(q)) { + DMERR("table load rejected: including" + " non-request-stackable devices"); return -EINVAL; } + + if (q->mq_ops) + use_blk_mq = true; + } + + if (use_blk_mq) { + /* verify _all_ devices in the table are blk-mq devices */ + list_for_each_entry(dd, devices, list) + if (!bdev_get_queue(dd->dm_dev->bdev)->mq_ops) { + DMERR("table load rejected: not all devices" + " are blk-mq request-stackable"); + return -EINVAL; + } } /* @@ -890,7 +906,7 @@ static int dm_table_set_type(struct dm_table *t) return -EINVAL; } - t->type = DM_TYPE_REQUEST_BASED; + t->type = !use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED; return 0; } @@ -907,7 +923,15 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t) bool dm_table_request_based(struct dm_table *t) { - return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED; + unsigned table_type = dm_table_get_type(t); + + return (table_type == DM_TYPE_REQUEST_BASED || + table_type == DM_TYPE_MQ_REQUEST_BASED); +} + +bool dm_table_mq_request_based(struct dm_table *t) +{ + return dm_table_get_type(t) == DM_TYPE_MQ_REQUEST_BASED; } static int dm_table_alloc_md_mempools(struct dm_table *t) diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 242e3ce..925ec1b 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -137,13 +137,26 @@ static int io_err_map_rq(struct dm_target *ti, struct request *clone, return -EIO; } +static int io_err_clone_and_map_rq(struct dm_target *ti, struct request *rq, + union map_info *map_context, + struct request **clone) +{ + return -EIO; +} + +static void io_err_release_clone_rq(struct request *clone) +{ +} + static struct target_type error_target = { .name = "error", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .ctr = io_err_ctr, .dtr = io_err_dtr, .map = io_err_map, .map_rq = io_err_map_rq, + .clone_and_map_rq = io_err_clone_and_map_rq, + .release_clone_rq = io_err_release_clone_rq, }; int __init dm_target_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ae12198..549b815 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1044,7 +1044,10 @@ static void free_rq_clone(struct request *clone) struct dm_rq_target_io *tio = clone->end_io_data; blk_rq_unprep_clone(clone); - free_clone_request(tio->md, clone); + if (clone->q && clone->q->mq_ops) + tio->ti->type->release_clone_rq(clone); + else + free_clone_request(tio->md, clone); free_rq_tio(tio); } @@ -1086,7 +1089,8 @@ static void dm_unprep_request(struct request *rq) rq->special = NULL; rq->cmd_flags &= ~REQ_DONTPREP; - free_rq_clone(clone); + if (clone) + free_rq_clone(clone); } /* @@ -1185,6 +1189,13 @@ static void dm_softirq_done(struct request *rq) struct dm_rq_target_io *tio = rq->special; struct request *clone = tio->clone; + if (!clone) { + blk_end_request_all(rq, tio->error); + rq_completed(tio->md, rq_data_dir(rq), false); + free_rq_tio(tio); + return; + } + if (rq->cmd_flags & REQ_FAILED) mapped = false; @@ -1207,7 +1218,7 @@ static void dm_complete_request(struct request *rq, int error) * Complete the not-mapped clone and the original request with the error status * through softirq context. * Target's rq_end_io() function isn't called. - * This may be used when the target's map_rq() function fails. + * This may be used when the target's map_rq() or clone_and_map_rq() functions fail. */ static void dm_kill_unmapped_request(struct request *rq, int error) { @@ -1222,13 +1233,15 @@ static void end_clone_request(struct request *clone, int error) { struct dm_rq_target_io *tio = clone->end_io_data; - /* - * For just cleaning up the information of the queue in which - * the clone was dispatched. - * The clone is *NOT* freed actually here because it is alloced from - * dm own mempool and REQ_ALLOCED isn't set in clone->cmd_flags. - */ - __blk_put_request(clone->q, clone); + if (!clone->q->mq_ops) { + /* + * For just cleaning up the information of the queue in which + * the clone was dispatched. + * The clone is *NOT* freed actually here because it is alloced + * from dm own mempool (REQ_ALLOCED isn't set). + */ + __blk_put_request(clone->q, clone); + } /* * Actual request completion is done in a softirq context which doesn't @@ -1789,6 +1802,8 @@ static struct dm_rq_target_io *prep_tio(struct request *rq, struct mapped_device *md, gfp_t gfp_mask) { struct dm_rq_target_io *tio; + int srcu_idx; + struct dm_table *table; tio = alloc_rq_tio(md, gfp_mask); if (!tio) @@ -1802,10 +1817,15 @@ static struct dm_rq_target_io *prep_tio(struct request *rq, memset(&tio->info, 0, sizeof(tio->info)); init_kthread_work(&tio->work, map_tio_request); - if (!clone_rq(rq, md, tio, gfp_mask)) { - free_rq_tio(tio); - return NULL; + table = dm_get_live_table(md, &srcu_idx); + if (!dm_table_mq_request_based(table)) { + if (!clone_rq(rq, md, tio, gfp_mask)) { + dm_put_live_table(md, srcu_idx); + free_rq_tio(tio); + return NULL; + } } + dm_put_live_table(md, srcu_idx); return tio; } @@ -1835,17 +1855,36 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq) /* * Returns: - * 0 : the request has been processed (not requeued) - * !0 : the request has been requeued + * 0 : the request has been processed + * DM_MAPIO_REQUEUE : the original request needs to be requeued + * < 0 : the request was completed due to failure */ static int map_request(struct dm_target *ti, struct request *rq, struct mapped_device *md) { - int r, requeued = 0; + int r; struct dm_rq_target_io *tio = rq->special; - struct request *clone = tio->clone; + struct request *clone = NULL; + + if (tio->clone) { + clone = tio->clone; + r = ti->type->map_rq(ti, clone, &tio->info); + } else { + r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone); + if (r < 0) { + /* The target wants to complete the I/O */ + dm_kill_unmapped_request(rq, r); + return r; + } + if (IS_ERR(clone)) + return DM_MAPIO_REQUEUE; + if (setup_clone(clone, rq, tio, GFP_KERNEL)) { + /* -ENOMEM */ + ti->type->release_clone_rq(clone); + return DM_MAPIO_REQUEUE; + } + } - r = ti->type->map_rq(ti, clone, &tio->info); switch (r) { case DM_MAPIO_SUBMITTED: /* The target has taken the I/O to submit by itself later */ @@ -1859,7 +1898,6 @@ static int map_request(struct dm_target *ti, struct request *rq, case DM_MAPIO_REQUEUE: /* The target wants to requeue the I/O */ dm_requeue_unmapped_request(clone); - requeued = 1; break; default: if (r > 0) { @@ -1869,17 +1907,20 @@ static int map_request(struct dm_target *ti, struct request *rq, /* The target wants to complete the I/O */ dm_kill_unmapped_request(rq, r); - break; + return r; } - return requeued; + return 0; } static void map_tio_request(struct kthread_work *work) { struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); + struct request *rq = tio->orig; + struct mapped_device *md = tio->md; - map_request(tio->ti, tio->orig, tio->md); + if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE) + dm_requeue_unmapped_original_request(md, rq); } static void dm_start_request(struct mapped_device *md, struct request *orig) @@ -2459,6 +2500,14 @@ unsigned dm_get_md_type(struct mapped_device *md) return md->type; } +static bool dm_md_type_request_based(struct mapped_device *md) +{ + unsigned table_type = dm_get_md_type(md); + + return (table_type == DM_TYPE_REQUEST_BASED || + table_type == DM_TYPE_MQ_REQUEST_BASED); +} + struct target_type *dm_get_immutable_target_type(struct mapped_device *md) { return md->immutable_target_type; @@ -2511,8 +2560,7 @@ static int dm_init_request_based_queue(struct mapped_device *md) */ int dm_setup_md_queue(struct mapped_device *md) { - if ((dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) && - !dm_init_request_based_queue(md)) { + if (dm_md_type_request_based(md) && !dm_init_request_based_queue(md)) { DMWARN("Cannot initialize queue for request-based mapped device"); return -EINVAL; } @@ -3184,27 +3232,35 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u { struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); struct kmem_cache *cachep; - unsigned int pool_size; + unsigned int pool_size = 0; unsigned int front_pad; if (!pools) return NULL; - if (type == DM_TYPE_BIO_BASED) { + switch (type) { + case DM_TYPE_BIO_BASED: cachep = _io_cache; pool_size = dm_get_reserved_bio_based_ios(); front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone); - } else if (type == DM_TYPE_REQUEST_BASED) { - cachep = _rq_tio_cache; + break; + case DM_TYPE_REQUEST_BASED: pool_size = dm_get_reserved_rq_based_ios(); pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache); if (!pools->rq_pool) goto out; + /* fall through to setup remaining rq-based pools */ + case DM_TYPE_MQ_REQUEST_BASED: + cachep = _rq_tio_cache; + if (!pool_size) + pool_size = dm_get_reserved_rq_based_ios(); front_pad = offsetof(struct dm_rq_clone_bio_info, clone); /* per_bio_data_size is not used. See __bind_mempools(). */ WARN_ON(per_bio_data_size != 0); - } else + break; + default: goto out; + } pools->io_pool = mempool_create_slab_pool(pool_size, cachep); if (!pools->io_pool) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 84b0f9e4..84d7978 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -34,9 +34,10 @@ /* * Type of table and mapped_device's mempool */ -#define DM_TYPE_NONE 0 -#define DM_TYPE_BIO_BASED 1 -#define DM_TYPE_REQUEST_BASED 2 +#define DM_TYPE_NONE 0 +#define DM_TYPE_BIO_BASED 1 +#define DM_TYPE_REQUEST_BASED 2 +#define DM_TYPE_MQ_REQUEST_BASED 3 /* * List of devices that a metadevice uses and should open/close. @@ -73,6 +74,7 @@ int dm_table_any_busy_target(struct dm_table *t); unsigned dm_table_get_type(struct dm_table *t); struct target_type *dm_table_get_immutable_target_type(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); +bool dm_table_mq_request_based(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 19296fb..2646aed 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -48,6 +48,11 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti); typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio); typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone, union map_info *map_context); +typedef int (*dm_clone_and_map_request_fn) (struct dm_target *ti, + struct request *rq, + union map_info *map_context, + struct request **clone); +typedef void (*dm_release_clone_request_fn) (struct request *clone); /* * Returns: @@ -143,6 +148,8 @@ struct target_type { dm_dtr_fn dtr; dm_map_fn map; dm_map_request_fn map_rq; + dm_clone_and_map_request_fn clone_and_map_rq; + dm_release_clone_request_fn release_clone_rq; dm_endio_fn end_io; dm_request_endio_fn rq_end_io; dm_presuspend_fn presuspend; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index a570d7b..889f3a5 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 29 +#define DM_VERSION_MINOR 30 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2014-10-28)" +#define DM_VERSION_EXTRA "-ioctl (2014-12-22)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v0.10.2 From 65803c2059832fb99b992728157f7924c2e42d4b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 18 Dec 2014 16:26:47 -0500 Subject: dm table: train hybrid target type detection to select blk-mq if appropriate Otherwise replacing the multipath target with the error target fails: device-mapper: ioctl: can't change device type after initial table load. The error target was mistakenly considered to be target type DM_TYPE_REQUEST_BASED rather than DM_TYPE_MQ_REQUEST_BASED even if the target it was to replace was of type DM_TYPE_MQ_REQUEST_BASED. Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2d7e373..14954d8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -831,7 +831,7 @@ static int dm_table_set_type(struct dm_table *t) struct dm_target *tgt; struct dm_dev_internal *dd; struct list_head *devices; - unsigned live_md_type; + unsigned live_md_type = dm_get_md_type(t->md); for (i = 0; i < t->num_targets; i++) { tgt = t->targets + i; @@ -855,8 +855,8 @@ static int dm_table_set_type(struct dm_table *t) * Determine the type from the live device. * Default to bio-based if device is new. */ - live_md_type = dm_get_md_type(t->md); - if (live_md_type == DM_TYPE_REQUEST_BASED) + if (live_md_type == DM_TYPE_REQUEST_BASED || + live_md_type == DM_TYPE_MQ_REQUEST_BASED) request_based = 1; else bio_based = 1; @@ -870,6 +870,17 @@ static int dm_table_set_type(struct dm_table *t) BUG_ON(!request_based); /* No targets in this table */ + /* + * Request-based dm supports only tables that have a single target now. + * To support multiple targets, request splitting support is needed, + * and that needs lots of changes in the block-layer. + * (e.g. request completion process for partial completion.) + */ + if (t->num_targets > 1) { + DMWARN("Request-based dm doesn't support multiple targets yet"); + return -EINVAL; + } + /* Non-request-stackable devices can't be used for request-based dm */ devices = dm_table_get_devices(t); list_for_each_entry(dd, devices, list) { @@ -893,20 +904,14 @@ static int dm_table_set_type(struct dm_table *t) " are blk-mq request-stackable"); return -EINVAL; } - } + t->type = DM_TYPE_MQ_REQUEST_BASED; - /* - * Request-based dm supports only tables that have a single target now. - * To support multiple targets, request splitting support is needed, - * and that needs lots of changes in the block-layer. - * (e.g. request completion process for partial completion.) - */ - if (t->num_targets > 1) { - DMWARN("Request-based dm doesn't support multiple targets yet"); - return -EINVAL; - } + } else if (hybrid && list_empty(devices) && live_md_type != DM_TYPE_NONE) { + /* inherit live MD type */ + t->type = live_md_type; - t->type = !use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED; + } else + t->type = DM_TYPE_REQUEST_BASED; return 0; } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 84d7978..59f53e7 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -101,7 +101,8 @@ int dm_setup_md_queue(struct mapped_device *md); /* * To check whether the target type is request-based or not (bio-based). */ -#define dm_target_request_based(t) ((t)->type->map_rq != NULL) +#define dm_target_request_based(t) (((t)->type->map_rq != NULL) || \ + ((t)->type->clone_and_map_rq != NULL)) /* * To check whether the target type is a hybrid (capable of being -- cgit v0.10.2 From 3ca5a21a9c02bdebe2d95268482031f002efcf23 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 29 May 2014 11:23:23 +0300 Subject: dm raid: fix a couple integer overflows My static checker complains that if "num_raid_params" is UINT_MAX then the "if (num_raid_params + 1 > argc) {" check doesn't work as intended. The other change is that I moved the "if (argc != (num_raid_devs * 2))" condition forward a few lines so it was before the call to context_alloc(). If we had an integer overflow inside that function then it would lead to an immediate crash. Signed-off-by: Dan Carpenter Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 07c0fa0..41acc9d 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1243,7 +1243,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) argv++; /* Skip over RAID params for now and find out # of devices */ - if (num_raid_params + 1 > argc) { + if (num_raid_params >= argc) { ti->error = "Arguments do not agree with counts given"; return -EINVAL; } @@ -1254,6 +1254,12 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) return -EINVAL; } + argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */ + if (argc != (num_raid_devs * 2)) { + ti->error = "Supplied RAID devices does not match the count given"; + return -EINVAL; + } + rs = context_alloc(ti, rt, (unsigned)num_raid_devs); if (IS_ERR(rs)) return PTR_ERR(rs); @@ -1262,16 +1268,8 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) if (ret) goto bad; - ret = -EINVAL; - - argc -= num_raid_params + 1; /* +1: we already have num_raid_devs */ argv += num_raid_params + 1; - if (argc != (num_raid_devs * 2)) { - ti->error = "Supplied RAID devices does not match the count given"; - goto bad; - } - ret = dev_parms(rs, argv); if (ret) goto bad; -- cgit v0.10.2 From 0f30af98cbb111cebd99f09cb7b8cc8c9351c0b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20Sch=C3=B6lling?= Date: Thu, 22 May 2014 22:42:37 +0200 Subject: dm: use time_in_range() and time_after() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be future-proof and for better readability the time comparisons are modified to use time_in_range() and time_after() instead of plain, error-prone math. Signed-off-by: Manuel Schölling Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 1e96d78..2eca128 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -1547,8 +1548,8 @@ static void process_bio(struct cache *cache, struct prealloc *structs, static int need_commit_due_to_time(struct cache *cache) { - return jiffies < cache->last_commit_jiffies || - jiffies > cache->last_commit_jiffies + COMMIT_PERIOD; + return !time_in_range(jiffies, cache->last_commit_jiffies, + cache->last_commit_jiffies + COMMIT_PERIOD); } static int commit_if_needed(struct cache *cache) diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index b953db6..03177ca 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -829,7 +830,7 @@ static int userspace_is_remote_recovering(struct dm_dirty_log *log, int r; uint64_t region64 = region; struct log_c *lc = log->context; - static unsigned long long limit; + static unsigned long limit; struct { int64_t is_recovering; uint64_t in_sync_hint; @@ -845,7 +846,7 @@ static int userspace_is_remote_recovering(struct dm_dirty_log *log, */ if (region < lc->in_sync_hint) return 0; - else if (jiffies < limit) + else if (time_after(limit, jiffies)) return 1; limit = jiffies + (HZ / 4); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 4934789..0f78145 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1700,8 +1701,8 @@ static void process_cell_fail(struct thin_c *tc, struct dm_bio_prison_cell *cell */ static int need_commit_due_to_time(struct pool *pool) { - return jiffies < pool->last_commit_jiffies || - jiffies > pool->last_commit_jiffies + COMMIT_PERIOD; + return !time_in_range(jiffies, pool->last_commit_jiffies, + pool->last_commit_jiffies + COMMIT_PERIOD); } #define thin_pbd(node) rb_entry((node), struct dm_thin_endio_hook, rb_node) -- cgit v0.10.2 From f495339c441b9a69c43327f71c23ffa7632e3020 Mon Sep 17 00:00:00 2001 From: Asaf Vertz Date: Tue, 6 Jan 2015 15:44:15 +0200 Subject: dm bufio: fix time comparison to use time_after_eq() To be future-proof and for better readability the time comparison is modified to use time_after_eq() instead of plain, error-prone math. Signed-off-by: Asaf Vertz Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index c33b497..86dbbc7 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1739,7 +1740,7 @@ static unsigned get_max_age_hz(void) static bool older_than(struct dm_buffer *b, unsigned long age_hz) { - return (jiffies - b->last_accessed) >= age_hz; + return time_after_eq(jiffies, b->last_accessed + age_hz); } static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) -- cgit v0.10.2 From cf35248768b08851a26c16059ebca49dc4ad314f Mon Sep 17 00:00:00 2001 From: Loic Pefferkorn Date: Mon, 15 Dec 2014 22:18:43 +0100 Subject: dm crypt: update url in CONFIG_DM_CRYPT help text Update the obsolete url in the CONFIG_DM_CRYPT help text. Signed-off-by: Loic Pefferkorn Signed-off-by: Mike Snitzer diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 5bdedf6..09c89a4b 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -230,9 +230,8 @@ config DM_CRYPT transparently encrypts the data on it. You'll need to activate the ciphers you're going to use in the cryptoapi configuration. - Information on how to use dm-crypt can be found on - - + For further information on dm-crypt and userspace tools see: + To compile this code as a module, choose M here: the module will be called dm-crypt. -- cgit v0.10.2 From 88e2f901e78232ea42b4e462cf7a9b14d61fb79a Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 24 Dec 2014 14:48:12 +0800 Subject: dm ioctl: fix stale comment above dm_get_inactive_table() dm_table_put() was replaced by dm_put_live_table(). Signed-off-by: Junxiao Bi Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 73f791b..c8a18e4 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -639,8 +639,8 @@ static int check_name(const char *name) /* * On successful return, the caller must not attempt to acquire - * _hash_lock without first calling dm_table_put, because dm_table_destroy - * waits for this dm_table_put and could be called under this lock. + * _hash_lock without first calling dm_put_live_table, because dm_table_destroy + * waits for this dm_put_live_table and could be called under this lock. */ static struct dm_table *dm_get_inactive_table(struct mapped_device *md, int *srcu_idx) { -- cgit v0.10.2 From 9cb1397d585c28c2acff06a6002fc2c8db19a80d Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Thu, 1 Jan 2015 18:52:57 +0100 Subject: dm thin metadata: remove unused dm_pool_get_data_block_size() The thin-pool target doesn't display the data block size as part of its table status, unlike the dm-cache target, so there is no need for dm_pool_get_data_block_size(). This was found using cppcheck. Signed-off-by: Rickard Strandqvist Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 43adbb8..79f6941 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1635,15 +1635,6 @@ int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, return r; } -int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) -{ - down_read(&pmd->root_lock); - *result = pmd->data_block_size; - up_read(&pmd->root_lock); - - return 0; -} - int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { int r = -EINVAL; diff --git a/drivers/md/dm-thin-metadata.h b/drivers/md/dm-thin-metadata.h index 921d15e..fac01a9 100644 --- a/drivers/md/dm-thin-metadata.h +++ b/drivers/md/dm-thin-metadata.h @@ -182,8 +182,6 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); -int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result); - int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result); int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result); -- cgit v0.10.2 From ff658e9c1aae9a84dd06d46f847dc0cd2bf0dd11 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Sun, 11 Jan 2015 12:45:23 +0100 Subject: dm mpath: simplify failure path of dm_multipath_init() Currently the cleanup of all error cases are open-coded. Introduce a common exit path and labels. Signed-off-by: Johannes Thumshirn Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 863fc8c..d376dc8 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1733,16 +1733,15 @@ static int __init dm_multipath_init(void) r = dm_register_target(&multipath_target); if (r < 0) { DMERR("register failed %d", r); - kmem_cache_destroy(_mpio_cache); - return -EINVAL; + r = -EINVAL; + goto bad_register_target; } kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); if (!kmultipathd) { DMERR("failed to create workqueue kmpathd"); - dm_unregister_target(&multipath_target); - kmem_cache_destroy(_mpio_cache); - return -ENOMEM; + r = -ENOMEM; + goto bad_alloc_kmultipathd; } /* @@ -1755,16 +1754,23 @@ static int __init dm_multipath_init(void) WQ_MEM_RECLAIM); if (!kmpath_handlerd) { DMERR("failed to create workqueue kmpath_handlerd"); - destroy_workqueue(kmultipathd); - dm_unregister_target(&multipath_target); - kmem_cache_destroy(_mpio_cache); - return -ENOMEM; + r = -ENOMEM; + goto bad_alloc_kmpath_handlerd; } DMINFO("version %u.%u.%u loaded", multipath_target.version[0], multipath_target.version[1], multipath_target.version[2]); + return 0; + +bad_alloc_kmpath_handlerd: + destroy_workqueue(kmultipathd); +bad_alloc_kmultipathd: + dm_unregister_target(&multipath_target); +bad_register_target: + kmem_cache_destroy(_mpio_cache); + return r; } -- cgit v0.10.2 From 0c8f86322f4debca6dc899603e56397a6ae7c2dc Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 2 Feb 2015 14:38:29 +0100 Subject: dm snapshot: remove unnecessary NULL checks before vfree() calls The vfree() function performs input parameter validation. Thus the NULL pointer test around vfree() calls is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index d6e8817..808b841 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -200,16 +200,11 @@ err_area: static void free_area(struct pstore *ps) { - if (ps->area) - vfree(ps->area); + vfree(ps->area); ps->area = NULL; - - if (ps->zero_area) - vfree(ps->zero_area); + vfree(ps->zero_area); ps->zero_area = NULL; - - if (ps->header_area) - vfree(ps->header_area); + vfree(ps->header_area); ps->header_area = NULL; } @@ -605,8 +600,7 @@ static void persistent_dtr(struct dm_exception_store *store) free_area(ps); /* Allocated in persistent_read_metadata */ - if (ps->callbacks) - vfree(ps->callbacks); + vfree(ps->callbacks); kfree(ps); } -- cgit v0.10.2 From a4afe76b2b922e6197944d7be0be7a18b53175ae Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Sat, 24 Jan 2015 15:10:49 -0700 Subject: dm: inherit QUEUE_FLAG_SG_GAPS flags from underlying queues A DM device must inherit the QUEUE_FLAG_SG_GAPS flags from its underlying block devices' request queues. This fixes problems when submitting cloned requests to multipathed devices requiring virtually contiguous buffers. Signed-off-by: Keith Busch Signed-off-by: Mike Snitzer diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 14954d8..6554d91 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1389,6 +1389,14 @@ static int queue_supports_sg_merge(struct dm_target *ti, struct dm_dev *dev, return q && !test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); } +static int queue_supports_sg_gaps(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !test_bit(QUEUE_FLAG_SG_GAPS, &q->queue_flags); +} + static bool dm_table_all_devices_attribute(struct dm_table *t, iterate_devices_callout_fn func) { @@ -1509,6 +1517,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q); + if (dm_table_all_devices_attribute(t, queue_supports_sg_gaps)) + queue_flag_clear_unlocked(QUEUE_FLAG_SG_GAPS, q); + else + queue_flag_set_unlocked(QUEUE_FLAG_SG_GAPS, q); + dm_table_set_integrity(t); /* -- cgit v0.10.2