diff options
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r-- | drivers/md/dm.c | 273 |
1 files changed, 200 insertions, 73 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 58f3927..8f37ed2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -19,6 +19,7 @@ #include <linux/idr.h> #include <linux/hdreg.h> #include <linux/delay.h> +#include <linux/wait.h> #include <trace/events/block.h> @@ -117,6 +118,7 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo); #define DMF_NOFLUSH_SUSPENDING 5 #define DMF_MERGE_IS_OPTIONAL 6 #define DMF_DEFERRED_REMOVE 7 +#define DMF_SUSPENDED_INTERNALLY 8 /* * A dummy definition to make RCU happy. @@ -140,7 +142,7 @@ struct mapped_device { * Use dm_get_live_table{_fast} or take suspend_lock for * dereference. */ - struct dm_table *map; + struct dm_table __rcu *map; struct list_head table_devices; struct mutex table_devices_lock; @@ -525,14 +527,15 @@ retry: goto out; tgt = dm_table_get_target(map, 0); + if (!tgt->type->ioctl) + goto out; if (dm_suspended_md(md)) { r = -EAGAIN; goto out; } - if (tgt->type->ioctl) - r = tgt->type->ioctl(tgt, cmd, arg); + r = tgt->type->ioctl(tgt, cmd, arg); out: dm_put_live_table(md, srcu_idx); @@ -1607,9 +1610,9 @@ static int dm_merge_bvec(struct request_queue *q, * Find maximum amount of I/O that won't need splitting */ max_sectors = min(max_io_len(bvm->bi_sector, ti), - (sector_t) BIO_MAX_SECTORS); + (sector_t) queue_max_sectors(q)); max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size; - if (max_size < 0) + if (unlikely(max_size < 0)) /* this shouldn't _ever_ happen */ max_size = 0; /* @@ -1621,10 +1624,10 @@ static int dm_merge_bvec(struct request_queue *q, max_size = ti->type->merge(ti, bvm, biovec, max_size); /* * If the target doesn't support merge method and some of the devices - * provided their merge_bvec method (we know this by looking at - * queue_max_hw_sectors), then we can't allow bios with multiple vector - * entries. So always set max_size to 0, and the code below allows - * just one page. + * provided their merge_bvec method (we know this by looking for the + * max_hw_sectors that dm_set_device_limits may set), then we can't + * allow bios with multiple vector entries. So always set max_size + * to 0, and the code below allows just one page. */ else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) max_size = 0; @@ -2332,7 +2335,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, merge_is_optional = dm_table_merge_is_optional(t); - old_map = md->map; + old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); rcu_assign_pointer(md->map, t); md->immutable_target_type = dm_table_get_immutable_target_type(t); @@ -2341,7 +2344,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); else clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags); - dm_sync_table(md); + if (old_map) + dm_sync_table(md); return old_map; } @@ -2351,7 +2355,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, */ static struct dm_table *__unbind(struct mapped_device *md) { - struct dm_table *map = md->map; + struct dm_table *map = rcu_dereference_protected(md->map, 1); if (!map) return NULL; @@ -2716,36 +2720,18 @@ static void unlock_fs(struct mapped_device *md) } /* - * We need to be able to change a mapping table under a mounted - * filesystem. For example we might want to move some data in - * the background. Before the table can be swapped with - * dm_bind_table, dm_suspend must be called to flush any in - * flight bios and ensure that any further io gets deferred. - */ -/* - * Suspend mechanism in request-based dm. + * If __dm_suspend returns 0, the device is completely quiescent + * now. There is no request-processing activity. All new requests + * are being added to md->deferred list. * - * 1. Flush all I/Os by lock_fs() if needed. - * 2. Stop dispatching any I/O by stopping the request_queue. - * 3. Wait for all in-flight I/Os to be completed or requeued. - * - * To abort suspend, start the request_queue. + * Caller must hold md->suspend_lock */ -int dm_suspend(struct mapped_device *md, unsigned suspend_flags) +static int __dm_suspend(struct mapped_device *md, struct dm_table *map, + unsigned suspend_flags, int interruptible) { - struct dm_table *map = NULL; - int r = 0; - int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; - int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; - - mutex_lock(&md->suspend_lock); - - if (dm_suspended_md(md)) { - r = -EINVAL; - goto out_unlock; - } - - map = md->map; + bool do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG; + bool noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG; + int r; /* * DMF_NOFLUSH_SUSPENDING must be set before presuspend. @@ -2754,7 +2740,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) if (noflush) set_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - /* This does not get reverted if there's an error later. */ + /* + * This gets reverted if there's an error later and the targets + * provide the .presuspend_undo hook. + */ dm_table_presuspend_targets(map); /* @@ -2765,8 +2754,10 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) */ if (!noflush && do_lockfs) { r = lock_fs(md); - if (r) - goto out_unlock; + if (r) { + dm_table_presuspend_undo_targets(map); + return r; + } } /* @@ -2782,7 +2773,8 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) * flush_workqueue(md->wq). */ set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); - synchronize_srcu(&md->io_barrier); + if (map) + synchronize_srcu(&md->io_barrier); /* * Stop md->queue before flushing md->wq in case request-based @@ -2798,11 +2790,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) * We call dm_wait_for_completion to wait for all existing requests * to finish. */ - r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); + r = dm_wait_for_completion(md, interruptible); if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - synchronize_srcu(&md->io_barrier); + if (map) + synchronize_srcu(&md->io_barrier); /* were we interrupted ? */ if (r < 0) { @@ -2812,14 +2805,56 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) start_queue(md->queue); unlock_fs(md); - goto out_unlock; /* pushback list is already flushed, so skip flush */ + dm_table_presuspend_undo_targets(map); + /* pushback list is already flushed, so skip flush */ } - /* - * If dm_wait_for_completion returned 0, the device is completely - * quiescent now. There is no request-processing activity. All new - * requests are being added to md->deferred list. - */ + return r; +} + +/* + * We need to be able to change a mapping table under a mounted + * filesystem. For example we might want to move some data in + * the background. Before the table can be swapped with + * dm_bind_table, dm_suspend must be called to flush any in + * flight bios and ensure that any further io gets deferred. + */ +/* + * Suspend mechanism in request-based dm. + * + * 1. Flush all I/Os by lock_fs() if needed. + * 2. Stop dispatching any I/O by stopping the request_queue. + * 3. Wait for all in-flight I/Os to be completed or requeued. + * + * To abort suspend, start the request_queue. + */ +int dm_suspend(struct mapped_device *md, unsigned suspend_flags) +{ + struct dm_table *map = NULL; + int r = 0; + +retry: + mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); + + if (dm_suspended_md(md)) { + r = -EINVAL; + goto out_unlock; + } + + if (dm_suspended_internally_md(md)) { + /* already internally suspended, wait for internal resume */ + mutex_unlock(&md->suspend_lock); + r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); + if (r) + return r; + goto retry; + } + + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + + r = __dm_suspend(md, map, suspend_flags, TASK_INTERRUPTIBLE); + if (r) + goto out_unlock; set_bit(DMF_SUSPENDED, &md->flags); @@ -2830,22 +2865,13 @@ out_unlock: return r; } -int dm_resume(struct mapped_device *md) +static int __dm_resume(struct mapped_device *md, struct dm_table *map) { - int r = -EINVAL; - struct dm_table *map = NULL; - - mutex_lock(&md->suspend_lock); - if (!dm_suspended_md(md)) - goto out; - - map = md->map; - if (!map || !dm_table_get_size(map)) - goto out; - - r = dm_table_resume_targets(map); - if (r) - goto out; + if (map) { + int r = dm_table_resume_targets(map); + if (r) + return r; + } dm_queue_flush(md); @@ -2859,6 +2885,37 @@ int dm_resume(struct mapped_device *md) unlock_fs(md); + return 0; +} + +int dm_resume(struct mapped_device *md) +{ + int r = -EINVAL; + struct dm_table *map = NULL; + +retry: + mutex_lock_nested(&md->suspend_lock, SINGLE_DEPTH_NESTING); + + if (!dm_suspended_md(md)) + goto out; + + if (dm_suspended_internally_md(md)) { + /* already internally suspended, wait for internal resume */ + mutex_unlock(&md->suspend_lock); + r = wait_on_bit(&md->flags, DMF_SUSPENDED_INTERNALLY, TASK_INTERRUPTIBLE); + if (r) + return r; + goto retry; + } + + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + if (!map || !dm_table_get_size(map)) + goto out; + + r = __dm_resume(md, map); + if (r) + goto out; + clear_bit(DMF_SUSPENDED, &md->flags); r = 0; @@ -2872,15 +2929,80 @@ out: * Internal suspend/resume works like userspace-driven suspend. It waits * until all bios finish and prevents issuing new bios to the target drivers. * It may be used only from the kernel. - * - * Internal suspend holds md->suspend_lock, which prevents interaction with - * userspace-driven suspend. */ -void dm_internal_suspend(struct mapped_device *md) +static void __dm_internal_suspend(struct mapped_device *md, unsigned suspend_flags) { - mutex_lock(&md->suspend_lock); + struct dm_table *map = NULL; + + if (dm_suspended_internally_md(md)) + return; /* nested internal suspend */ + + if (dm_suspended_md(md)) { + set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); + return; /* nest suspend */ + } + + map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock)); + + /* + * Using TASK_UNINTERRUPTIBLE because only NOFLUSH internal suspend is + * supported. Properly supporting a TASK_INTERRUPTIBLE internal suspend + * would require changing .presuspend to return an error -- avoid this + * until there is a need for more elaborate variants of internal suspend. + */ + (void) __dm_suspend(md, map, suspend_flags, TASK_UNINTERRUPTIBLE); + + set_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); + + dm_table_postsuspend_targets(map); +} + +static void __dm_internal_resume(struct mapped_device *md) +{ + if (!dm_suspended_internally_md(md)) + return; /* resume from nested internal suspend */ + if (dm_suspended_md(md)) + goto done; /* resume from nested suspend */ + + /* + * NOTE: existing callers don't need to call dm_table_resume_targets + * (which may fail -- so best to avoid it for now by passing NULL map) + */ + (void) __dm_resume(md, NULL); + +done: + clear_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); + smp_mb__after_atomic(); + wake_up_bit(&md->flags, DMF_SUSPENDED_INTERNALLY); +} + +void dm_internal_suspend_noflush(struct mapped_device *md) +{ + mutex_lock(&md->suspend_lock); + __dm_internal_suspend(md, DM_SUSPEND_NOFLUSH_FLAG); + mutex_unlock(&md->suspend_lock); +} +EXPORT_SYMBOL_GPL(dm_internal_suspend_noflush); + +void dm_internal_resume(struct mapped_device *md) +{ + mutex_lock(&md->suspend_lock); + __dm_internal_resume(md); + mutex_unlock(&md->suspend_lock); +} +EXPORT_SYMBOL_GPL(dm_internal_resume); + +/* + * Fast variants of internal suspend/resume hold md->suspend_lock, + * which prevents interaction with userspace-driven suspend. + */ + +void dm_internal_suspend_fast(struct mapped_device *md) +{ + mutex_lock(&md->suspend_lock); + if (dm_suspended_md(md) || dm_suspended_internally_md(md)) return; set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); @@ -2889,9 +3011,9 @@ void dm_internal_suspend(struct mapped_device *md) dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); } -void dm_internal_resume(struct mapped_device *md) +void dm_internal_resume_fast(struct mapped_device *md) { - if (dm_suspended_md(md)) + if (dm_suspended_md(md) || dm_suspended_internally_md(md)) goto done; dm_queue_flush(md); @@ -2977,6 +3099,11 @@ int dm_suspended_md(struct mapped_device *md) return test_bit(DMF_SUSPENDED, &md->flags); } +int dm_suspended_internally_md(struct mapped_device *md) +{ + return test_bit(DMF_SUSPENDED_INTERNALLY, &md->flags); +} + int dm_test_deferred_remove_flag(struct mapped_device *md) { return test_bit(DMF_DEFERRED_REMOVE, &md->flags); |