From 83c0d5d5388a8d45f7a45e0ec34adc52a78c81ad Mon Sep 17 00:00:00 2001 From: "Moger, Babu" Date: Sat, 6 Mar 2010 02:29:45 +0000 Subject: dm mpath: pass struct pgpath to pg init done This patch removes some unnecessary argument casting. There is no functional change with this patch. Passes 'struct pgpath' through to pg_init_done() instead of the enclosed 'struct dm_path'. Tested the changes with LSI storage.. CC: Chandra Seetharaman Signed-off-by: Babu Moger Acked-by: Kiyoshi Ueda Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index e81345a..2c6bf74 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1128,8 +1128,7 @@ static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) static void pg_init_done(void *data, int errors) { - struct dm_path *path = data; - struct pgpath *pgpath = path_to_pgpath(path); + struct pgpath *pgpath = data; struct priority_group *pg = pgpath->pg; struct multipath *m = pg->m; unsigned long flags; @@ -1198,7 +1197,7 @@ static void activate_path(struct work_struct *work) container_of(work, struct pgpath, activate_path); scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev), - pg_init_done, &pgpath->path); + pg_init_done, pgpath); } /* -- cgit v0.10.2 From f7b934c8127deebf4eb56fbe4a4ae0da16b6dbcd Mon Sep 17 00:00:00 2001 From: "Moger, Babu" Date: Sat, 6 Mar 2010 02:29:49 +0000 Subject: dm mpath: skip activate_path for failed paths This patch adds two minor fixes while processing device mapper path activation. Skip failed paths while calling activate_path. If the path is already failed then activate_path will fail for sure. We don't have to call in that case. In some case this might cause prolonged retries unnecessarily. Change the misleading message if the path being activated fails with SCSI_DH_NOSYS. Signed-off-by: Babu Moger Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 2c6bf74..ecada41 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -461,6 +461,9 @@ static void process_queued_ios(struct work_struct *work) m->pg_init_count++; m->pg_init_required = 0; list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) { + /* Skip failed paths */ + if (!tmp->is_active) + continue; if (queue_work(kmpath_handlerd, &tmp->activate_path)) m->pg_init_in_progress++; } @@ -1142,8 +1145,8 @@ static void pg_init_done(void *data, int errors) errors = 0; break; } - DMERR("Cannot failover device because scsi_dh_%s was not " - "loaded.", m->hw_handler_name); + DMERR("Could not failover the device: Handler scsi_dh_%s " + "Error %d.", m->hw_handler_name, errors); /* * Fail path for now, so we do not ping pong */ -- cgit v0.10.2 From ecdb2e257abc33ae6798d3ccba87bdafa40ef6b6 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:29:52 +0000 Subject: dm table: remove dm_get from dm_table_get_md Remove the dm_get() in dm_table_get_md() because dm_table_get_md() could be called from presuspend/postsuspend, which are called while mapped_device is in DMF_FREEING state, where dm_get() is not allowed. Justification for that is the lifetime of both objects: As far as the current dm design/implementation, mapped_device is never freed while targets are doing something, because dm core waits for targets to become quiet in dm_put() using presuspend/postsuspend. So targets should be able to touch mapped_device without holding reference count of the mapped_device, and we should allow targets to touch mapped_device even if it is in DMF_FREEING state. Backgrounds: I'm trying to remove the multipath internal queue, since dm core now has a generic queue for request-based dm. In the patch-set, the multipath target wants to request dm core to start/stop queue. One of such start/stop requests can happen during postsuspend() while the target waits for pg-init to complete, because the target stops queue when starting pg-init and tries to restart it when completing pg-init. Since queue belongs to mapped_device, it involves calling dm_table_get_md() and dm_put(). On the other hand, postsuspend() is called in dm_put() for mapped_device which is in DMF_FREEING state, and that triggers BUG_ON(DMF_FREEING) in the 2nd dm_put(). I had tried to solve this problem by changing only multipath not to touch mapped_device which is in DMF_FREEING state, but I couldn't and I came up with a question why we need dm_get() in dm_table_get_md(). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4b22feb..7d70cca 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1231,8 +1231,6 @@ void dm_table_unplug_all(struct dm_table *t) struct mapped_device *dm_table_get_md(struct dm_table *t) { - dm_get(t->md); - return t->md; } diff --git a/drivers/md/dm-uevent.c b/drivers/md/dm-uevent.c index c7c555a..6b1e3b6 100644 --- a/drivers/md/dm-uevent.c +++ b/drivers/md/dm-uevent.c @@ -187,7 +187,7 @@ void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti, if (event_type >= ARRAY_SIZE(_dm_uevent_type_names)) { DMERR("%s: Invalid event_type %d", __func__, event_type); - goto out; + return; } event = dm_build_path_uevent(md, ti, @@ -195,12 +195,9 @@ void dm_path_uevent(enum dm_uevent_type event_type, struct dm_target *ti, _dm_uevent_type_names[event_type].name, path, nr_valid_paths); if (IS_ERR(event)) - goto out; + return; dm_uevent_add(md, &event->elist); - -out: - dm_put(md); } EXPORT_SYMBOL_GPL(dm_path_uevent); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index aa4e2aa..21222f5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2699,23 +2699,13 @@ int dm_suspended_md(struct mapped_device *md) int dm_suspended(struct dm_target *ti) { - struct mapped_device *md = dm_table_get_md(ti->table); - int r = dm_suspended_md(md); - - dm_put(md); - - return r; + return dm_suspended_md(dm_table_get_md(ti->table)); } EXPORT_SYMBOL_GPL(dm_suspended); int dm_noflush_suspending(struct dm_target *ti) { - struct mapped_device *md = dm_table_get_md(ti->table); - int r = __noflush_suspending(md); - - dm_put(md); - - return r; + return __noflush_suspending(dm_table_get_md(ti->table)); } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -- cgit v0.10.2 From c53a381efbe3d0e0629121b3f0d2b62a0e167791 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 6 Mar 2010 02:29:56 +0000 Subject: dm: document when snapshot has finished merging Update Documentation/device-mapper/snapshot.txt to cover "How to determine when a snapshot has finished merging". Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/Documentation/device-mapper/snapshot.txt b/Documentation/device-mapper/snapshot.txt index e3a77b2..0d5bc46 100644 --- a/Documentation/device-mapper/snapshot.txt +++ b/Documentation/device-mapper/snapshot.txt @@ -122,3 +122,47 @@ volumeGroup-base: 0 2097152 snapshot-merge 254:11 254:12 P 16 brw------- 1 root root 254, 11 29 ago 18:15 /dev/mapper/volumeGroup-base-real brw------- 1 root root 254, 12 29 ago 18:16 /dev/mapper/volumeGroup-base-cow brw------- 1 root root 254, 10 29 ago 18:16 /dev/mapper/volumeGroup-base + + +How to determine when a merging is complete +=========================================== +The snapshot-merge and snapshot status lines end with: + / + +Both and include both data and metadata. +During merging, the number of sectors allocated gets smaller and +smaller. Merging has finished when the number of sectors holding data +is zero, in other words == . + +Here is a practical example (using a hybrid of lvm and dmsetup commands): + +# lvs + LV VG Attr LSize Origin Snap% Move Log Copy% Convert + base volumeGroup owi-a- 4.00g + snap volumeGroup swi-a- 1.00g base 18.97 + +# dmsetup status volumeGroup-snap +0 8388608 snapshot 397896/2097152 1560 + ^^^^ metadata sectors + +# lvconvert --merge -b volumeGroup/snap + Merging of volume snap started. + +# lvs volumeGroup/snap + LV VG Attr LSize Origin Snap% Move Log Copy% Convert + base volumeGroup Owi-a- 4.00g 17.23 + +# dmsetup status volumeGroup-base +0 8388608 snapshot-merge 281688/2097152 1104 + +# dmsetup status volumeGroup-base +0 8388608 snapshot-merge 180480/2097152 712 + +# dmsetup status volumeGroup-base +0 8388608 snapshot-merge 16/2097152 16 + +Merging has finished. + +# lvs + LV VG Attr LSize Origin Snap% Move Log Copy% Convert + base volumeGroup owi-a- 4.00g -- cgit v0.10.2 From fce323dd68e13354071538c765b062859e6f8286 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:29:59 +0000 Subject: dm mpath: avoid storing private suspended state 'suspended' flag in struct multipath was introduced to check whether the multipath target is in suspended state, but the same check is done through dm_suspended() now, so remove the flag and related code. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Cc: Mike Anderson Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index ecada41..8fa0c95 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -95,8 +95,6 @@ struct multipath { mempool_t *mpio_pool; struct mutex work_mutex; - - unsigned suspended; /* Don't create new I/O internally when set. */ }; /* @@ -1278,7 +1276,6 @@ static void multipath_postsuspend(struct dm_target *ti) struct multipath *m = ti->private; mutex_lock(&m->work_mutex); - m->suspended = 1; flush_multipath_work(); mutex_unlock(&m->work_mutex); } @@ -1291,10 +1288,6 @@ static void multipath_resume(struct dm_target *ti) struct multipath *m = (struct multipath *) ti->private; unsigned long flags; - mutex_lock(&m->work_mutex); - m->suspended = 0; - mutex_unlock(&m->work_mutex); - spin_lock_irqsave(&m->lock, flags); m->queue_if_no_path = m->saved_queue_if_no_path; spin_unlock_irqrestore(&m->lock, flags); @@ -1430,11 +1423,6 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) mutex_lock(&m->work_mutex); - if (m->suspended) { - r = -EBUSY; - goto out; - } - if (dm_suspended(ti)) { r = -EBUSY; goto out; -- cgit v0.10.2 From d0259bf0eefc503d3c9c9ccda35033c3dd3aac30 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:30:02 +0000 Subject: dm mpath: hold io until all pg_inits completed m->queue_io is set to block processing I/Os, and it needs to be kept while pg-init, which issues multiple path activations, is in progress. But m->queue is cleared when a path activation completes without error in pg_init_done(), even while other path activations are in progress. That may cause undesired -EIO on paths which are not complete activation. This patch fixes that by not clearing m->queue_io until all path activations complete. (Before the hardware handlers were moved into the SCSI layer, pg_init only used one path.) Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 8fa0c95..ae187e5 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1181,14 +1181,19 @@ static void pg_init_done(void *data, int errors) m->current_pgpath = NULL; m->current_pg = NULL; } - } else if (!m->pg_init_required) { - m->queue_io = 0; + } else if (!m->pg_init_required) pg->bypassed = 0; - } - m->pg_init_in_progress--; - if (!m->pg_init_in_progress) - queue_work(kmultipathd, &m->process_queued_ios); + if (--m->pg_init_in_progress) + /* Activations of other paths are still on going */ + goto out; + + if (!m->pg_init_required) + m->queue_io = 0; + + queue_work(kmultipathd, &m->process_queued_ios); + +out: spin_unlock_irqrestore(&m->lock, flags); } -- cgit v0.10.2 From 2bded7bd7e8b12a913b0b58167a48220560e1514 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:32:13 +0000 Subject: dm mpath: wait for pg_init completion when suspending When suspending the device we must wait for all I/O to complete, but pg-init may be still in progress even after flushing the workqueue for kmpath_handlerd in multipath_postsuspend. This patch waits for pg-init completion correctly in multipath_postsuspend(). Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index ae187e5..030bc2a 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -69,6 +69,7 @@ struct multipath { struct list_head priority_groups; unsigned pg_init_required; /* pg_init needs calling? */ unsigned pg_init_in_progress; /* Only one pg_init allowed at once */ + wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ unsigned nr_valid_paths; /* Total number of usable paths */ struct pgpath *current_pgpath; @@ -200,6 +201,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) m->queue_io = 1; INIT_WORK(&m->process_queued_ios, process_queued_ios); INIT_WORK(&m->trigger_event, trigger_event); + init_waitqueue_head(&m->pg_init_wait); mutex_init(&m->work_mutex); m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); if (!m->mpio_pool) { @@ -891,9 +893,34 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, return r; } -static void flush_multipath_work(void) +static void multipath_wait_for_pg_init_completion(struct multipath *m) +{ + DECLARE_WAITQUEUE(wait, current); + unsigned long flags; + + add_wait_queue(&m->pg_init_wait, &wait); + + while (1) { + set_current_state(TASK_UNINTERRUPTIBLE); + + spin_lock_irqsave(&m->lock, flags); + if (!m->pg_init_in_progress) { + spin_unlock_irqrestore(&m->lock, flags); + break; + } + spin_unlock_irqrestore(&m->lock, flags); + + io_schedule(); + } + set_current_state(TASK_RUNNING); + + remove_wait_queue(&m->pg_init_wait, &wait); +} + +static void flush_multipath_work(struct multipath *m) { flush_workqueue(kmpath_handlerd); + multipath_wait_for_pg_init_completion(m); flush_workqueue(kmultipathd); flush_scheduled_work(); } @@ -902,7 +929,7 @@ static void multipath_dtr(struct dm_target *ti) { struct multipath *m = ti->private; - flush_multipath_work(); + flush_multipath_work(m); free_multipath(m); } @@ -1193,6 +1220,11 @@ static void pg_init_done(void *data, int errors) queue_work(kmultipathd, &m->process_queued_ios); + /* + * Wake up any thread waiting to suspend. + */ + wake_up(&m->pg_init_wait); + out: spin_unlock_irqrestore(&m->lock, flags); } @@ -1281,7 +1313,7 @@ static void multipath_postsuspend(struct dm_target *ti) struct multipath *m = ti->private; mutex_lock(&m->work_mutex); - flush_multipath_work(); + flush_multipath_work(m); mutex_unlock(&m->work_mutex); } -- cgit v0.10.2 From fb61264297ca42a2a132f0433f75ccf7fd304ac6 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Sat, 6 Mar 2010 02:32:18 +0000 Subject: dm mpath: refactor pg_init This patch pulls the pg_init path activation code out of process_queued_ios() into a new function. No functional change. Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 030bc2a..c133548 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -235,6 +235,21 @@ static void free_multipath(struct multipath *m) * Path selection *-----------------------------------------------*/ +static void __pg_init_all_paths(struct multipath *m) +{ + struct pgpath *pgpath; + + m->pg_init_count++; + m->pg_init_required = 0; + list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) { + /* Skip failed paths */ + if (!pgpath->is_active) + continue; + if (queue_work(kmpath_handlerd, &pgpath->activate_path)) + m->pg_init_in_progress++; + } +} + static void __switch_pg(struct multipath *m, struct pgpath *pgpath) { m->current_pg = pgpath->pg; @@ -439,7 +454,7 @@ static void process_queued_ios(struct work_struct *work) { struct multipath *m = container_of(work, struct multipath, process_queued_ios); - struct pgpath *pgpath = NULL, *tmp; + struct pgpath *pgpath = NULL; unsigned must_queue = 1; unsigned long flags; @@ -457,17 +472,9 @@ static void process_queued_ios(struct work_struct *work) (!pgpath && !m->queue_if_no_path)) must_queue = 0; - if (m->pg_init_required && !m->pg_init_in_progress && pgpath) { - m->pg_init_count++; - m->pg_init_required = 0; - list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) { - /* Skip failed paths */ - if (!tmp->is_active) - continue; - if (queue_work(kmpath_handlerd, &tmp->activate_path)) - m->pg_init_in_progress++; - } - } + if (m->pg_init_required && !m->pg_init_in_progress && pgpath) + __pg_init_all_paths(m); + out: spin_unlock_irqrestore(&m->lock, flags); if (!must_queue) -- cgit v0.10.2 From ede5ea0b8b815560dc54c712536fdf0b456b6ad0 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 6 Mar 2010 02:32:22 +0000 Subject: dm raid1: always return error if all legs fail If all mirror legs fail, always return an error instead of holding the bio, even if the handle_errors option was set. At present it is the responsibility of the driver underneath us to deal with retries, multipath etc. The patch adds the bio to the failures list instead of holding it directly. do_failures tests first if all legs failed and, if so, returns the bio with -EIO. If any leg is still alive and handle_errors is set, do_failures calls hold_bio. Reviewed-by: Takahiro Yasui Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 6c1046d..de26fde 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -737,9 +737,12 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) dm_rh_delay(ms->rh, bio); while ((bio = bio_list_pop(&nosync))) { - if (unlikely(ms->leg_failure) && errors_handled(ms)) - hold_bio(ms, bio); - else { + if (unlikely(ms->leg_failure) && errors_handled(ms)) { + spin_lock_irq(&ms->lock); + bio_list_add(&ms->failures, bio); + spin_unlock_irq(&ms->lock); + wakeup_mirrord(ms); + } else { map_bio(get_default_mirror(ms), bio); generic_make_request(bio); } -- cgit v0.10.2 From 0f3649a9e305ea22eb196a84a2d7520afcaa6060 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 6 Mar 2010 02:32:24 +0000 Subject: dm ioctl: only issue uevent on resume if state changed Only issue a uevent on a resume if the state of the device changed, i.e. if it was suspended and/or its table was replaced. Signed-off-by: Dave Wysochanski Signed-off-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 1d66932..e3cf568 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -897,16 +897,17 @@ static int do_resume(struct dm_ioctl *param) set_disk_ro(dm_disk(md), 1); } - if (dm_suspended_md(md)) + if (dm_suspended_md(md)) { r = dm_resume(md); + if (!r) + dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr); + } if (old_map) dm_table_destroy(old_map); - if (!r) { - dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr); + if (!r) r = __dev_status(md, param); - } dm_put(md); return r; -- cgit v0.10.2 From 8215d6ec5fee1e76545decea2cd73717efb5cb42 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Sat, 6 Mar 2010 02:32:27 +0000 Subject: dm table: remove unused dm_get_device range parameters Remove unused parameters(start and len) of dm_get_device() and fix the callers. Signed-off-by: Nikanth Karthikesan Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index a936372..3bdbb61 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1160,8 +1160,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) } cc->start = tmpll; - if (dm_get_device(ti, argv[3], cc->start, ti->len, - dm_table_get_mode(ti->table), &cc->dev)) { + if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), &cc->dev)) { ti->error = "Device lookup failed"; goto bad_device; } diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index ebe7381..8520528 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -156,8 +156,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - if (dm_get_device(ti, argv[0], dc->start_read, ti->len, - dm_table_get_mode(ti->table), &dc->dev_read)) { + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), + &dc->dev_read)) { ti->error = "Device lookup failed"; goto bad; } @@ -177,8 +177,8 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_dev_read; } - if (dm_get_device(ti, argv[3], dc->start_write, ti->len, - dm_table_get_mode(ti->table), &dc->dev_write)) { + if (dm_get_device(ti, argv[3], dm_table_get_mode(ti->table), + &dc->dev_write)) { ti->error = "Write device lookup failed"; goto bad_dev_read; } diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 82f7d6e..9200dbf 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -47,8 +47,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) } lc->start = tmp; - if (dm_get_device(ti, argv[0], lc->start, ti->len, - dm_table_get_mode(ti->table), &lc->dev)) { + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &lc->dev)) { ti->error = "dm-linear: Device lookup failed"; goto bad; } diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 7035582..5a08be0 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -543,8 +543,7 @@ static int disk_ctr(struct dm_dirty_log *log, struct dm_target *ti, return -EINVAL; } - r = dm_get_device(ti, argv[0], 0, 0 /* FIXME */, - FMODE_READ | FMODE_WRITE, &dev); + r = dm_get_device(ti, argv[0], FMODE_READ | FMODE_WRITE, &dev); if (r) return r; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index c133548..826bce7 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -607,8 +607,8 @@ static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps, if (!p) return ERR_PTR(-ENOMEM); - r = dm_get_device(ti, shift(as), ti->begin, ti->len, - dm_table_get_mode(ti->table), &p->path.dev); + r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table), + &p->path.dev); if (r) { ti->error = "error getting device"; goto bad; @@ -1505,8 +1505,7 @@ static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) goto out; } - r = dm_get_device(ti, argv[1], ti->begin, ti->len, - dm_table_get_mode(ti->table), &dev); + r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev); if (r) { DMWARN("message: error getting device %s", argv[1]); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index de26fde..6d66ddf 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -920,8 +920,7 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, return -EINVAL; } - if (dm_get_device(ti, argv[0], offset, ti->len, - dm_table_get_mode(ti->table), + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &ms->mirror[mirror].dev)) { ti->error = "Device lookup failure"; return -ENXIO; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index ee8eb28..0789c22 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1081,8 +1081,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) argv++; argc--; - r = dm_get_device(ti, cow_path, 0, 0, - FMODE_READ | FMODE_WRITE, &s->cow); + r = dm_get_device(ti, cow_path, FMODE_READ | FMODE_WRITE, &s->cow); if (r) { ti->error = "Cannot get COW device"; goto bad_cow; @@ -1098,7 +1097,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) argv += args_used; argc -= args_used; - r = dm_get_device(ti, origin_path, 0, ti->len, origin_mode, &s->origin); + r = dm_get_device(ti, origin_path, origin_mode, &s->origin); if (r) { ti->error = "Cannot get origin device"; goto bad_origin; @@ -2100,8 +2099,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - r = dm_get_device(ti, argv[0], 0, ti->len, - dm_table_get_mode(ti->table), &dev); + r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); if (r) { ti->error = "Cannot get target device"; return r; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index bd58703..e610725 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -80,8 +80,7 @@ static int get_stripe(struct dm_target *ti, struct stripe_c *sc, if (sscanf(argv[1], "%llu", &start) != 1) return -EINVAL; - if (dm_get_device(ti, argv[0], start, sc->stripe_width, - dm_table_get_mode(ti->table), + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &sc->stripe[stripe].dev)) return -ENXIO; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 7d70cca..9924ea2 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -429,8 +429,7 @@ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, * it's already present. */ static int __table_get_device(struct dm_table *t, struct dm_target *ti, - const char *path, sector_t start, sector_t len, - fmode_t mode, struct dm_dev **result) + const char *path, fmode_t mode, struct dm_dev **result) { int r; dev_t uninitialized_var(dev); @@ -527,11 +526,10 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, } EXPORT_SYMBOL_GPL(dm_set_device_limits); -int dm_get_device(struct dm_target *ti, const char *path, sector_t start, - sector_t len, fmode_t mode, struct dm_dev **result) +int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, + struct dm_dev **result) { - return __table_get_device(ti->table, ti, path, - start, len, mode, result); + return __table_get_device(ti->table, ti, path, mode, result); } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index d4c9c0b..1381cd9 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -118,10 +118,9 @@ struct dm_dev { /* * Constructors should call these functions to ensure destination devices * are opened/closed correctly. - * FIXME: too many arguments. */ -int dm_get_device(struct dm_target *ti, const char *path, sector_t start, - sector_t len, fmode_t mode, struct dm_dev **result); +int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, + struct dm_dev **result); void dm_put_device(struct dm_target *ti, struct dm_dev *d); /* -- cgit v0.10.2 From a97f925a32aad2a37971d7bfb657006acf04e42d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sat, 6 Mar 2010 02:32:29 +0000 Subject: dm: free dm_io before bio_endio not after Free the dm_io structure before calling bio_endio() instead of after it, to ensure that the io_pool containing it is not referenced after it is freed. This partially fixes a problem described here https://www.redhat.com/archives/dm-devel/2010-February/msg00109.html thread 1: bio_endio(bio, io_error); /* scheduling happens */ thread 2: close the device remove the device thread 1: free_io(md, io); Thread 2, when removing the device, sees non-empty md->io_pool (because the io hasn't been freed by thread 1 yet) and may crash with BUG in mempool_free. Thread 1 may also crash, when freeing into a nonexisting mempool. To fix this we must make sure that bio_endio() is the last call and the md structure is not accessed afterwards. There is another bio_endio in process_barrier, but it is called from the thread and the thread is destroyed prior to freeing the mempools, so this call is not affected by the bug. A similar bug exists with module unloads - the module may be unloaded immediately after bio_endio - but that is more difficult to fix. Signed-off-by: Mikulas Patocka Cc: stable@kernel.org Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 21222f5..7199846 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -635,8 +635,10 @@ static void dec_pending(struct dm_io *io, int error) if (!md->barrier_error && io_error != -EOPNOTSUPP) md->barrier_error = io_error; end_io_acct(io); + free_io(md, io); } else { end_io_acct(io); + free_io(md, io); if (io_error != DM_ENDIO_REQUEUE) { trace_block_bio_complete(md->queue, bio); @@ -644,8 +646,6 @@ static void dec_pending(struct dm_io *io, int error) bio_endio(bio, io_error); } } - - free_io(md, io); } } -- cgit v0.10.2 From 3abf85b5b5851b5f28d3d8a920ebb844edd08352 Mon Sep 17 00:00:00 2001 From: Peter Rajnoha Date: Sat, 6 Mar 2010 02:32:31 +0000 Subject: dm ioctl: introduce flag indicating uevent was generated Set a new DM_UEVENT_GENERATED_FLAG when returning from ioctls to indicate that a uevent was actually generated. This tells the userspace caller that it may need to wait for the event to be processed. Signed-off-by: Peter Rajnoha Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index e3cf568..d7500e1 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -285,7 +285,8 @@ retry: up_write(&_hash_lock); } -static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) +static int dm_hash_rename(uint32_t cookie, uint32_t *flags, const char *old, + const char *new) { char *new_name, *old_name; struct hash_cell *hc; @@ -344,7 +345,8 @@ static int dm_hash_rename(uint32_t cookie, const char *old, const char *new) dm_table_put(table); } - dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie); + if (!dm_kobject_uevent(hc->md, KOBJ_CHANGE, cookie)) + *flags |= DM_UEVENT_GENERATED_FLAG; dm_put(hc->md); up_write(&_hash_lock); @@ -736,10 +738,10 @@ static int dev_remove(struct dm_ioctl *param, size_t param_size) __hash_remove(hc); up_write(&_hash_lock); - dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr); + if (!dm_kobject_uevent(md, KOBJ_REMOVE, param->event_nr)) + param->flags |= DM_UEVENT_GENERATED_FLAG; dm_put(md); - param->data_size = 0; return 0; } @@ -773,7 +775,9 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) return r; param->data_size = 0; - return dm_hash_rename(param->event_nr, param->name, new_name); + + return dm_hash_rename(param->event_nr, ¶m->flags, param->name, + new_name); } static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) @@ -899,8 +903,8 @@ static int do_resume(struct dm_ioctl *param) if (dm_suspended_md(md)) { r = dm_resume(md); - if (!r) - dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr); + if (!r && !dm_kobject_uevent(md, KOBJ_CHANGE, param->event_nr)) + param->flags |= DM_UEVENT_GENERATED_FLAG; } if (old_map) @@ -1477,6 +1481,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) { /* Always clear this flag */ param->flags &= ~DM_BUFFER_FULL_FLAG; + param->flags &= ~DM_UEVENT_GENERATED_FLAG; /* Ignores parameters */ if (cmd == DM_REMOVE_ALL_CMD || diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7199846..d21e128 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2618,18 +2618,19 @@ out: /*----------------------------------------------------------------- * Event notification. *---------------------------------------------------------------*/ -void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, +int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, unsigned cookie) { char udev_cookie[DM_COOKIE_LENGTH]; char *envp[] = { udev_cookie, NULL }; if (!cookie) - kobject_uevent(&disk_to_dev(md->disk)->kobj, action); + return kobject_uevent(&disk_to_dev(md->disk)->kobj, action); else { snprintf(udev_cookie, DM_COOKIE_LENGTH, "%s=%u", DM_COOKIE_ENV_VAR_NAME, cookie); - kobject_uevent_env(&disk_to_dev(md->disk)->kobj, action, envp); + return kobject_uevent_env(&disk_to_dev(md->disk)->kobj, + action, envp); } } diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 8dadaa5..bad1724 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -125,8 +125,8 @@ void dm_stripe_exit(void); int dm_open_count(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md); -void dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, - unsigned cookie); +int dm_kobject_uevent(struct mapped_device *md, enum kobject_action action, + unsigned cookie); int dm_io_init(void); void dm_io_exit(void); diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h index aa95508..2c445e1 100644 --- a/include/linux/dm-ioctl.h +++ b/include/linux/dm-ioctl.h @@ -266,9 +266,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 16 +#define DM_VERSION_MINOR 17 #define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2009-11-05)" +#define DM_VERSION_EXTRA "-ioctl (2010-03-05)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ @@ -316,4 +316,9 @@ enum { */ #define DM_QUERY_INACTIVE_TABLE_FLAG (1 << 12) /* In */ +/* + * If set, a uevent was generated for which the caller may need to wait. + */ +#define DM_UEVENT_GENERATED_FLAG (1 << 13) /* Out */ + #endif /* _LINUX_DM_IOCTL_H */ -- cgit v0.10.2 From 924e600d417ead9ef67043988055ba236f114718 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sat, 6 Mar 2010 02:32:33 +0000 Subject: dm: eliminate some holes data structures Eliminate a 4-byte hole in 'struct dm_io_memory' by moving 'offset' above the 'ptr' to which it applies (size reduced from 24 to 16 bytes). And by association, 1-4 byte hole is eliminated in 'struct dm_io_request' (size reduced from 56 to 48 bytes). Eliminate all 6 4-byte holes and 1 cache-line in 'struct dm_snapshot' (size reduced from 392 to 368 bytes). Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 0789c22..5485377 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -83,10 +83,10 @@ struct dm_snapshot { /* Whether or not owning mapped_device is suspended */ int suspended; - mempool_t *pending_pool; - atomic_t pending_exceptions_count; + mempool_t *pending_pool; + struct dm_exception_table pending; struct dm_exception_table complete; @@ -96,6 +96,11 @@ struct dm_snapshot { */ spinlock_t pe_lock; + /* Chunks with outstanding reads */ + spinlock_t tracked_chunk_lock; + mempool_t *tracked_chunk_pool; + struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; + /* The on disk metadata handler */ struct dm_exception_store *store; @@ -105,10 +110,12 @@ struct dm_snapshot { struct bio_list queued_bios; struct work_struct queued_bios_work; - /* Chunks with outstanding reads */ - mempool_t *tracked_chunk_pool; - spinlock_t tracked_chunk_lock; - struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; + /* Wait for events based on state_bits */ + unsigned long state_bits; + + /* Range of chunks currently being merged. */ + chunk_t first_merging_chunk; + int num_merging_chunks; /* * The merge operation failed if this flag is set. @@ -125,13 +132,6 @@ struct dm_snapshot { */ int merge_failed; - /* Wait for events based on state_bits */ - unsigned long state_bits; - - /* Range of chunks currently being merged. */ - chunk_t first_merging_chunk; - int num_merging_chunks; - /* * Incoming bios that overlap with chunks being merged must wait * for them to be committed. diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index b6bf17e..5c9186b 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -37,14 +37,14 @@ enum dm_io_mem_type { struct dm_io_memory { enum dm_io_mem_type type; + unsigned offset; + union { struct page_list *pl; struct bio_vec *bvec; void *vma; void *addr; } ptr; - - unsigned offset; }; struct dm_io_notify { -- cgit v0.10.2 From f070304094edb8d516423e79edd27c97ec2020b0 Mon Sep 17 00:00:00 2001 From: Takahiro Yasui Date: Sat, 6 Mar 2010 02:32:35 +0000 Subject: dm raid1: fix deadlock when suspending failed device To prevent deadlock, bios in the hold list should be flushed before dm_rh_stop_recovery() is called in mirror_suspend(). The recovery can't start because there are pending bios and therefore dm_rh_stop_recovery deadlocks. When there are pending bios in the hold list, the recovery waits for the completion of the bios after recovery_count is acquired. The recovery_count is released when the recovery finished, however, the bios in the hold list are processed after dm_rh_stop_recovery() in mirror_presuspend(). dm_rh_stop_recovery() also acquires recovery_count, then deadlock occurs. Signed-off-by: Takahiro Yasui Signed-off-by: Alasdair G Kergon Reviewed-by: Mikulas Patocka diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 6d66ddf..ddda531 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -465,9 +465,17 @@ static void map_region(struct dm_io_region *io, struct mirror *m, static void hold_bio(struct mirror_set *ms, struct bio *bio) { /* - * If device is suspended, complete the bio. + * Lock is required to avoid race condition during suspend + * process. */ + spin_lock_irq(&ms->lock); + if (atomic_read(&ms->suspend)) { + spin_unlock_irq(&ms->lock); + + /* + * If device is suspended, complete the bio. + */ if (dm_noflush_suspending(ms->ti)) bio_endio(bio, DM_ENDIO_REQUEUE); else @@ -478,7 +486,6 @@ static void hold_bio(struct mirror_set *ms, struct bio *bio) /* * Hold bio until the suspend is complete. */ - spin_lock_irq(&ms->lock); bio_list_add(&ms->holds, bio); spin_unlock_irq(&ms->lock); } @@ -1261,6 +1268,20 @@ static void mirror_presuspend(struct dm_target *ti) atomic_set(&ms->suspend, 1); /* + * Process bios in the hold list to start recovery waiting + * for bios in the hold list. After the process, no bio has + * a chance to be added in the hold list because ms->suspend + * is set. + */ + spin_lock_irq(&ms->lock); + holds = ms->holds; + bio_list_init(&ms->holds); + spin_unlock_irq(&ms->lock); + + while ((bio = bio_list_pop(&holds))) + hold_bio(ms, bio); + + /* * We must finish up all the work that we've * generated (i.e. recovery work). */ @@ -1280,22 +1301,6 @@ static void mirror_presuspend(struct dm_target *ti) * we know that all of our I/O has been pushed. */ flush_workqueue(ms->kmirrord_wq); - - /* - * Now set ms->suspend is set and the workqueue flushed, no more - * entries can be added to ms->hold list, so process it. - * - * Bios can still arrive concurrently with or after this - * presuspend function, but they cannot join the hold list - * because ms->suspend is set. - */ - spin_lock_irq(&ms->lock); - holds = ms->holds; - bio_list_init(&ms->holds); - spin_unlock_irq(&ms->lock); - - while ((bio = bio_list_pop(&holds))) - hold_bio(ms, bio); } static void mirror_postsuspend(struct dm_target *ti) -- cgit v0.10.2