From a920f6b3accc77d9dddbc98a7426be23ee479625 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:23 +0100 Subject: dm: preserve bi_io_vec when resubmitting bios Device mapper saves and restores various fields in the bio, but it doesn't save bi_io_vec. If the device driver modifies this after a partially successful request, dm-raid1 and dm-multipath may attempt to resubmit a bio that has bi_size inconsistent with the size of vector. To make requests resubmittable in dm-raid1 and dm-multipath, we must save and restore the bio vector as well. To reduce the memory overhead involved in this, we do not save the pages in a vector and use a 16-bit field size if the page size is less than 65536. Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-bio-record.h b/drivers/md/dm-bio-record.h index d3ec217..3a8cfa2 100644 --- a/drivers/md/dm-bio-record.h +++ b/drivers/md/dm-bio-record.h @@ -16,30 +16,56 @@ * functions in this file help the target record and restore the * original bio state. 
*/ + +struct dm_bio_vec_details { +#if PAGE_SIZE < 65536 + __u16 bv_len; + __u16 bv_offset; +#else + unsigned bv_len; + unsigned bv_offset; +#endif +}; + struct dm_bio_details { sector_t bi_sector; struct block_device *bi_bdev; unsigned int bi_size; unsigned short bi_idx; unsigned long bi_flags; + struct dm_bio_vec_details bi_io_vec[BIO_MAX_PAGES]; }; static inline void dm_bio_record(struct dm_bio_details *bd, struct bio *bio) { + unsigned i; + bd->bi_sector = bio->bi_sector; bd->bi_bdev = bio->bi_bdev; bd->bi_size = bio->bi_size; bd->bi_idx = bio->bi_idx; bd->bi_flags = bio->bi_flags; + + for (i = 0; i < bio->bi_vcnt; i++) { + bd->bi_io_vec[i].bv_len = bio->bi_io_vec[i].bv_len; + bd->bi_io_vec[i].bv_offset = bio->bi_io_vec[i].bv_offset; + } } static inline void dm_bio_restore(struct dm_bio_details *bd, struct bio *bio) { + unsigned i; + bio->bi_sector = bd->bi_sector; bio->bi_bdev = bd->bi_bdev; bio->bi_size = bd->bi_size; bio->bi_idx = bd->bi_idx; bio->bi_flags = bd->bi_flags; + + for (i = 0; i < bio->bi_vcnt; i++) { + bio->bi_io_vec[i].bv_len = bd->bi_io_vec[i].bv_len; + bio->bi_io_vec[i].bv_offset = bd->bi_io_vec[i].bv_offset; + } } #endif -- cgit v0.10.2 From 95f8fac8dc6139fedfb87746e0c8fda9b803cb46 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:24 +0100 Subject: dm raid1: switch read_record from kmalloc to slab to save memory With my previous patch to save bi_io_vec, the size of dm_raid1_read_record is significantly increased (the vector list takes 3072 bytes on 32-bit machines and 4096 bytes on 64-bit machines). The structure dm_raid1_read_record used to be allocated with kmalloc, but kmalloc aligns the size on the next power-of-two so an object slightly greater than 4096 will allocate 8192 bytes of memory and half of that memory will be wasted. This patch turns kmalloc into a slab cache which doesn't have this padding so it will reduce the memory consumed. 
Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 4d6bc10..62d5948 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -145,6 +145,8 @@ struct dm_raid1_read_record { struct dm_bio_details details; }; +static struct kmem_cache *_dm_raid1_read_record_cache; + /* * Every mirror should look like this one. */ @@ -764,9 +766,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, atomic_set(&ms->suspend, 0); atomic_set(&ms->default_mirror, DEFAULT_MIRROR); - len = sizeof(struct dm_raid1_read_record); - ms->read_record_pool = mempool_create_kmalloc_pool(MIN_READ_RECORDS, - len); + ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS, + _dm_raid1_read_record_cache); + if (!ms->read_record_pool) { ti->error = "Error creating mirror read_record_pool"; kfree(ms); @@ -1279,16 +1281,31 @@ static int __init dm_mirror_init(void) { int r; + _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0); + if (!_dm_raid1_read_record_cache) { + DMERR("Can't allocate dm_raid1_read_record cache"); + r = -ENOMEM; + goto bad_cache; + } + r = dm_register_target(&mirror_target); - if (r < 0) + if (r < 0) { DMERR("Failed to register mirror target"); + goto bad_target; + } + + return 0; +bad_target: + kmem_cache_destroy(_dm_raid1_read_record_cache); +bad_cache: return r; } static void __exit dm_mirror_exit(void) { dm_unregister_target(&mirror_target); + kmem_cache_destroy(_dm_raid1_read_record_cache); } /* Module hooks */ -- cgit v0.10.2 From b64b6bf4fd8b678a9f8477c11773c38a0a246a6d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:24 +0100 Subject: dm io: make sync_io uninterruptible If someone sends signal to a process performing synchronous dm-io call, the kernel may crash. 
The function sync_io attempts to exit with -EINTR if it has a pending signal; however, the structure "io" is allocated on the stack, so already submitted io requests end up touching unallocated stack space and corrupting kernel memory. sync_io sets its state to TASK_UNINTERRUPTIBLE, so the signal can't break out of io_schedule() --- however, if the signal was pending before sync_io entered the while (1) loop, the corruption of kernel memory will still happen. There is no way to cancel in-progress IOs, so the best solution is to ignore signals at this point. Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 36e2b5e..e73aabd 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -370,16 +370,13 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, while (1) { set_current_state(TASK_UNINTERRUPTIBLE); - if (!atomic_read(&io.count) || signal_pending(current)) + if (!atomic_read(&io.count)) break; io_schedule(); } set_current_state(TASK_RUNNING); - if (atomic_read(&io.count)) - return -EINTR; - if (error_bits) *error_bits = io.error_bits; -- cgit v0.10.2 From 2913808eb56a6445a7b277eb8d17651c8defb035 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:25 +0100 Subject: dm snapshot: refactor __find_pending_exception Move the lookup of a pending exception from __find_pending_exception to another function.
Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 65ff82f..87a2803 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -972,6 +972,17 @@ static void start_copy(struct dm_snap_pending_exception *pe) &src, 1, &dest, 0, copy_callback, pe); } +static struct dm_snap_pending_exception * +__lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) +{ + struct dm_snap_exception *e = lookup_exception(&s->pending, chunk); + + if (!e) + return NULL; + + return container_of(e, struct dm_snap_pending_exception, e); +} + /* * Looks to see if this snapshot already has a pending exception * for this chunk, otherwise it allocates a new one and inserts @@ -983,21 +994,10 @@ static void start_copy(struct dm_snap_pending_exception *pe) static struct dm_snap_pending_exception * __find_pending_exception(struct dm_snapshot *s, struct bio *bio) { - struct dm_snap_exception *e; - struct dm_snap_pending_exception *pe; + struct dm_snap_pending_exception *pe, *pe2; chunk_t chunk = sector_to_chunk(s, bio->bi_sector); /* - * Is there a pending exception for this already ? - */ - e = lookup_exception(&s->pending, chunk); - if (e) { - /* cast the exception to a pending exception */ - pe = container_of(e, struct dm_snap_pending_exception, e); - goto out; - } - - /* * Create a new pending exception, we don't want * to hold the lock while we do this. 
*/ @@ -1010,11 +1010,10 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) return NULL; } - e = lookup_exception(&s->pending, chunk); - if (e) { + pe2 = __lookup_pending_exception(s, chunk); + if (pe2) { free_pending_exception(pe); - pe = container_of(e, struct dm_snap_pending_exception, e); - goto out; + return pe2; } pe->e.old_chunk = chunk; @@ -1032,7 +1031,6 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) get_pending_exception(pe); insert_exception(&s->pending, &pe->e); - out: return pe; } @@ -1083,11 +1081,14 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, * writeable. */ if (bio_rw(bio) == WRITE) { - pe = __find_pending_exception(s, bio); + pe = __lookup_pending_exception(s, chunk); if (!pe) { - __invalidate_snapshot(s, -ENOMEM); - r = -EIO; - goto out_unlock; + pe = __find_pending_exception(s, bio); + if (!pe) { + __invalidate_snapshot(s, -ENOMEM); + r = -EIO; + goto out_unlock; + } } remap_exception(s, &pe->e, bio, chunk); @@ -1217,10 +1218,13 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) if (e) goto next_snapshot; - pe = __find_pending_exception(snap, bio); + pe = __lookup_pending_exception(snap, chunk); if (!pe) { - __invalidate_snapshot(snap, -ENOMEM); - goto next_snapshot; + pe = __find_pending_exception(snap, bio); + if (!pe) { + __invalidate_snapshot(snap, -ENOMEM); + goto next_snapshot; + } } if (!primary_pe) { -- cgit v0.10.2 From c66213921c816f6b1b16a84911618ba9a363b134 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:25 +0100 Subject: dm snapshot: avoid dropping lock in __find_pending_exception It is uncommon and bug-prone to drop a lock in a function that is called with the lock held, so this is moved to the caller. 
Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 87a2803..eb7291a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -992,23 +992,10 @@ __lookup_pending_exception(struct dm_snapshot *s, chunk_t chunk) * this. */ static struct dm_snap_pending_exception * -__find_pending_exception(struct dm_snapshot *s, struct bio *bio) +__find_pending_exception(struct dm_snapshot *s, + struct dm_snap_pending_exception *pe, chunk_t chunk) { - struct dm_snap_pending_exception *pe, *pe2; - chunk_t chunk = sector_to_chunk(s, bio->bi_sector); - - /* - * Create a new pending exception, we don't want - * to hold the lock while we do this. - */ - up_write(&s->lock); - pe = alloc_pending_exception(s); - down_write(&s->lock); - - if (!s->valid) { - free_pending_exception(pe); - return NULL; - } + struct dm_snap_pending_exception *pe2; pe2 = __lookup_pending_exception(s, chunk); if (pe2) { @@ -1083,7 +1070,17 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, if (bio_rw(bio) == WRITE) { pe = __lookup_pending_exception(s, chunk); if (!pe) { - pe = __find_pending_exception(s, bio); + up_write(&s->lock); + pe = alloc_pending_exception(s); + down_write(&s->lock); + + if (!s->valid) { + free_pending_exception(pe); + r = -EIO; + goto out_unlock; + } + + pe = __find_pending_exception(s, pe, chunk); if (!pe) { __invalidate_snapshot(s, -ENOMEM); r = -EIO; @@ -1220,7 +1217,16 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) pe = __lookup_pending_exception(snap, chunk); if (!pe) { - pe = __find_pending_exception(snap, bio); + up_write(&snap->lock); + pe = alloc_pending_exception(snap); + down_write(&snap->lock); + + if (!snap->valid) { + free_pending_exception(pe); + goto next_snapshot; + } + + pe = __find_pending_exception(snap, pe, chunk); if (!pe) { __invalidate_snapshot(snap, -ENOMEM); goto next_snapshot; -- cgit v0.10.2 From 
35bf659b008e83e725dcd30f542e38461dbb867c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:26 +0100 Subject: dm snapshot: avoid having two exceptions for the same chunk We need to check if the exception was completed after dropping the lock. After regaining the lock, __find_pending_exception checks if the exception was already placed into the &s->pending hash. But we don't check if the exception was already completed and placed into the &s->complete hash. If the process waiting in alloc_pending_exception was delayed at this point because of a scheduling latency and the exception was meanwhile completed, we'd miss that and allocate another pending exception for an already completed chunk. It would lead to a situation where two records for the same chunk exist, and to potential data corruption, because multiple snapshot I/Os to the affected chunk could be redirected to different locations in the snapshot. Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index eb7291a..462750c 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1080,6 +1080,13 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, goto out_unlock; } + e = lookup_exception(&s->complete, chunk); + if (e) { + free_pending_exception(pe); + remap_exception(s, e, bio, chunk); + goto out_unlock; + } + pe = __find_pending_exception(s, pe, chunk); if (!pe) { __invalidate_snapshot(s, -ENOMEM); @@ -1226,6 +1233,12 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) goto next_snapshot; } + e = lookup_exception(&snap->complete, chunk); + if (e) { + free_pending_exception(pe); + goto next_snapshot; + } + pe = __find_pending_exception(snap, pe, chunk); if (!pe) { __invalidate_snapshot(snap, -ENOMEM); -- cgit v0.10.2 From 5642b8a61a15436231adf27b2b1bd96901b623dd Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Thu, 2 Apr 2009 19:55:27 +0100 Subject: dm target: use
module refcount directly The tt_internal's 'use' field is superfluous: the module's refcount can do the work properly. An acceptable side-effect is that this increases the reference counts reported by 'lsmod'. Remove the superfluous test when removing a target module. [Crash possible without this on SMP - agk] Cc: stable@kernel.org Signed-off-by: Cheng Renquan Signed-off-by: Alasdair G Kergon Reviewed-by: Alasdair G Kergon Reviewed-by: Jonathan Brassow diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 7decf10..db72c94 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -18,7 +18,6 @@ struct tt_internal { struct target_type tt; struct list_head list; - long use; }; static LIST_HEAD(_targets); @@ -44,12 +43,8 @@ static struct tt_internal *get_target_type(const char *name) down_read(&_lock); ti = __find_target_type(name); - if (ti) { - if ((ti->use == 0) && !try_module_get(ti->tt.module)) - ti = NULL; - else - ti->use++; - } + if (ti && !try_module_get(ti->tt.module)) + ti = NULL; up_read(&_lock); return ti; @@ -77,10 +72,7 @@ void dm_put_target_type(struct target_type *t) struct tt_internal *ti = (struct tt_internal *) t; down_read(&_lock); - if (--ti->use == 0) - module_put(ti->tt.module); - - BUG_ON(ti->use < 0); + module_put(ti->tt.module); up_read(&_lock); return; @@ -140,12 +132,6 @@ void dm_unregister_target(struct target_type *t) BUG(); } - if (ti->use) { - DMCRIT("Attempt to unregister target still in use: %s", - t->name); - BUG(); - } - list_del(&ti->list); kfree(ti); -- cgit v0.10.2 From aea9058801c0acfa2831af1714da412dfb0018c2 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Thu, 2 Apr 2009 19:55:27 +0100 Subject: dm: path selector use module refcount directly Fix refcount corruption in dm-path-selector Refcounting with non-atomic ops under shared lock will corrupt the counter in multi-processor system and may trigger BUG_ON(). Use module refcount. 
# same approach as dm-target-use-module-refcount-directly.patch here # https://www.redhat.com/archives/dm-devel/2008-December/msg00075.html Typical oops: kernel BUG at linux-2.6.29-rc3/drivers/md/dm-path-selector.c:90! Pid: 11148, comm: dmsetup Not tainted 2.6.29-rc3-nm #1 dm_put_path_selector+0x4d/0x61 [dm_multipath] Call Trace: [] free_priority_group+0x33/0xb3 [dm_multipath] [] free_multipath+0x31/0x67 [dm_multipath] [] multipath_dtr+0x2d/0x32 [dm_multipath] [] dm_table_destroy+0x64/0xd8 [dm_mod] [] __unbind+0x46/0x4b [dm_mod] [] dm_swap_table+0x60/0x14d [dm_mod] [] dev_suspend+0xfd/0x177 [dm_mod] [] dm_ctl_ioctl+0x24c/0x29c [dm_mod] [] ? get_page_from_freelist+0x49c/0x61d [] ? dev_suspend+0x0/0x177 [dm_mod] [] vfs_ioctl+0x2a/0x77 [] do_vfs_ioctl+0x448/0x4a0 [] sys_ioctl+0x57/0x7a [] system_call_fastpath+0x16/0x1b Cc: stable@kernel.org Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-path-selector.c b/drivers/md/dm-path-selector.c index 96ea226..42c04f0 100644 --- a/drivers/md/dm-path-selector.c +++ b/drivers/md/dm-path-selector.c @@ -17,9 +17,7 @@ struct ps_internal { struct path_selector_type pst; - struct list_head list; - long use; }; #define pst_to_psi(__pst) container_of((__pst), struct ps_internal, pst) @@ -45,12 +43,8 @@ static struct ps_internal *get_path_selector(const char *name) down_read(&_ps_lock); psi = __find_path_selector_type(name); - if (psi) { - if ((psi->use == 0) && !try_module_get(psi->pst.module)) - psi = NULL; - else - psi->use++; - } + if (psi && !try_module_get(psi->pst.module)) + psi = NULL; up_read(&_ps_lock); return psi; @@ -84,11 +78,7 @@ void dm_put_path_selector(struct path_selector_type *pst) if (!psi) goto out; - if (--psi->use == 0) - module_put(psi->pst.module); - - BUG_ON(psi->use < 0); - + module_put(psi->pst.module); out: up_read(&_ps_lock); } @@ -136,11 +126,6 @@ int dm_unregister_path_selector(struct path_selector_type *pst) return -EINVAL; } - if (psi->use) { - 
up_write(&_ps_lock); - return -ETXTBSY; - } - list_del(&psi->list); up_write(&_ps_lock); -- cgit v0.10.2 From 570b9d968bf9b16974252ef7cbce73fa6dac34f3 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 2 Apr 2009 19:55:28 +0100 Subject: dm table: fix upgrade mode race upgrade_mode() sets bdev to NULL temporarily, and does not have any locking to exclude anything from seeing that NULL. In dm_table_any_congested() bdev_get_queue() can dereference that NULL and cause a reported oops. Fix this by not changing that field during the mode upgrade. Cc: stable@kernel.org Cc: Neil Brown Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 2fd66c3..e8361b1 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -399,28 +399,30 @@ static int check_device_area(struct dm_dev_internal *dd, sector_t start, } /* - * This upgrades the mode on an already open dm_dev. Being + * This upgrades the mode on an already open dm_dev, being * careful to leave things as they were if we fail to reopen the - * device. + * device and not to touch the existing bdev field in case + * it is accessed concurrently inside dm_table_any_congested(). 
*/ static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, struct mapped_device *md) { int r; - struct dm_dev_internal dd_copy; - dev_t dev = dd->dm_dev.bdev->bd_dev; + struct dm_dev_internal dd_new, dd_old; - dd_copy = *dd; + dd_new = dd_old = *dd; + + dd_new.dm_dev.mode |= new_mode; + dd_new.dm_dev.bdev = NULL; + + r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md); + if (r) + return r; dd->dm_dev.mode |= new_mode; - dd->dm_dev.bdev = NULL; - r = open_dev(dd, dev, md); - if (!r) - close_dev(&dd_copy, md); - else - *dd = dd_copy; + close_dev(&dd_old, md); - return r; + return 0; } /* -- cgit v0.10.2 From 45194e4f89fbdd97a2b7d2698c05f0b00c19e820 Mon Sep 17 00:00:00 2001 From: Cheng Renquan Date: Thu, 2 Apr 2009 19:55:28 +0100 Subject: dm target: remove struct tt_internal The tt_internal is really just a list_head to manage registered target_type in a double linked list, Here embed the list_head into target_type directly, 1. to avoid kmalloc/kfree; 2. then tt_internal is really unneeded; Cc: stable@kernel.org Signed-off-by: Cheng Renquan Signed-off-by: Alasdair G Kergon Reviewed-by: Alasdair G Kergon diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index db72c94..04feccf 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -14,40 +14,34 @@ #define DM_MSG_PREFIX "target" -struct tt_internal { - struct target_type tt; - - struct list_head list; -}; - static LIST_HEAD(_targets); static DECLARE_RWSEM(_lock); #define DM_MOD_NAME_SIZE 32 -static inline struct tt_internal *__find_target_type(const char *name) +static inline struct target_type *__find_target_type(const char *name) { - struct tt_internal *ti; + struct target_type *tt; - list_for_each_entry (ti, &_targets, list) - if (!strcmp(name, ti->tt.name)) - return ti; + list_for_each_entry(tt, &_targets, list) + if (!strcmp(name, tt->name)) + return tt; return NULL; } -static struct tt_internal *get_target_type(const char *name) +static struct target_type *get_target_type(const 
char *name) { - struct tt_internal *ti; + struct target_type *tt; down_read(&_lock); - ti = __find_target_type(name); - if (ti && !try_module_get(ti->tt.module)) - ti = NULL; + tt = __find_target_type(name); + if (tt && !try_module_get(tt->module)) + tt = NULL; up_read(&_lock); - return ti; + return tt; } static void load_module(const char *name) @@ -57,83 +51,59 @@ static void load_module(const char *name) struct target_type *dm_get_target_type(const char *name) { - struct tt_internal *ti = get_target_type(name); + struct target_type *tt = get_target_type(name); - if (!ti) { + if (!tt) { load_module(name); - ti = get_target_type(name); + tt = get_target_type(name); } - return ti ? &ti->tt : NULL; + return tt; } -void dm_put_target_type(struct target_type *t) +void dm_put_target_type(struct target_type *tt) { - struct tt_internal *ti = (struct tt_internal *) t; - down_read(&_lock); - module_put(ti->tt.module); + module_put(tt->module); up_read(&_lock); - - return; } -static struct tt_internal *alloc_target(struct target_type *t) -{ - struct tt_internal *ti = kzalloc(sizeof(*ti), GFP_KERNEL); - - if (ti) - ti->tt = *t; - - return ti; -} - - int dm_target_iterate(void (*iter_func)(struct target_type *tt, void *param), void *param) { - struct tt_internal *ti; + struct target_type *tt; down_read(&_lock); - list_for_each_entry (ti, &_targets, list) - iter_func(&ti->tt, param); + list_for_each_entry(tt, &_targets, list) + iter_func(tt, param); up_read(&_lock); return 0; } -int dm_register_target(struct target_type *t) +int dm_register_target(struct target_type *tt) { int rv = 0; - struct tt_internal *ti = alloc_target(t); - - if (!ti) - return -ENOMEM; down_write(&_lock); - if (__find_target_type(t->name)) + if (__find_target_type(tt->name)) rv = -EEXIST; else - list_add(&ti->list, &_targets); + list_add(&tt->list, &_targets); up_write(&_lock); - if (rv) - kfree(ti); return rv; } -void dm_unregister_target(struct target_type *t) +void dm_unregister_target(struct 
target_type *tt) { - struct tt_internal *ti; - down_write(&_lock); - if (!(ti = __find_target_type(t->name))) { - DMCRIT("Unregistering unrecognised target: %s", t->name); + if (!__find_target_type(tt->name)) { + DMCRIT("Unregistering unrecognised target: %s", tt->name); BUG(); } - list_del(&ti->list); - kfree(ti); + list_del(&tt->list); up_write(&_lock); } @@ -142,17 +112,17 @@ void dm_unregister_target(struct target_type *t) * io-err: always fails an io, useful for bringing * up LVs that have holes in them. */ -static int io_err_ctr(struct dm_target *ti, unsigned int argc, char **args) +static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) { return 0; } -static void io_err_dtr(struct dm_target *ti) +static void io_err_dtr(struct dm_target *tt) { /* empty */ } -static int io_err_map(struct dm_target *ti, struct bio *bio, +static int io_err_map(struct dm_target *tt, struct bio *bio, union map_info *map_context) { return -EIO; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 20194e0..b48397c 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -60,7 +60,7 @@ int dm_table_barrier_ok(struct dm_table *t); int dm_target_init(void); void dm_target_exit(void); struct target_type *dm_get_target_type(const char *name); -void dm_put_target_type(struct target_type *t); +void dm_put_target_type(struct target_type *tt); int dm_target_iterate(void (*iter_func)(struct target_type *tt, void *param), void *param); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 8209e08..66ec05a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -139,6 +139,9 @@ struct target_type { dm_ioctl_fn ioctl; dm_merge_fn merge; dm_busy_fn busy; + + /* For internal device-mapper use. 
*/ + struct list_head list; }; struct io_restrictions { -- cgit v0.10.2 From b81d6cf79b57f6123a5d4f7a4932cc359995484d Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 2 Apr 2009 19:55:28 +0100 Subject: dm crypt: use kzfree Use kzfree() instead of memset() + kfree(). Signed-off-by: Johannes Weiner Reviewed-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index bfefd07..53394e8 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1156,8 +1156,7 @@ bad_ivmode: crypto_free_ablkcipher(tfm); bad_cipher: /* Must zero key material before freeing */ - memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); - kfree(cc); + kzfree(cc); return -EINVAL; } @@ -1183,8 +1182,7 @@ static void crypt_dtr(struct dm_target *ti) dm_put_device(ti, cc->dev); /* Must zero key material before freeing */ - memset(cc, 0, sizeof(*cc) + cc->key_size * sizeof(u8)); - kfree(cc); + kzfree(cc); } static int crypt_map(struct dm_target *ti, struct bio *bio, -- cgit v0.10.2 From 84e67c9319eb2232757a022c24f6a461291eaee5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 2 Apr 2009 19:55:29 +0100 Subject: dm log: use standard kernel module refcount Avoid private module usage accounting by removing 'use' from dm_dirty_log_internal. The standard module reference counting is sufficient. 
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 737961f..094c8f0 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -20,7 +20,6 @@ struct dm_dirty_log_internal { struct dm_dirty_log_type *type; struct list_head list; - long use; }; static LIST_HEAD(_log_types); @@ -44,12 +43,8 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name) spin_lock(&_lock); log_type = __find_dirty_log_type(name); - if (log_type) { - if (!log_type->use && !try_module_get(log_type->type->module)) - log_type = NULL; - else - log_type->use++; - } + if (log_type && !try_module_get(log_type->type->module)) + log_type = NULL; spin_unlock(&_lock); @@ -120,10 +115,7 @@ static void put_type(struct dm_dirty_log_type *type) if (!log_type) goto out; - if (!--log_type->use) - module_put(type->module); - - BUG_ON(log_type->use < 0); + module_put(type->module); out: spin_unlock(&_lock); @@ -173,11 +165,6 @@ int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type) return -EINVAL; } - if (log_type->use) { - spin_unlock(&_lock); - return -ETXTBSY; - } - list_del(&log_type->list); spin_unlock(&_lock); -- cgit v0.10.2 From ec44ab9d6681ddf9026b593e866bec9c0e075e1d Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 2 Apr 2009 19:55:30 +0100 Subject: dm log: remove struct dm_dirty_log_internal Remove the 'dm_dirty_log_internal' structure. The resulting cleanup eliminates extra memory allocations. Therefore exposing the internal list_head to the external 'dm_dirty_log_type' structure is a worthwhile compromise. 
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 094c8f0..be233bc 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -16,34 +16,28 @@ #define DM_MSG_PREFIX "dirty region log" -struct dm_dirty_log_internal { - struct dm_dirty_log_type *type; - - struct list_head list; -}; - static LIST_HEAD(_log_types); static DEFINE_SPINLOCK(_lock); -static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name) +static struct dm_dirty_log_type *__find_dirty_log_type(const char *name) { - struct dm_dirty_log_internal *log_type; + struct dm_dirty_log_type *log_type; list_for_each_entry(log_type, &_log_types, list) - if (!strcmp(name, log_type->type->name)) + if (!strcmp(name, log_type->name)) return log_type; return NULL; } -static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name) +static struct dm_dirty_log_type *_get_dirty_log_type(const char *name) { - struct dm_dirty_log_internal *log_type; + struct dm_dirty_log_type *log_type; spin_lock(&_lock); log_type = __find_dirty_log_type(name); - if (log_type && !try_module_get(log_type->type->module)) + if (log_type && !try_module_get(log_type->module)) log_type = NULL; spin_unlock(&_lock); @@ -71,14 +65,14 @@ static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name) static struct dm_dirty_log_type *get_type(const char *type_name) { char *p, *type_name_dup; - struct dm_dirty_log_internal *log_type; + struct dm_dirty_log_type *log_type; if (!type_name) return NULL; log_type = _get_dirty_log_type(type_name); if (log_type) - return log_type->type; + return log_type; type_name_dup = kstrdup(type_name, GFP_KERNEL); if (!type_name_dup) { @@ -100,19 +94,16 @@ static struct dm_dirty_log_type *get_type(const char *type_name) kfree(type_name_dup); - return log_type ? 
log_type->type : NULL; + return log_type; } static void put_type(struct dm_dirty_log_type *type) { - struct dm_dirty_log_internal *log_type; - if (!type) return; spin_lock(&_lock); - log_type = __find_dirty_log_type(type->name); - if (!log_type) + if (!__find_dirty_log_type(type->name)) goto out; module_put(type->module); @@ -121,32 +112,15 @@ out: spin_unlock(&_lock); } -static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type) -{ - struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type), - GFP_KERNEL); - - if (log_type) - log_type->type = type; - - return log_type; -} - int dm_dirty_log_type_register(struct dm_dirty_log_type *type) { - struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type); int r = 0; - if (!log_type) - return -ENOMEM; - spin_lock(&_lock); if (!__find_dirty_log_type(type->name)) - list_add(&log_type->list, &_log_types); - else { - kfree(log_type); + list_add(&type->list, &_log_types); + else r = -EEXIST; - } spin_unlock(&_lock); return r; @@ -155,20 +129,16 @@ EXPORT_SYMBOL(dm_dirty_log_type_register); int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type) { - struct dm_dirty_log_internal *log_type; - spin_lock(&_lock); - log_type = __find_dirty_log_type(type->name); - if (!log_type) { + if (!__find_dirty_log_type(type->name)) { spin_unlock(&_lock); return -EINVAL; } - list_del(&log_type->list); + list_del(&type->list); spin_unlock(&_lock); - kfree(log_type); return 0; } diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index 600c5fb..727602b 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -28,6 +28,9 @@ struct dm_dirty_log_type { const char *name; struct module *module; + /* For internal device-mapper use */ + struct list_head list; + int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, unsigned argc, char **argv); void (*dtr)(struct dm_dirty_log *log); -- cgit v0.10.2 From b2a114652940ccf7e9668ad447ca78bf16a31139 Mon 
Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:30 +0100 Subject: dm exception store: separate type from instance Introduce struct dm_exception_store_type. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index bb9f33d..aed1f11 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -37,11 +37,15 @@ struct dm_snap_exception { * Abstraction to handle the meta/layout of exception stores (the * COW device). */ -struct dm_exception_store { +struct dm_exception_store; +struct dm_exception_store_type { + int (*ctr) (struct dm_exception_store *store, + unsigned argc, char **argv); + /* * Destroys this object when you've finished with it. */ - void (*destroy) (struct dm_exception_store *store); + void (*dtr) (struct dm_exception_store *store); /* * The target shouldn't read the COW device until this is @@ -81,8 +85,13 @@ struct dm_exception_store { void (*fraction_full) (struct dm_exception_store *store, sector_t *numerator, sector_t *denominator); +}; + +struct dm_exception_store { + struct dm_exception_store_type type; struct dm_snapshot *snap; + void *context; }; diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 936b34e..0bbbe3b 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -683,12 +683,13 @@ int dm_create_persistent(struct dm_exception_store *store) return -ENOMEM; } - store->destroy = persistent_destroy; - store->read_metadata = persistent_read_metadata; - store->prepare_exception = persistent_prepare_exception; - store->commit_exception = persistent_commit_exception; - store->drop_snapshot = persistent_drop_snapshot; - store->fraction_full = persistent_fraction_full; + store->type.dtr = persistent_destroy; + store->type.read_metadata = persistent_read_metadata; + store->type.prepare_exception = persistent_prepare_exception; + 
store->type.commit_exception = persistent_commit_exception; + store->type.drop_snapshot = persistent_drop_snapshot; + store->type.fraction_full = persistent_fraction_full; + store->context = ps; return 0; diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 7f6e2e6..b558176 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -39,7 +39,7 @@ static int transient_read_metadata(struct dm_exception_store *store, static int transient_prepare_exception(struct dm_exception_store *store, struct dm_snap_exception *e) { - struct transient_c *tc = (struct transient_c *) store->context; + struct transient_c *tc = store->context; sector_t size = get_dev_size(store->snap->cow->bdev); if (size < (tc->next_free + store->snap->chunk_size)) @@ -71,12 +71,12 @@ int dm_create_transient(struct dm_exception_store *store) { struct transient_c *tc; - store->destroy = transient_destroy; - store->read_metadata = transient_read_metadata; - store->prepare_exception = transient_prepare_exception; - store->commit_exception = transient_commit_exception; - store->drop_snapshot = NULL; - store->fraction_full = transient_fraction_full; + store->type.dtr = transient_destroy; + store->type.read_metadata = transient_read_metadata; + store->type.prepare_exception = transient_prepare_exception; + store->type.commit_exception = transient_commit_exception; + store->type.drop_snapshot = NULL; + store->type.fraction_full = transient_fraction_full; tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); if (!tc) diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 462750c..dabd58e 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -665,7 +665,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->tracked_chunk_lock); /* Metadata must only be loaded into one table at once */ - r = s->store.read_metadata(&s->store, dm_add_exception, (void *)s); + r = 
s->store.type.read_metadata(&s->store, dm_add_exception, (void *)s); if (r < 0) { ti->error = "Failed to read snapshot metadata"; goto bad_load_and_register; @@ -700,7 +700,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) dm_kcopyd_client_destroy(s->kcopyd_client); bad5: - s->store.destroy(&s->store); + s->store.type.dtr(&s->store); bad4: exit_exception_table(&s->pending, pending_cache); @@ -725,7 +725,7 @@ static void __free_exceptions(struct dm_snapshot *s) exit_exception_table(&s->pending, pending_cache); exit_exception_table(&s->complete, exception_cache); - s->store.destroy(&s->store); + s->store.type.dtr(&s->store); } static void snapshot_dtr(struct dm_target *ti) @@ -820,8 +820,8 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) else if (err == -ENOMEM) DMERR("Invalidating snapshot: Unable to allocate exception."); - if (s->store.drop_snapshot) - s->store.drop_snapshot(&s->store); + if (s->store.type.drop_snapshot) + s->store.type.drop_snapshot(&s->store); s->valid = 0; @@ -943,8 +943,8 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) else /* Update the metadata if we are persistent */ - s->store.commit_exception(&s->store, &pe->e, commit_callback, - pe); + s->store.type.commit_exception(&s->store, &pe->e, + commit_callback, pe); } /* @@ -1010,7 +1010,7 @@ __find_pending_exception(struct dm_snapshot *s, atomic_set(&pe->ref_count, 0); pe->started = 0; - if (s->store.prepare_exception(&s->store, &pe->e)) { + if (s->store.type.prepare_exception(&s->store, &pe->e)) { free_pending_exception(pe); return NULL; } @@ -1149,9 +1149,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, if (!snap->valid) snprintf(result, maxlen, "Invalid"); else { - if (snap->store.fraction_full) { + if (snap->store.type.fraction_full) { sector_t numerator, denominator; - snap->store.fraction_full(&snap->store, + snap->store.type.fraction_full(&snap->store, &numerator, 
&denominator); snprintf(result, maxlen, "%llu/%llu", -- cgit v0.10.2 From 7513c2a761d69d2a93f17146b3563527d3618ba0 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:30 +0100 Subject: dm raid1: add is_remote_recovering hook for clusters The logging API needs an extra function to make cluster mirroring possible. This new function allows us to check whether a mirror region is being recovered on another machine in the cluster. This helps us prevent simultaneous recovery I/O and process I/O to the same locations on disk. Cluster-aware log modules will implement this function. Single machine log modules will not. So, there is no performance penalty for single machine mirrors. Signed-off-by: Jonathan Brassow Acked-by: Heinz Mauelshagen Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 62d5948..536ef0b 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -588,6 +588,9 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) int state; struct bio *bio; struct bio_list sync, nosync, recover, *this_list = NULL; + struct bio_list requeue; + struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); + region_t region; if (!writes->head) return; @@ -598,10 +601,18 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) bio_list_init(&sync); bio_list_init(&nosync); bio_list_init(&recover); + bio_list_init(&requeue); while ((bio = bio_list_pop(writes))) { - state = dm_rh_get_state(ms->rh, - dm_rh_bio_to_region(ms->rh, bio), 1); + region = dm_rh_bio_to_region(ms->rh, bio); + + if (log->type->is_remote_recovering && + log->type->is_remote_recovering(log, region)) { + bio_list_add(&requeue, bio); + continue; + } + + state = dm_rh_get_state(ms->rh, region, 1); switch (state) { case DM_RH_CLEAN: case DM_RH_DIRTY: @@ -621,6 +632,16 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) } /* + * Add bios that are delayed due to remote recovery + * back on to 
the write queue + */ + if (unlikely(requeue.head)) { + spin_lock_irq(&ms->lock); + bio_list_merge(&ms->writes, &requeue); + spin_unlock_irq(&ms->lock); + } + + /* * Increment the pending counts for any regions that will * be written to (writes to recover regions are going to * be delayed). diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h index 727602b..5e8b11d 100644 --- a/include/linux/dm-dirty-log.h +++ b/include/linux/dm-dirty-log.h @@ -116,6 +116,16 @@ struct dm_dirty_log_type { */ int (*status)(struct dm_dirty_log *log, status_type_t status_type, char *result, unsigned maxlen); + + /* + * is_remote_recovering is necessary for cluster mirroring. It provides + * a way to detect recovery on another node, so we aren't writing + * concurrently. This function is likely to block (when a cluster log + * is used). + * + * Returns: 0, 1 + */ + int (*is_remote_recovering)(struct dm_dirty_log *log, region_t region); }; int dm_dirty_log_type_register(struct dm_dirty_log_type *type); -- cgit v0.10.2 From 493df71c6420b211a68ae82b889c1e8a5fe701be Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:31 +0100 Subject: dm exception store: introduce registry Move exception stores into a registry. 
Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index dccbfb0..8912a36 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -14,6 +14,168 @@ #define DM_MSG_PREFIX "snapshot exception stores" +static LIST_HEAD(_exception_store_types); +static DEFINE_SPINLOCK(_lock); + +static struct dm_exception_store_type *__find_exception_store_type(const char *name) +{ + struct dm_exception_store_type *type; + + list_for_each_entry(type, &_exception_store_types, list) + if (!strcmp(name, type->name)) + return type; + + return NULL; +} + +static struct dm_exception_store_type *_get_exception_store_type(const char *name) +{ + struct dm_exception_store_type *type; + + spin_lock(&_lock); + + type = __find_exception_store_type(name); + + if (type && !try_module_get(type->module)) + type = NULL; + + spin_unlock(&_lock); + + return type; +} + +/* + * get_type + * @type_name + * + * Attempt to retrieve the dm_exception_store_type by name. If not already + * available, attempt to load the appropriate module. + * + * Exstore modules are named "dm-exstore-" followed by the 'type_name'. + * Modules may contain multiple types. + * This function will first try the module "dm-exstore-<type_name>", + * then truncate 'type_name' on the last '-' and try again. + * + * For example, if type_name was "clustered-shared", it would search + * 'dm-exstore-clustered-shared' then 'dm-exstore-clustered'. + * + * 'dm-exception-store-<type_name>' is too long of a name in my + * opinion, which is why I've chosen to have the files + * containing exception store implementations be 'dm-exstore-<type_name>'. + * If you want your module to be autoloaded, you will follow this + * naming convention. 
+ * + * Returns: dm_exception_store_type* on success, NULL on failure + */ +static struct dm_exception_store_type *get_type(const char *type_name) +{ + char *p, *type_name_dup; + struct dm_exception_store_type *type; + + type = _get_exception_store_type(type_name); + if (type) + return type; + + type_name_dup = kstrdup(type_name, GFP_KERNEL); + if (!type_name_dup) { + DMERR("No memory left to attempt load for \"%s\"", type_name); + return NULL; + } + + while (request_module("dm-exstore-%s", type_name_dup) || + !(type = _get_exception_store_type(type_name))) { + p = strrchr(type_name_dup, '-'); + if (!p) + break; + p[0] = '\0'; + } + + if (!type) + DMWARN("Module for exstore type \"%s\" not found.", type_name); + + kfree(type_name_dup); + + return type; +} + +static void put_type(struct dm_exception_store_type *type) +{ + spin_lock(&_lock); + module_put(type->module); + spin_unlock(&_lock); +} + +int dm_exception_store_type_register(struct dm_exception_store_type *type) +{ + int r = 0; + + spin_lock(&_lock); + if (!__find_exception_store_type(type->name)) + list_add(&type->list, &_exception_store_types); + else + r = -EEXIST; + spin_unlock(&_lock); + + return r; +} +EXPORT_SYMBOL(dm_exception_store_type_register); + +int dm_exception_store_type_unregister(struct dm_exception_store_type *type) +{ + spin_lock(&_lock); + + if (!__find_exception_store_type(type->name)) { + spin_unlock(&_lock); + return -EINVAL; + } + + list_del(&type->list); + + spin_unlock(&_lock); + + return 0; +} +EXPORT_SYMBOL(dm_exception_store_type_unregister); + +int dm_exception_store_create(const char *type_name, + struct dm_exception_store **store) +{ + int r = 0; + struct dm_exception_store_type *type; + struct dm_exception_store *tmp_store; + + tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL); + if (!tmp_store) + return -ENOMEM; + + type = get_type(type_name); + if (!type) { + kfree(tmp_store); + return -EINVAL; + } + + tmp_store->type = type; + + r = type->ctr(tmp_store, 0, NULL); + if 
(r) { + put_type(type); + kfree(tmp_store); + return r; + } + + *store = tmp_store; + return 0; +} +EXPORT_SYMBOL(dm_exception_store_create); + +void dm_exception_store_destroy(struct dm_exception_store *store) +{ + store->type->dtr(store); + put_type(store->type); + kfree(store); +} +EXPORT_SYMBOL(dm_exception_store_destroy); + int dm_exception_store_init(void) { int r; diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index aed1f11..3137715 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -39,6 +39,9 @@ struct dm_snap_exception { */ struct dm_exception_store; struct dm_exception_store_type { + const char *name; + struct module *module; + int (*ctr) (struct dm_exception_store *store, unsigned argc, char **argv); @@ -85,10 +88,13 @@ struct dm_exception_store_type { void (*fraction_full) (struct dm_exception_store *store, sector_t *numerator, sector_t *denominator); + + /* For internal device-mapper use only. */ + struct list_head list; }; struct dm_exception_store { - struct dm_exception_store_type type; + struct dm_exception_store_type *type; struct dm_snapshot *snap; @@ -138,6 +144,13 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) # endif +int dm_exception_store_type_register(struct dm_exception_store_type *type); +int dm_exception_store_type_unregister(struct dm_exception_store_type *type); + +int dm_exception_store_create(const char *type_name, + struct dm_exception_store **store); +void dm_exception_store_destroy(struct dm_exception_store *store); + int dm_exception_store_init(void); void dm_exception_store_exit(void); @@ -150,8 +163,4 @@ void dm_persistent_snapshot_exit(void); int dm_transient_snapshot_init(void); void dm_transient_snapshot_exit(void); -int dm_create_persistent(struct dm_exception_store *store); - -int dm_create_transient(struct dm_exception_store *store); - #endif /* _LINUX_DM_EXCEPTION_STORE */ diff --git a/drivers/md/dm-snap-persistent.c 
b/drivers/md/dm-snap-persistent.c index 0bbbe3b..e85b7a1 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -478,7 +478,7 @@ static void persistent_fraction_full(struct dm_exception_store *store, *denominator = get_dev_size(store->snap->cow->bdev); } -static void persistent_destroy(struct dm_exception_store *store) +static void persistent_dtr(struct dm_exception_store *store) { struct pstore *ps = get_info(store); @@ -656,7 +656,8 @@ static void persistent_drop_snapshot(struct dm_exception_store *store) DMWARN("write header failed"); } -int dm_create_persistent(struct dm_exception_store *store) +static int persistent_ctr(struct dm_exception_store *store, + unsigned argc, char **argv) { struct pstore *ps; @@ -683,23 +684,69 @@ int dm_create_persistent(struct dm_exception_store *store) return -ENOMEM; } - store->type.dtr = persistent_destroy; - store->type.read_metadata = persistent_read_metadata; - store->type.prepare_exception = persistent_prepare_exception; - store->type.commit_exception = persistent_commit_exception; - store->type.drop_snapshot = persistent_drop_snapshot; - store->type.fraction_full = persistent_fraction_full; - store->context = ps; return 0; } +static int persistent_status(struct dm_exception_store *store, + status_type_t status, char *result, + unsigned int maxlen) +{ + int sz = 0; + + return sz; +} + +static struct dm_exception_store_type _persistent_type = { + .name = "persistent", + .module = THIS_MODULE, + .ctr = persistent_ctr, + .dtr = persistent_dtr, + .read_metadata = persistent_read_metadata, + .prepare_exception = persistent_prepare_exception, + .commit_exception = persistent_commit_exception, + .drop_snapshot = persistent_drop_snapshot, + .fraction_full = persistent_fraction_full, + .status = persistent_status, +}; + +static struct dm_exception_store_type _persistent_compat_type = { + .name = "P", + .module = THIS_MODULE, + .ctr = persistent_ctr, + .dtr = persistent_dtr, + .read_metadata = 
persistent_read_metadata, + .prepare_exception = persistent_prepare_exception, + .commit_exception = persistent_commit_exception, + .drop_snapshot = persistent_drop_snapshot, + .fraction_full = persistent_fraction_full, + .status = persistent_status, +}; + int dm_persistent_snapshot_init(void) { - return 0; + int r; + + r = dm_exception_store_type_register(&_persistent_type); + if (r) { + DMERR("Unable to register persistent exception store type"); + return r; + } + + r = dm_exception_store_type_register(&_persistent_compat_type); + if (r) { + DMERR("Unable to register old-style persistent exception " + "store type"); + dm_exception_store_type_unregister(&_persistent_type); + return r; + } + + return r; } void dm_persistent_snapshot_exit(void) { + dm_exception_store_type_unregister(&_persistent_type); + dm_exception_store_type_unregister(&_persistent_compat_type); } diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index b558176..51bc4a7 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -23,7 +23,7 @@ struct transient_c { sector_t next_free; }; -static void transient_destroy(struct dm_exception_store *store) +static void transient_dtr(struct dm_exception_store *store) { kfree(store->context); } @@ -67,17 +67,11 @@ static void transient_fraction_full(struct dm_exception_store *store, *denominator = get_dev_size(store->snap->cow->bdev); } -int dm_create_transient(struct dm_exception_store *store) +static int transient_ctr(struct dm_exception_store *store, + unsigned argc, char **argv) { struct transient_c *tc; - store->type.dtr = transient_destroy; - store->type.read_metadata = transient_read_metadata; - store->type.prepare_exception = transient_prepare_exception; - store->type.commit_exception = transient_commit_exception; - store->type.drop_snapshot = NULL; - store->type.fraction_full = transient_fraction_full; - tc = kmalloc(sizeof(struct transient_c), GFP_KERNEL); if (!tc) return -ENOMEM; @@ -88,11 
+82,62 @@ int dm_create_transient(struct dm_exception_store *store) return 0; } +static int transient_status(struct dm_exception_store *store, + status_type_t status, char *result, + unsigned maxlen) +{ + int sz = 0; + + return sz; +} + +static struct dm_exception_store_type _transient_type = { + .name = "transient", + .module = THIS_MODULE, + .ctr = transient_ctr, + .dtr = transient_dtr, + .read_metadata = transient_read_metadata, + .prepare_exception = transient_prepare_exception, + .commit_exception = transient_commit_exception, + .fraction_full = transient_fraction_full, + .status = transient_status, +}; + +static struct dm_exception_store_type _transient_compat_type = { + .name = "N", + .module = THIS_MODULE, + .ctr = transient_ctr, + .dtr = transient_dtr, + .read_metadata = transient_read_metadata, + .prepare_exception = transient_prepare_exception, + .commit_exception = transient_commit_exception, + .fraction_full = transient_fraction_full, + .status = transient_status, +}; + int dm_transient_snapshot_init(void) { - return 0; + int r; + + r = dm_exception_store_type_register(&_transient_type); + if (r) { + DMWARN("Unable to register transient exception store type"); + return r; + } + + r = dm_exception_store_type_register(&_transient_compat_type); + if (r) { + DMWARN("Unable to register old-style transient " + "exception store type"); + dm_exception_store_type_unregister(&_transient_type); + return r; + } + + return r; } void dm_transient_snapshot_exit(void) { + dm_exception_store_type_unregister(&_transient_type); + dm_exception_store_type_unregister(&_transient_compat_type); } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index dabd58e..be698f3 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -610,8 +610,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto bad3; - s->type = persistent; - s->valid = 1; s->active = 0; atomic_set(&s->pending_exceptions_count, 0); @@ -626,19 +624,15 @@ static 
int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad3; } - s->store.snap = s; - - if (persistent == 'P') - r = dm_create_persistent(&s->store); - else - r = dm_create_transient(&s->store); - + r = dm_exception_store_create(argv[2], &s->store); if (r) { ti->error = "Couldn't create exception store"; r = -EINVAL; goto bad4; } + s->store->snap = s; + r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); if (r) { ti->error = "Could not create kcopyd client"; @@ -665,7 +659,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->tracked_chunk_lock); /* Metadata must only be loaded into one table at once */ - r = s->store.type.read_metadata(&s->store, dm_add_exception, (void *)s); + r = s->store->type->read_metadata(s->store, dm_add_exception, + (void *)s); if (r < 0) { ti->error = "Failed to read snapshot metadata"; goto bad_load_and_register; @@ -700,7 +695,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) dm_kcopyd_client_destroy(s->kcopyd_client); bad5: - s->store.type.dtr(&s->store); + s->store->type->dtr(s->store); bad4: exit_exception_table(&s->pending, pending_cache); @@ -725,7 +720,7 @@ static void __free_exceptions(struct dm_snapshot *s) exit_exception_table(&s->pending, pending_cache); exit_exception_table(&s->complete, exception_cache); - s->store.type.dtr(&s->store); + s->store->type->dtr(s->store); } static void snapshot_dtr(struct dm_target *ti) @@ -820,8 +815,8 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) else if (err == -ENOMEM) DMERR("Invalidating snapshot: Unable to allocate exception."); - if (s->store.type.drop_snapshot) - s->store.type.drop_snapshot(&s->store); + if (s->store->type->drop_snapshot) + s->store->type->drop_snapshot(s->store); s->valid = 0; @@ -943,8 +938,8 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) else /* Update the metadata if we are persistent */ - 
s->store.type.commit_exception(&s->store, &pe->e, - commit_callback, pe); + s->store->type->commit_exception(s->store, &pe->e, + commit_callback, pe); } /* @@ -1010,7 +1005,7 @@ __find_pending_exception(struct dm_snapshot *s, atomic_set(&pe->ref_count, 0); pe->started = 0; - if (s->store.type.prepare_exception(&s->store, &pe->e)) { + if (s->store->type->prepare_exception(s->store, &pe->e)) { free_pending_exception(pe); return NULL; } @@ -1149,11 +1144,11 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, if (!snap->valid) snprintf(result, maxlen, "Invalid"); else { - if (snap->store.type.fraction_full) { + if (snap->store->type->fraction_full) { sector_t numerator, denominator; - snap->store.type.fraction_full(&snap->store, - &numerator, - &denominator); + snap->store->type->fraction_full(snap->store, + &numerator, + &denominator); snprintf(result, maxlen, "%llu/%llu", (unsigned long long)numerator, (unsigned long long)denominator); @@ -1169,9 +1164,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, * to make private copies if the output is to * make sense. */ - snprintf(result, maxlen, "%s %s %c %llu", + snprintf(result, maxlen, "%s %s %s %llu", snap->origin->name, snap->cow->name, - snap->type, + snap->store->type->name, (unsigned long long)snap->chunk_size); break; } diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index d9e62b4..627be0f 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -61,7 +61,7 @@ struct dm_snapshot { spinlock_t pe_lock; /* The on disk metadata handler */ - struct dm_exception_store store; + struct dm_exception_store *store; struct dm_kcopyd_client *kcopyd_client; -- cgit v0.10.2 From 0cea9c78270cdf1d2ad74ce0e083d5555a0842e8 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:32 +0100 Subject: dm exception store: move dm_target pointer Move target pointer from snapshot to exception store. 
Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 8912a36..fe0cfa6 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -137,7 +137,7 @@ int dm_exception_store_type_unregister(struct dm_exception_store_type *type) } EXPORT_SYMBOL(dm_exception_store_type_unregister); -int dm_exception_store_create(const char *type_name, +int dm_exception_store_create(const char *type_name, struct dm_target *ti, struct dm_exception_store **store) { int r = 0; @@ -155,6 +155,7 @@ int dm_exception_store_create(const char *type_name, } tmp_store->type = type; + tmp_store->ti = ti; r = type->ctr(tmp_store, 0, NULL); if (r) { diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 3137715..4b7f7d4 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -95,6 +95,7 @@ struct dm_exception_store_type { struct dm_exception_store { struct dm_exception_store_type *type; + struct dm_target *ti; struct dm_snapshot *snap; @@ -147,7 +148,7 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) int dm_exception_store_type_register(struct dm_exception_store_type *type); int dm_exception_store_type_unregister(struct dm_exception_store_type *type); -int dm_exception_store_create(const char *type_name, +int dm_exception_store_create(const char *type_name, struct dm_target *ti, struct dm_exception_store **store); void dm_exception_store_destroy(struct dm_exception_store *store); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index be698f3..4429c2a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -615,7 +615,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) atomic_set(&s->pending_exceptions_count, 0); init_rwsem(&s->lock); spin_lock_init(&s->pe_lock); - s->ti = ti; /* Allocate hash table for COW data */ if (init_hash_tables(s)) { @@ 
-624,7 +623,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad3; } - r = dm_exception_store_create(argv[2], &s->store); + r = dm_exception_store_create(argv[2], ti, &s->store); if (r) { ti->error = "Couldn't create exception store"; r = -EINVAL; @@ -820,7 +819,7 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) s->valid = 0; - dm_table_event(s->ti->table); + dm_table_event(s->store->ti->table); } static void get_pending_exception(struct dm_snap_pending_exception *pe) @@ -1196,7 +1195,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) goto next_snapshot; /* Nothing to do if writing beyond end of snapshot */ - if (bio->bi_sector >= dm_table_get_size(snap->ti->table)) + if (bio->bi_sector >= dm_table_get_size(snap->store->ti->table)) goto next_snapshot; /* diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 627be0f..93cd8ee 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -25,7 +25,6 @@ struct exception_table { struct dm_snapshot { struct rw_semaphore lock; - struct dm_target *ti; struct dm_dev *origin; struct dm_dev *cow; -- cgit v0.10.2 From d0216849519bec8dc96301a3cd80316e71243839 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:32 +0100 Subject: dm exception store: move chunk_fields Move chunk fields from snapshot to exception store. 
Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index fe0cfa6..59c949b 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -138,6 +138,8 @@ int dm_exception_store_type_unregister(struct dm_exception_store_type *type) EXPORT_SYMBOL(dm_exception_store_type_unregister); int dm_exception_store_create(const char *type_name, struct dm_target *ti, + chunk_t chunk_size, chunk_t chunk_mask, + chunk_t chunk_shift, struct dm_exception_store **store) { int r = 0; @@ -157,6 +159,10 @@ int dm_exception_store_create(const char *type_name, struct dm_target *ti, tmp_store->type = type; tmp_store->ti = ti; + tmp_store->chunk_size = chunk_size; + tmp_store->chunk_mask = chunk_mask; + tmp_store->chunk_shift = chunk_shift; + r = type->ctr(tmp_store, 0, NULL); if (r) { put_type(type); diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 4b7f7d4..449a1e4 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -99,6 +99,11 @@ struct dm_exception_store { struct dm_snapshot *snap; + /* Size of data blocks saved - must be a power of 2 */ + chunk_t chunk_size; + chunk_t chunk_mask; + chunk_t chunk_shift; + void *context; }; @@ -149,6 +154,8 @@ int dm_exception_store_type_register(struct dm_exception_store_type *type); int dm_exception_store_type_unregister(struct dm_exception_store_type *type); int dm_exception_store_create(const char *type_name, struct dm_target *ti, + chunk_t chunk_size, chunk_t chunk_mask, + chunk_t chunk_shift, struct dm_exception_store **store); void dm_exception_store_destroy(struct dm_exception_store *store); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index e85b7a1..c3c5815 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -141,7 +141,7 @@ static int alloc_area(struct pstore *ps) int r = -ENOMEM; size_t len; - 
len = ps->snap->chunk_size << SECTOR_SHIFT; + len = ps->snap->store->chunk_size << SECTOR_SHIFT; /* * Allocate the chunk_size block of memory that will hold @@ -190,8 +190,8 @@ static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) { struct dm_io_region where = { .bdev = ps->snap->cow->bdev, - .sector = ps->snap->chunk_size * chunk, - .count = ps->snap->chunk_size, + .sector = ps->snap->store->chunk_size * chunk, + .count = ps->snap->store->chunk_size, }; struct dm_io_request io_req = { .bi_rw = rw, @@ -247,15 +247,15 @@ static int area_io(struct pstore *ps, int rw) static void zero_memory_area(struct pstore *ps) { - memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); + memset(ps->area, 0, ps->snap->store->chunk_size << SECTOR_SHIFT); } static int zero_disk_area(struct pstore *ps, chunk_t area) { struct dm_io_region where = { .bdev = ps->snap->cow->bdev, - .sector = ps->snap->chunk_size * area_location(ps, area), - .count = ps->snap->chunk_size, + .sector = ps->snap->store->chunk_size * area_location(ps, area), + .count = ps->snap->store->chunk_size, }; struct dm_io_request io_req = { .bi_rw = WRITE, @@ -278,16 +278,17 @@ static int read_header(struct pstore *ps, int *new_snapshot) /* * Use default chunk size (or hardsect_size, if larger) if none supplied */ - if (!ps->snap->chunk_size) { - ps->snap->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, + if (!ps->snap->store->chunk_size) { + ps->snap->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, bdev_hardsect_size(ps->snap->cow->bdev) >> 9); - ps->snap->chunk_mask = ps->snap->chunk_size - 1; - ps->snap->chunk_shift = ffs(ps->snap->chunk_size) - 1; + ps->snap->store->chunk_mask = ps->snap->store->chunk_size - 1; + ps->snap->store->chunk_shift = ffs(ps->snap->store->chunk_size) + - 1; chunk_size_supplied = 0; } ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> - chunk_size)); + store->chunk_size)); if (IS_ERR(ps->io_client)) return PTR_ERR(ps->io_client); @@ -317,22 
+318,22 @@ static int read_header(struct pstore *ps, int *new_snapshot) ps->version = le32_to_cpu(dh->version); chunk_size = le32_to_cpu(dh->chunk_size); - if (!chunk_size_supplied || ps->snap->chunk_size == chunk_size) + if (!chunk_size_supplied || ps->snap->store->chunk_size == chunk_size) return 0; DMWARN("chunk size %llu in device metadata overrides " "table chunk size of %llu.", (unsigned long long)chunk_size, - (unsigned long long)ps->snap->chunk_size); + (unsigned long long)ps->snap->store->chunk_size); /* We had a bogus chunk_size. Fix stuff up. */ free_area(ps); - ps->snap->chunk_size = chunk_size; - ps->snap->chunk_mask = chunk_size - 1; - ps->snap->chunk_shift = ffs(chunk_size) - 1; + ps->snap->store->chunk_size = chunk_size; + ps->snap->store->chunk_mask = chunk_size - 1; + ps->snap->store->chunk_shift = ffs(chunk_size) - 1; - r = dm_io_client_resize(sectors_to_pages(ps->snap->chunk_size), + r = dm_io_client_resize(sectors_to_pages(ps->snap->store->chunk_size), ps->io_client); if (r) return r; @@ -349,13 +350,13 @@ static int write_header(struct pstore *ps) { struct disk_header *dh; - memset(ps->area, 0, ps->snap->chunk_size << SECTOR_SHIFT); + memset(ps->area, 0, ps->snap->store->chunk_size << SECTOR_SHIFT); dh = (struct disk_header *) ps->area; dh->magic = cpu_to_le32(SNAP_MAGIC); dh->valid = cpu_to_le32(ps->valid); dh->version = cpu_to_le32(ps->version); - dh->chunk_size = cpu_to_le32(ps->snap->chunk_size); + dh->chunk_size = cpu_to_le32(ps->snap->store->chunk_size); return chunk_io(ps, 0, WRITE, 1); } @@ -474,7 +475,7 @@ static struct pstore *get_info(struct dm_exception_store *store) static void persistent_fraction_full(struct dm_exception_store *store, sector_t *numerator, sector_t *denominator) { - *numerator = get_info(store)->next_free * store->snap->chunk_size; + *numerator = get_info(store)->next_free * store->chunk_size; *denominator = get_dev_size(store->snap->cow->bdev); } @@ -507,8 +508,8 @@ static int persistent_read_metadata(struct 
dm_exception_store *store, /* * Now we know correct chunk_size, complete the initialisation. */ - ps->exceptions_per_area = (ps->snap->chunk_size << SECTOR_SHIFT) / - sizeof(struct disk_exception); + ps->exceptions_per_area = (ps->snap->store->chunk_size << SECTOR_SHIFT) + / sizeof(struct disk_exception); ps->callbacks = dm_vcalloc(ps->exceptions_per_area, sizeof(*ps->callbacks)); if (!ps->callbacks) @@ -567,7 +568,7 @@ static int persistent_prepare_exception(struct dm_exception_store *store, sector_t size = get_dev_size(store->snap->cow->bdev); /* Is there enough room ? */ - if (size < ((ps->next_free + 1) * store->snap->chunk_size)) + if (size < ((ps->next_free + 1) * store->chunk_size)) return -ENOSPC; e->new_chunk = ps->next_free; diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 51bc4a7..c542aba 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -42,11 +42,11 @@ static int transient_prepare_exception(struct dm_exception_store *store, struct transient_c *tc = store->context; sector_t size = get_dev_size(store->snap->cow->bdev); - if (size < (tc->next_free + store->snap->chunk_size)) + if (size < (tc->next_free + store->chunk_size)) return -1; e->new_chunk = sector_to_chunk(store->snap, tc->next_free); - tc->next_free += store->snap->chunk_size; + tc->next_free += store->chunk_size; return 0; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 4429c2a..7a90fed 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -468,7 +468,7 @@ static int calc_max_buckets(void) /* * Allocate room for a suitable hash table. 
*/ -static int init_hash_tables(struct dm_snapshot *s) +static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift) { sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; @@ -480,7 +480,7 @@ static int init_hash_tables(struct dm_snapshot *s) origin_dev_size = get_dev_size(s->origin->bdev); max_buckets = calc_max_buckets(); - hash_size = min(origin_dev_size, cow_dev_size) >> s->chunk_shift; + hash_size = min(origin_dev_size, cow_dev_size) >> chunk_shift; hash_size = min(hash_size, max_buckets); hash_size = rounddown_pow_of_two(hash_size); @@ -515,19 +515,20 @@ static ulong round_up(ulong n, ulong size) } static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, - char **error) + chunk_t *chunk_size, chunk_t *chunk_mask, + chunk_t *chunk_shift, char **error) { - unsigned long chunk_size; + unsigned long chunk_size_ulong; char *value; - chunk_size = simple_strtoul(chunk_size_arg, &value, 10); + chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); if (*chunk_size_arg == '\0' || *value != '\0') { *error = "Invalid chunk size"; return -EINVAL; } - if (!chunk_size) { - s->chunk_size = s->chunk_mask = s->chunk_shift = 0; + if (!chunk_size_ulong) { + *chunk_size = *chunk_mask = *chunk_shift = 0; return 0; } @@ -535,23 +536,23 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, * Chunk size must be multiple of page size. Silently * round up if it's not. 
*/ - chunk_size = round_up(chunk_size, PAGE_SIZE >> 9); + chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9); /* Check chunk_size is a power of 2 */ - if (!is_power_of_2(chunk_size)) { + if (!is_power_of_2(chunk_size_ulong)) { *error = "Chunk size is not a power of 2"; return -EINVAL; } /* Validate the chunk size against the device block size */ - if (chunk_size % (bdev_hardsect_size(s->cow->bdev) >> 9)) { + if (chunk_size_ulong % (bdev_hardsect_size(s->cow->bdev) >> 9)) { *error = "Chunk size is not a multiple of device blocksize"; return -EINVAL; } - s->chunk_size = chunk_size; - s->chunk_mask = chunk_size - 1; - s->chunk_shift = ffs(chunk_size) - 1; + *chunk_size = chunk_size_ulong; + *chunk_mask = chunk_size_ulong - 1; + *chunk_shift = ffs(chunk_size_ulong) - 1; return 0; } @@ -567,6 +568,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) char persistent; char *origin_path; char *cow_path; + chunk_t chunk_size, chunk_mask, chunk_shift; if (argc != 4) { ti->error = "requires exactly 4 arguments"; @@ -606,7 +608,8 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad2; } - r = set_chunk_size(s, argv[3], &ti->error); + r = set_chunk_size(s, argv[3], &chunk_size, &chunk_mask, &chunk_shift, + &ti->error); if (r) goto bad3; @@ -617,13 +620,14 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->pe_lock); /* Allocate hash table for COW data */ - if (init_hash_tables(s)) { + if (init_hash_tables(s, chunk_shift)) { ti->error = "Unable to allocate hash table space"; r = -ENOMEM; goto bad3; } - r = dm_exception_store_create(argv[2], ti, &s->store); + r = dm_exception_store_create(argv[2], ti, chunk_size, chunk_mask, + chunk_shift, &s->store); if (r) { ti->error = "Couldn't create exception store"; r = -EINVAL; @@ -680,7 +684,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = s; - ti->split_io = 
s->chunk_size; + ti->split_io = s->store->chunk_size; return 0; @@ -955,7 +959,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) src.bdev = bdev; src.sector = chunk_to_sector(s, pe->e.old_chunk); - src.count = min(s->chunk_size, dev_size - src.sector); + src.count = min(s->store->chunk_size, dev_size - src.sector); dest.bdev = s->cow->bdev; dest.sector = chunk_to_sector(s, pe->e.new_chunk); @@ -1021,7 +1025,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e, bio->bi_bdev = s->cow->bdev; bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) + (chunk - e->old_chunk)) + - (bio->bi_sector & s->chunk_mask); + (bio->bi_sector & s->store->chunk_mask); } static int snapshot_map(struct dm_target *ti, struct bio *bio, @@ -1166,7 +1170,7 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, snprintf(result, maxlen, "%s %s %s %llu", snap->origin->name, snap->cow->name, snap->store->type->name, - (unsigned long long)snap->chunk_size); + (unsigned long long)snap->store->chunk_size); break; } @@ -1377,7 +1381,8 @@ static void origin_resume(struct dm_target *ti) o = __lookup_origin(dev->bdev); if (o) list_for_each_entry (snap, &o->snapshots, list) - chunk_size = min_not_zero(chunk_size, snap->chunk_size); + chunk_size = min_not_zero(chunk_size, + snap->store->chunk_size); up_read(&_origins_lock); ti->split_io = chunk_size; diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 93cd8ee..c2e4ebe 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -32,11 +32,6 @@ struct dm_snapshot { /* List of snapshots per Origin */ struct list_head list; - /* Size of data blocks saved - must be a power of 2 */ - chunk_t chunk_size; - chunk_t chunk_mask; - chunk_t chunk_shift; - /* You can't use a snapshot if this is 0 (e.g. 
if full) */ int valid; @@ -84,12 +79,12 @@ static inline sector_t get_dev_size(struct block_device *bdev) static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector) { - return (sector & ~s->chunk_mask) >> s->chunk_shift; + return (sector & ~s->store->chunk_mask) >> s->store->chunk_shift; } static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk) { - return chunk << s->chunk_shift; + return chunk << s->store->chunk_shift; } static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs) -- cgit v0.10.2 From 49beb2b87a972a994ff77633234ca3bf0d30a1d8 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:33 +0100 Subject: dm exception store: move cow pointer Move COW device from snapshot to exception store. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 59c949b..2078b92 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -139,7 +139,7 @@ EXPORT_SYMBOL(dm_exception_store_type_unregister); int dm_exception_store_create(const char *type_name, struct dm_target *ti, chunk_t chunk_size, chunk_t chunk_mask, - chunk_t chunk_shift, + chunk_t chunk_shift, struct dm_dev *cow, struct dm_exception_store **store) { int r = 0; @@ -163,6 +163,8 @@ int dm_exception_store_create(const char *type_name, struct dm_target *ti, tmp_store->chunk_mask = chunk_mask; tmp_store->chunk_shift = chunk_shift; + tmp_store->cow = cow; + r = type->ctr(tmp_store, 0, NULL); if (r) { put_type(type); diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 449a1e4..4dbf357 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -99,6 +99,8 @@ struct dm_exception_store { struct dm_snapshot *snap; + struct dm_dev *cow; + /* Size of data blocks saved - must be a power of 2 */ chunk_t chunk_size; chunk_t chunk_mask; @@ -155,7 +157,7 @@ int 
dm_exception_store_type_unregister(struct dm_exception_store_type *type); int dm_exception_store_create(const char *type_name, struct dm_target *ti, chunk_t chunk_size, chunk_t chunk_mask, - chunk_t chunk_shift, + chunk_t chunk_shift, struct dm_dev *cow, struct dm_exception_store **store); void dm_exception_store_destroy(struct dm_exception_store *store); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index c3c5815..505afac 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -189,7 +189,7 @@ static void do_metadata(struct work_struct *work) static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) { struct dm_io_region where = { - .bdev = ps->snap->cow->bdev, + .bdev = ps->snap->store->cow->bdev, .sector = ps->snap->store->chunk_size * chunk, .count = ps->snap->store->chunk_size, }; @@ -253,7 +253,7 @@ static void zero_memory_area(struct pstore *ps) static int zero_disk_area(struct pstore *ps, chunk_t area) { struct dm_io_region where = { - .bdev = ps->snap->cow->bdev, + .bdev = ps->snap->store->cow->bdev, .sector = ps->snap->store->chunk_size * area_location(ps, area), .count = ps->snap->store->chunk_size, }; @@ -280,7 +280,7 @@ static int read_header(struct pstore *ps, int *new_snapshot) */ if (!ps->snap->store->chunk_size) { ps->snap->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, - bdev_hardsect_size(ps->snap->cow->bdev) >> 9); + bdev_hardsect_size(ps->snap->store->cow->bdev) >> 9); ps->snap->store->chunk_mask = ps->snap->store->chunk_size - 1; ps->snap->store->chunk_shift = ffs(ps->snap->store->chunk_size) - 1; @@ -476,7 +476,7 @@ static void persistent_fraction_full(struct dm_exception_store *store, sector_t *numerator, sector_t *denominator) { *numerator = get_info(store)->next_free * store->chunk_size; - *denominator = get_dev_size(store->snap->cow->bdev); + *denominator = get_dev_size(store->cow->bdev); } static void persistent_dtr(struct dm_exception_store *store) 
@@ -565,7 +565,7 @@ static int persistent_prepare_exception(struct dm_exception_store *store, struct pstore *ps = get_info(store); uint32_t stride; chunk_t next_free; - sector_t size = get_dev_size(store->snap->cow->bdev); + sector_t size = get_dev_size(store->cow->bdev); /* Is there enough room ? */ if (size < ((ps->next_free + 1) * store->chunk_size)) diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index c542aba..77f58be 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -40,7 +40,7 @@ static int transient_prepare_exception(struct dm_exception_store *store, struct dm_snap_exception *e) { struct transient_c *tc = store->context; - sector_t size = get_dev_size(store->snap->cow->bdev); + sector_t size = get_dev_size(store->cow->bdev); if (size < (tc->next_free + store->chunk_size)) return -1; @@ -64,7 +64,7 @@ static void transient_fraction_full(struct dm_exception_store *store, sector_t *numerator, sector_t *denominator) { *numerator = ((struct transient_c *) store->context)->next_free; - *denominator = get_dev_size(store->snap->cow->bdev); + *denominator = get_dev_size(store->cow->bdev); } static int transient_ctr(struct dm_exception_store *store, diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 7a90fed..5c067ef 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -468,7 +468,8 @@ static int calc_max_buckets(void) /* * Allocate room for a suitable hash table. */ -static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift) +static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift, + struct dm_dev *cow) { sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; @@ -476,7 +477,7 @@ static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift) * Calculate based on the size of the original volume or * the COW volume... 
*/ - cow_dev_size = get_dev_size(s->cow->bdev); + cow_dev_size = get_dev_size(cow->bdev); origin_dev_size = get_dev_size(s->origin->bdev); max_buckets = calc_max_buckets(); @@ -516,7 +517,8 @@ static ulong round_up(ulong n, ulong size) static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, chunk_t *chunk_size, chunk_t *chunk_mask, - chunk_t *chunk_shift, char **error) + chunk_t *chunk_shift, struct dm_dev *cow, + char **error) { unsigned long chunk_size_ulong; char *value; @@ -545,7 +547,7 @@ static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, } /* Validate the chunk size against the device block size */ - if (chunk_size_ulong % (bdev_hardsect_size(s->cow->bdev) >> 9)) { + if (chunk_size_ulong % (bdev_hardsect_size(cow->bdev) >> 9)) { *error = "Chunk size is not a multiple of device blocksize"; return -EINVAL; } @@ -569,6 +571,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) char *origin_path; char *cow_path; chunk_t chunk_size, chunk_mask, chunk_shift; + struct dm_dev *cow; if (argc != 4) { ti->error = "requires exactly 4 arguments"; @@ -601,7 +604,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) } r = dm_get_device(ti, cow_path, 0, 0, - FMODE_READ | FMODE_WRITE, &s->cow); + FMODE_READ | FMODE_WRITE, &cow); if (r) { dm_put_device(ti, s->origin); ti->error = "Cannot get COW device"; @@ -609,7 +612,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) } r = set_chunk_size(s, argv[3], &chunk_size, &chunk_mask, &chunk_shift, - &ti->error); + cow, &ti->error); if (r) goto bad3; @@ -620,14 +623,14 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->pe_lock); /* Allocate hash table for COW data */ - if (init_hash_tables(s, chunk_shift)) { + if (init_hash_tables(s, chunk_shift, cow)) { ti->error = "Unable to allocate hash table space"; r = -ENOMEM; goto bad3; } r = 
dm_exception_store_create(argv[2], ti, chunk_size, chunk_mask, - chunk_shift, &s->store); + chunk_shift, cow, &s->store); if (r) { ti->error = "Couldn't create exception store"; r = -EINVAL; @@ -705,7 +708,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) exit_exception_table(&s->complete, exception_cache); bad3: - dm_put_device(ti, s->cow); + dm_put_device(ti, cow); dm_put_device(ti, s->origin); bad2: @@ -732,6 +735,7 @@ static void snapshot_dtr(struct dm_target *ti) int i; #endif struct dm_snapshot *s = ti->private; + struct dm_dev *cow = s->store->cow; flush_workqueue(ksnapd); @@ -759,7 +763,7 @@ static void snapshot_dtr(struct dm_target *ti) mempool_destroy(s->pending_pool); dm_put_device(ti, s->origin); - dm_put_device(ti, s->cow); + dm_put_device(ti, cow); kfree(s); } @@ -961,7 +965,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) src.sector = chunk_to_sector(s, pe->e.old_chunk); src.count = min(s->store->chunk_size, dev_size - src.sector); - dest.bdev = s->cow->bdev; + dest.bdev = s->store->cow->bdev; dest.sector = chunk_to_sector(s, pe->e.new_chunk); dest.count = src.count; @@ -1022,7 +1026,7 @@ __find_pending_exception(struct dm_snapshot *s, static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e, struct bio *bio, chunk_t chunk) { - bio->bi_bdev = s->cow->bdev; + bio->bi_bdev = s->store->cow->bdev; bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) + (chunk - e->old_chunk)) + (bio->bi_sector & s->store->chunk_mask); @@ -1168,7 +1172,7 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, * make sense. 
*/ snprintf(result, maxlen, "%s %s %s %llu", - snap->origin->name, snap->cow->name, + snap->origin->name, snap->store->cow->name, snap->store->type->name, (unsigned long long)snap->store->chunk_size); break; diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index c2e4ebe..c8a486e 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -27,7 +27,6 @@ struct dm_snapshot { struct rw_semaphore lock; struct dm_dev *origin; - struct dm_dev *cow; /* List of snapshots per Origin */ struct list_head list; -- cgit v0.10.2 From 71fab00a6bef7fb53119271a8abdbaf40970d28a Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:33 +0100 Subject: dm snapshot: remove dm_snap header use Move useful functions out of dm-snap.h and stop using dm-snap.h. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 4dbf357..835f402 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -97,8 +97,6 @@ struct dm_exception_store { struct dm_exception_store_type *type; struct dm_target *ti; - struct dm_snapshot *snap; - struct dm_dev *cow; /* Size of data blocks saved - must be a power of 2 */ @@ -152,6 +150,20 @@ static inline void dm_consecutive_chunk_count_inc(struct dm_snap_exception *e) # endif +/* + * Return the number of sectors in the device. 
+ */ +static inline sector_t get_dev_size(struct block_device *bdev) +{ + return bdev->bd_inode->i_size >> SECTOR_SHIFT; +} + +static inline chunk_t sector_to_chunk(struct dm_exception_store *store, + sector_t sector) +{ + return (sector & ~store->chunk_mask) >> store->chunk_shift; +} + int dm_exception_store_type_register(struct dm_exception_store_type *type); int dm_exception_store_type_unregister(struct dm_exception_store_type *type); diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 505afac..3907c4c 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -6,7 +6,6 @@ */ #include "dm-exception-store.h" -#include "dm-snap.h" #include #include @@ -89,7 +88,7 @@ struct commit_callback { * The top level structure for a persistent exception store. */ struct pstore { - struct dm_snapshot *snap; /* up pointer to my snapshot */ + struct dm_exception_store *store; int version; int valid; uint32_t exceptions_per_area; @@ -141,7 +140,7 @@ static int alloc_area(struct pstore *ps) int r = -ENOMEM; size_t len; - len = ps->snap->store->chunk_size << SECTOR_SHIFT; + len = ps->store->chunk_size << SECTOR_SHIFT; /* * Allocate the chunk_size block of memory that will hold @@ -189,9 +188,9 @@ static void do_metadata(struct work_struct *work) static int chunk_io(struct pstore *ps, chunk_t chunk, int rw, int metadata) { struct dm_io_region where = { - .bdev = ps->snap->store->cow->bdev, - .sector = ps->snap->store->chunk_size * chunk, - .count = ps->snap->store->chunk_size, + .bdev = ps->store->cow->bdev, + .sector = ps->store->chunk_size * chunk, + .count = ps->store->chunk_size, }; struct dm_io_request io_req = { .bi_rw = rw, @@ -247,15 +246,15 @@ static int area_io(struct pstore *ps, int rw) static void zero_memory_area(struct pstore *ps) { - memset(ps->area, 0, ps->snap->store->chunk_size << SECTOR_SHIFT); + memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT); } static int zero_disk_area(struct pstore *ps, 
chunk_t area) { struct dm_io_region where = { - .bdev = ps->snap->store->cow->bdev, - .sector = ps->snap->store->chunk_size * area_location(ps, area), - .count = ps->snap->store->chunk_size, + .bdev = ps->store->cow->bdev, + .sector = ps->store->chunk_size * area_location(ps, area), + .count = ps->store->chunk_size, }; struct dm_io_request io_req = { .bi_rw = WRITE, @@ -278,17 +277,16 @@ static int read_header(struct pstore *ps, int *new_snapshot) /* * Use default chunk size (or hardsect_size, if larger) if none supplied */ - if (!ps->snap->store->chunk_size) { - ps->snap->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, - bdev_hardsect_size(ps->snap->store->cow->bdev) >> 9); - ps->snap->store->chunk_mask = ps->snap->store->chunk_size - 1; - ps->snap->store->chunk_shift = ffs(ps->snap->store->chunk_size) - - 1; + if (!ps->store->chunk_size) { + ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, + bdev_hardsect_size(ps->store->cow->bdev) >> 9); + ps->store->chunk_mask = ps->store->chunk_size - 1; + ps->store->chunk_shift = ffs(ps->store->chunk_size) - 1; chunk_size_supplied = 0; } - ps->io_client = dm_io_client_create(sectors_to_pages(ps->snap-> - store->chunk_size)); + ps->io_client = dm_io_client_create(sectors_to_pages(ps->store-> + chunk_size)); if (IS_ERR(ps->io_client)) return PTR_ERR(ps->io_client); @@ -318,22 +316,22 @@ static int read_header(struct pstore *ps, int *new_snapshot) ps->version = le32_to_cpu(dh->version); chunk_size = le32_to_cpu(dh->chunk_size); - if (!chunk_size_supplied || ps->snap->store->chunk_size == chunk_size) + if (!chunk_size_supplied || ps->store->chunk_size == chunk_size) return 0; DMWARN("chunk size %llu in device metadata overrides " "table chunk size of %llu.", (unsigned long long)chunk_size, - (unsigned long long)ps->snap->store->chunk_size); + (unsigned long long)ps->store->chunk_size); /* We had a bogus chunk_size. Fix stuff up. 
*/ free_area(ps); - ps->snap->store->chunk_size = chunk_size; - ps->snap->store->chunk_mask = chunk_size - 1; - ps->snap->store->chunk_shift = ffs(chunk_size) - 1; + ps->store->chunk_size = chunk_size; + ps->store->chunk_mask = chunk_size - 1; + ps->store->chunk_shift = ffs(chunk_size) - 1; - r = dm_io_client_resize(sectors_to_pages(ps->snap->store->chunk_size), + r = dm_io_client_resize(sectors_to_pages(ps->store->chunk_size), ps->io_client); if (r) return r; @@ -350,13 +348,13 @@ static int write_header(struct pstore *ps) { struct disk_header *dh; - memset(ps->area, 0, ps->snap->store->chunk_size << SECTOR_SHIFT); + memset(ps->area, 0, ps->store->chunk_size << SECTOR_SHIFT); dh = (struct disk_header *) ps->area; dh->magic = cpu_to_le32(SNAP_MAGIC); dh->valid = cpu_to_le32(ps->valid); dh->version = cpu_to_le32(ps->version); - dh->chunk_size = cpu_to_le32(ps->snap->store->chunk_size); + dh->chunk_size = cpu_to_le32(ps->store->chunk_size); return chunk_io(ps, 0, WRITE, 1); } @@ -508,8 +506,8 @@ static int persistent_read_metadata(struct dm_exception_store *store, /* * Now we know correct chunk_size, complete the initialisation. 
*/ - ps->exceptions_per_area = (ps->snap->store->chunk_size << SECTOR_SHIFT) - / sizeof(struct disk_exception); + ps->exceptions_per_area = (ps->store->chunk_size << SECTOR_SHIFT) / + sizeof(struct disk_exception); ps->callbacks = dm_vcalloc(ps->exceptions_per_area, sizeof(*ps->callbacks)); if (!ps->callbacks) @@ -667,7 +665,7 @@ static int persistent_ctr(struct dm_exception_store *store, if (!ps) return -ENOMEM; - ps->snap = store->snap; + ps->store = store; ps->valid = 1; ps->version = SNAPSHOT_DISK_VERSION; ps->area = NULL; diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index 77f58be..a883d6e 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -6,7 +6,6 @@ */ #include "dm-exception-store.h" -#include "dm-snap.h" #include #include @@ -45,7 +44,7 @@ static int transient_prepare_exception(struct dm_exception_store *store, if (size < (tc->next_free + store->chunk_size)) return -1; - e->new_chunk = sector_to_chunk(store->snap, tc->next_free); + e->new_chunk = sector_to_chunk(store, tc->next_free); tc->next_free += store->chunk_size; return 0; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 5c067ef..467c586 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -637,8 +637,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad4; } - s->store->snap = s; - r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); if (r) { ti->error = "Could not create kcopyd client"; @@ -962,11 +960,11 @@ static void start_copy(struct dm_snap_pending_exception *pe) dev_size = get_dev_size(bdev); src.bdev = bdev; - src.sector = chunk_to_sector(s, pe->e.old_chunk); + src.sector = chunk_to_sector(s->store, pe->e.old_chunk); src.count = min(s->store->chunk_size, dev_size - src.sector); dest.bdev = s->store->cow->bdev; - dest.sector = chunk_to_sector(s, pe->e.new_chunk); + dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); dest.count = src.count; /* Hand 
over to kcopyd */ @@ -1027,9 +1025,11 @@ static void remap_exception(struct dm_snapshot *s, struct dm_snap_exception *e, struct bio *bio, chunk_t chunk) { bio->bi_bdev = s->store->cow->bdev; - bio->bi_sector = chunk_to_sector(s, dm_chunk_number(e->new_chunk) + - (chunk - e->old_chunk)) + - (bio->bi_sector & s->store->chunk_mask); + bio->bi_sector = chunk_to_sector(s->store, + dm_chunk_number(e->new_chunk) + + (chunk - e->old_chunk)) + + (bio->bi_sector & + s->store->chunk_mask); } static int snapshot_map(struct dm_target *ti, struct bio *bio, @@ -1041,7 +1041,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, chunk_t chunk; struct dm_snap_pending_exception *pe = NULL; - chunk = sector_to_chunk(s, bio->bi_sector); + chunk = sector_to_chunk(s->store, bio->bi_sector); /* Full snapshots are not usable */ /* To get here the table must be live so s->active is always set. */ @@ -1210,7 +1210,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) * Remember, different snapshots can have * different chunk sizes. */ - chunk = sector_to_chunk(snap, bio->bi_sector); + chunk = sector_to_chunk(snap->store, bio->bi_sector); /* * Check exception table to see if block diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index c8a486e..4f61bc4 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -68,22 +68,10 @@ struct dm_snapshot { struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; }; -/* - * Return the number of sectors in the device. 
- */ -static inline sector_t get_dev_size(struct block_device *bdev) -{ - return bdev->bd_inode->i_size >> SECTOR_SHIFT; -} - -static inline chunk_t sector_to_chunk(struct dm_snapshot *s, sector_t sector) -{ - return (sector & ~s->store->chunk_mask) >> s->store->chunk_shift; -} - -static inline sector_t chunk_to_sector(struct dm_snapshot *s, chunk_t chunk) +static inline sector_t chunk_to_sector(struct dm_exception_store *store, + chunk_t chunk) { - return chunk << s->store->chunk_shift; + return chunk << store->chunk_shift; } static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs) -- cgit v0.10.2 From ccc45ea8aeffec49fa5985efc3649aa67bb4fcb7 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:34 +0100 Subject: dm snapshot: remove dm_snap header Move some of the last bits from dm-snap.h into dm-snap.c where they belong and remove dm-snap.h. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 467c586..bb28f97 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -20,9 +20,9 @@ #include #include #include +#include #include "dm-exception-store.h" -#include "dm-snap.h" #include "dm-bio-list.h" #define DM_MSG_PREFIX "snapshots" @@ -47,9 +47,79 @@ */ #define MIN_IOS 256 +#define DM_TRACKED_CHUNK_HASH_SIZE 16 +#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ + (DM_TRACKED_CHUNK_HASH_SIZE - 1)) + +struct exception_table { + uint32_t hash_mask; + unsigned hash_shift; + struct list_head *table; +}; + +struct dm_snapshot { + struct rw_semaphore lock; + + struct dm_dev *origin; + + /* List of snapshots per Origin */ + struct list_head list; + + /* You can't use a snapshot if this is 0 (e.g. 
if full) */ + int valid; + + /* Origin writes don't trigger exceptions until this is set */ + int active; + + /* Used for display of table */ + char type; + + mempool_t *pending_pool; + + atomic_t pending_exceptions_count; + + struct exception_table pending; + struct exception_table complete; + + /* + * pe_lock protects all pending_exception operations and access + * as well as the snapshot_bios list. + */ + spinlock_t pe_lock; + + /* The on disk metadata handler */ + struct dm_exception_store *store; + + struct dm_kcopyd_client *kcopyd_client; + + /* Queue of snapshot writes for ksnapd to flush */ + struct bio_list queued_bios; + struct work_struct queued_bios_work; + + /* Chunks with outstanding reads */ + mempool_t *tracked_chunk_pool; + spinlock_t tracked_chunk_lock; + struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; +}; + static struct workqueue_struct *ksnapd; static void flush_queued_bios(struct work_struct *work); +static sector_t chunk_to_sector(struct dm_exception_store *store, + chunk_t chunk) +{ + return chunk << store->chunk_shift; +} + +static int bdev_equal(struct block_device *lhs, struct block_device *rhs) +{ + /* + * There is only ever one instance of a particular block + * device so we can compare pointers safely. + */ + return lhs == rhs; +} + struct dm_snap_pending_exception { struct dm_snap_exception e; diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h deleted file mode 100644 index 4f61bc4..0000000 --- a/drivers/md/dm-snap.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2001-2002 Sistina Software (UK) Limited. - * - * This file is released under the GPL. 
- */ - -#ifndef DM_SNAPSHOT_H -#define DM_SNAPSHOT_H - -#include -#include "dm-exception-store.h" -#include "dm-bio-list.h" -#include -#include - -struct exception_table { - uint32_t hash_mask; - unsigned hash_shift; - struct list_head *table; -}; - -#define DM_TRACKED_CHUNK_HASH_SIZE 16 -#define DM_TRACKED_CHUNK_HASH(x) ((unsigned long)(x) & \ - (DM_TRACKED_CHUNK_HASH_SIZE - 1)) - -struct dm_snapshot { - struct rw_semaphore lock; - - struct dm_dev *origin; - - /* List of snapshots per Origin */ - struct list_head list; - - /* You can't use a snapshot if this is 0 (e.g. if full) */ - int valid; - - /* Origin writes don't trigger exceptions until this is set */ - int active; - - /* Used for display of table */ - char type; - - mempool_t *pending_pool; - - atomic_t pending_exceptions_count; - - struct exception_table pending; - struct exception_table complete; - - /* - * pe_lock protects all pending_exception operations and access - * as well as the snapshot_bios list. - */ - spinlock_t pe_lock; - - /* The on disk metadata handler */ - struct dm_exception_store *store; - - struct dm_kcopyd_client *kcopyd_client; - - /* Queue of snapshot writes for ksnapd to flush */ - struct bio_list queued_bios; - struct work_struct queued_bios_work; - - /* Chunks with outstanding reads */ - mempool_t *tracked_chunk_pool; - spinlock_t tracked_chunk_lock; - struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; -}; - -static inline sector_t chunk_to_sector(struct dm_exception_store *store, - chunk_t chunk) -{ - return chunk << store->chunk_shift; -} - -static inline int bdev_equal(struct block_device *lhs, struct block_device *rhs) -{ - /* - * There is only ever one instance of a particular block - * device so we can compare pointers safely. 
- */ - return lhs == rhs; -} - -#endif -- cgit v0.10.2 From 2e4a31df2b10cbcaf43c333112f6f7440a035c69 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:34 +0100 Subject: dm snapshot: use DMEMIT macro for status Use DMEMIT in place of snprintf. This makes it easier later when other modules are helping to populate our status output. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index bb28f97..fcb1ac1 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1214,24 +1214,25 @@ static void snapshot_resume(struct dm_target *ti) static int snapshot_status(struct dm_target *ti, status_type_t type, char *result, unsigned int maxlen) { + unsigned sz = 0; struct dm_snapshot *snap = ti->private; switch (type) { case STATUSTYPE_INFO: if (!snap->valid) - snprintf(result, maxlen, "Invalid"); + DMEMIT("Invalid"); else { if (snap->store->type->fraction_full) { sector_t numerator, denominator; snap->store->type->fraction_full(snap->store, &numerator, &denominator); - snprintf(result, maxlen, "%llu/%llu", - (unsigned long long)numerator, - (unsigned long long)denominator); + DMEMIT("%llu/%llu", + (unsigned long long)numerator, + (unsigned long long)denominator); } else - snprintf(result, maxlen, "Unknown"); + DMEMIT("Unknown"); } break; @@ -1241,10 +1242,10 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, * to make private copies if the output is to * make sense. 
*/ - snprintf(result, maxlen, "%s %s %s %llu", - snap->origin->name, snap->store->cow->name, - snap->store->type->name, - (unsigned long long)snap->store->chunk_size); + DMEMIT("%s", snap->origin->name); + DMEMIT(" %s %s %llu", snap->store->cow->name, + snap->store->type->name, + (unsigned long long)snap->store->chunk_size); break; } -- cgit v0.10.2 From fee1998e9c690f9920671e1e0ef187a48cfbbde4 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:34 +0100 Subject: dm snapshot: move ctr parsing to exception store First step of having the exception stores parse their own arguments - generalizing the interface. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 2078b92..a2e26c2 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -7,6 +7,7 @@ #include "dm-exception-store.h" +#include #include #include #include @@ -137,49 +138,129 @@ int dm_exception_store_type_unregister(struct dm_exception_store_type *type) } EXPORT_SYMBOL(dm_exception_store_type_unregister); -int dm_exception_store_create(const char *type_name, struct dm_target *ti, - chunk_t chunk_size, chunk_t chunk_mask, - chunk_t chunk_shift, struct dm_dev *cow, +/* + * Round a number up to the nearest 'size' boundary. size must + * be a power of 2. + */ +static ulong round_up(ulong n, ulong size) +{ + size--; + return (n + size) & ~size; +} + +static int set_chunk_size(struct dm_exception_store *store, + const char *chunk_size_arg, char **error) +{ + unsigned long chunk_size_ulong; + char *value; + + chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); + if (*chunk_size_arg == '\0' || *value != '\0') { + *error = "Invalid chunk size"; + return -EINVAL; + } + + if (!chunk_size_ulong) { + store->chunk_size = store->chunk_mask = store->chunk_shift = 0; + return 0; + } + + /* + * Chunk size must be multiple of page size. 
Silently + * round up if it's not. + */ + chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9); + + /* Check chunk_size is a power of 2 */ + if (!is_power_of_2(chunk_size_ulong)) { + *error = "Chunk size is not a power of 2"; + return -EINVAL; + } + + /* Validate the chunk size against the device block size */ + if (chunk_size_ulong % (bdev_hardsect_size(store->cow->bdev) >> 9)) { + *error = "Chunk size is not a multiple of device blocksize"; + return -EINVAL; + } + + store->chunk_size = chunk_size_ulong; + store->chunk_mask = chunk_size_ulong - 1; + store->chunk_shift = ffs(chunk_size_ulong) - 1; + + return 0; +} + +int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, + unsigned *args_used, struct dm_exception_store **store) { int r = 0; struct dm_exception_store_type *type; struct dm_exception_store *tmp_store; + char persistent; + + if (argc < 3) { + ti->error = "Insufficient exception store arguments"; + return -EINVAL; + } tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL); - if (!tmp_store) + if (!tmp_store) { + ti->error = "Exception store allocation failed"; return -ENOMEM; + } - type = get_type(type_name); - if (!type) { - kfree(tmp_store); + persistent = toupper(*argv[1]); + if (persistent != 'P' && persistent != 'N') { + ti->error = "Persistent flag is not P or N"; return -EINVAL; } + type = get_type(argv[1]); + if (!type) { + ti->error = "Exception store type not recognised"; + r = -EINVAL; + goto bad_type; + } + tmp_store->type = type; tmp_store->ti = ti; - tmp_store->chunk_size = chunk_size; - tmp_store->chunk_mask = chunk_mask; - tmp_store->chunk_shift = chunk_shift; + r = dm_get_device(ti, argv[0], 0, 0, + FMODE_READ | FMODE_WRITE, &tmp_store->cow); + if (r) { + ti->error = "Cannot get COW device"; + goto bad_cow; + } - tmp_store->cow = cow; + r = set_chunk_size(tmp_store, argv[2], &ti->error); + if (r) + goto bad_cow; r = type->ctr(tmp_store, 0, NULL); if (r) { - put_type(type); - kfree(tmp_store); - return r; + 
ti->error = "Exception store type constructor failed"; + goto bad_ctr; } + *args_used = 3; *store = tmp_store; return 0; + +bad_ctr: + dm_put_device(ti, tmp_store->cow); +bad_cow: + put_type(type); +bad_type: + kfree(tmp_store); + return r; } EXPORT_SYMBOL(dm_exception_store_create); void dm_exception_store_destroy(struct dm_exception_store *store) { store->type->dtr(store); + dm_put_device(store->ti, store->cow); put_type(store->type); kfree(store); } diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 835f402..023a707 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -167,9 +167,8 @@ static inline chunk_t sector_to_chunk(struct dm_exception_store *store, int dm_exception_store_type_register(struct dm_exception_store_type *type); int dm_exception_store_type_unregister(struct dm_exception_store_type *type); -int dm_exception_store_create(const char *type_name, struct dm_target *ti, - chunk_t chunk_size, chunk_t chunk_mask, - chunk_t chunk_shift, struct dm_dev *cow, +int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, + unsigned *args_used, struct dm_exception_store **store); void dm_exception_store_destroy(struct dm_exception_store *store); diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index fcb1ac1..974916b 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -7,7 +7,6 @@ */ #include -#include #include #include #include @@ -538,8 +537,7 @@ static int calc_max_buckets(void) /* * Allocate room for a suitable hash table. */ -static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift, - struct dm_dev *cow) +static int init_hash_tables(struct dm_snapshot *s) { sector_t hash_size, cow_dev_size, origin_dev_size, max_buckets; @@ -547,11 +545,11 @@ static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift, * Calculate based on the size of the original volume or * the COW volume... 
*/ - cow_dev_size = get_dev_size(cow->bdev); + cow_dev_size = get_dev_size(s->store->cow->bdev); origin_dev_size = get_dev_size(s->origin->bdev); max_buckets = calc_max_buckets(); - hash_size = min(origin_dev_size, cow_dev_size) >> chunk_shift; + hash_size = min(origin_dev_size, cow_dev_size) >> s->store->chunk_shift; hash_size = min(hash_size, max_buckets); hash_size = rounddown_pow_of_two(hash_size); @@ -576,60 +574,6 @@ static int init_hash_tables(struct dm_snapshot *s, chunk_t chunk_shift, } /* - * Round a number up to the nearest 'size' boundary. size must - * be a power of 2. - */ -static ulong round_up(ulong n, ulong size) -{ - size--; - return (n + size) & ~size; -} - -static int set_chunk_size(struct dm_snapshot *s, const char *chunk_size_arg, - chunk_t *chunk_size, chunk_t *chunk_mask, - chunk_t *chunk_shift, struct dm_dev *cow, - char **error) -{ - unsigned long chunk_size_ulong; - char *value; - - chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); - if (*chunk_size_arg == '\0' || *value != '\0') { - *error = "Invalid chunk size"; - return -EINVAL; - } - - if (!chunk_size_ulong) { - *chunk_size = *chunk_mask = *chunk_shift = 0; - return 0; - } - - /* - * Chunk size must be multiple of page size. Silently - * round up if it's not. - */ - chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9); - - /* Check chunk_size is a power of 2 */ - if (!is_power_of_2(chunk_size_ulong)) { - *error = "Chunk size is not a power of 2"; - return -EINVAL; - } - - /* Validate the chunk size against the device block size */ - if (chunk_size_ulong % (bdev_hardsect_size(cow->bdev) >> 9)) { - *error = "Chunk size is not a multiple of device blocksize"; - return -EINVAL; - } - - *chunk_size = chunk_size_ulong; - *chunk_mask = chunk_size_ulong - 1; - *chunk_shift = ffs(chunk_size_ulong) - 1; - - return 0; -} - -/* * Construct a snapshot mapping:

*/ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) @@ -637,55 +581,45 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) struct dm_snapshot *s; int i; int r = -EINVAL; - char persistent; char *origin_path; - char *cow_path; - chunk_t chunk_size, chunk_mask, chunk_shift; - struct dm_dev *cow; + struct dm_exception_store *store; + unsigned args_used; if (argc != 4) { ti->error = "requires exactly 4 arguments"; r = -EINVAL; - goto bad1; + goto bad_args; } origin_path = argv[0]; - cow_path = argv[1]; - persistent = toupper(*argv[2]); + argv++; + argc--; - if (persistent != 'P' && persistent != 'N') { - ti->error = "Persistent flag is not P or N"; + r = dm_exception_store_create(ti, argc, argv, &args_used, &store); + if (r) { + ti->error = "Couldn't create exception store"; r = -EINVAL; - goto bad1; + goto bad_args; } + argv += args_used; + argc -= args_used; + s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) { + if (!s) { ti->error = "Cannot allocate snapshot context private " "structure"; r = -ENOMEM; - goto bad1; + goto bad_snap; } r = dm_get_device(ti, origin_path, 0, ti->len, FMODE_READ, &s->origin); if (r) { ti->error = "Cannot get origin device"; - goto bad2; + goto bad_origin; } - r = dm_get_device(ti, cow_path, 0, 0, - FMODE_READ | FMODE_WRITE, &cow); - if (r) { - dm_put_device(ti, s->origin); - ti->error = "Cannot get COW device"; - goto bad2; - } - - r = set_chunk_size(s, argv[3], &chunk_size, &chunk_mask, &chunk_shift, - cow, &ti->error); - if (r) - goto bad3; - + s->store = store; s->valid = 1; s->active = 0; atomic_set(&s->pending_exceptions_count, 0); @@ -693,30 +627,22 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->pe_lock); /* Allocate hash table for COW data */ - if (init_hash_tables(s, chunk_shift, cow)) { + if (init_hash_tables(s)) { ti->error = "Unable to allocate hash table space"; r = -ENOMEM; - goto bad3; - } - - r = 
dm_exception_store_create(argv[2], ti, chunk_size, chunk_mask, - chunk_shift, cow, &s->store); - if (r) { - ti->error = "Couldn't create exception store"; - r = -EINVAL; - goto bad4; + goto bad_hash_tables; } r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); if (r) { ti->error = "Could not create kcopyd client"; - goto bad5; + goto bad_kcopyd; } s->pending_pool = mempool_create_slab_pool(MIN_IOS, pending_cache); if (!s->pending_pool) { ti->error = "Could not allocate mempool for pending exceptions"; - goto bad6; + goto bad_pending_pool; } s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, @@ -759,30 +685,29 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) return 0; - bad_load_and_register: +bad_load_and_register: mempool_destroy(s->tracked_chunk_pool); - bad_tracked_chunk_pool: +bad_tracked_chunk_pool: mempool_destroy(s->pending_pool); - bad6: +bad_pending_pool: dm_kcopyd_client_destroy(s->kcopyd_client); - bad5: - s->store->type->dtr(s->store); - - bad4: +bad_kcopyd: exit_exception_table(&s->pending, pending_cache); exit_exception_table(&s->complete, exception_cache); - bad3: - dm_put_device(ti, cow); +bad_hash_tables: dm_put_device(ti, s->origin); - bad2: +bad_origin: kfree(s); - bad1: +bad_snap: + dm_exception_store_destroy(store); + +bad_args: return r; } @@ -793,8 +718,6 @@ static void __free_exceptions(struct dm_snapshot *s) exit_exception_table(&s->pending, pending_cache); exit_exception_table(&s->complete, exception_cache); - - s->store->type->dtr(s->store); } static void snapshot_dtr(struct dm_target *ti) @@ -803,7 +726,6 @@ static void snapshot_dtr(struct dm_target *ti) int i; #endif struct dm_snapshot *s = ti->private; - struct dm_dev *cow = s->store->cow; flush_workqueue(ksnapd); @@ -831,7 +753,8 @@ static void snapshot_dtr(struct dm_target *ti) mempool_destroy(s->pending_pool); dm_put_device(ti, s->origin); - dm_put_device(ti, cow); + + dm_exception_store_destroy(s->store); kfree(s); } -- cgit 
v0.10.2 From 1e302a929e2da6e8448e2058e4b07b07252b57fe Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:35 +0100 Subject: dm snapshot: move status to exception store Let the exception store types print out their status through the new API, rather than having the snapshot code do it. Adjust the buffer position to allow for the preceding DMEMIT in the arguments to type->status(). Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 023a707..0a2e6e7 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -79,8 +79,9 @@ struct dm_exception_store_type { */ void (*drop_snapshot) (struct dm_exception_store *store); - int (*status) (struct dm_exception_store *store, status_type_t status, - char *result, unsigned int maxlen); + unsigned (*status) (struct dm_exception_store *store, + status_type_t status, char *result, + unsigned maxlen); /* * Return how full the snapshot is. 
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 3907c4c..1799205 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -688,11 +688,19 @@ static int persistent_ctr(struct dm_exception_store *store, return 0; } -static int persistent_status(struct dm_exception_store *store, - status_type_t status, char *result, - unsigned int maxlen) +static unsigned persistent_status(struct dm_exception_store *store, + status_type_t status, char *result, + unsigned maxlen) { - int sz = 0; + unsigned sz = 0; + + switch (status) { + case STATUSTYPE_INFO: + break; + case STATUSTYPE_TABLE: + DMEMIT(" %s P %llu", store->cow->name, + (unsigned long long)store->chunk_size); + } return sz; } diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c index a883d6e..cde5aa5 100644 --- a/drivers/md/dm-snap-transient.c +++ b/drivers/md/dm-snap-transient.c @@ -81,11 +81,19 @@ static int transient_ctr(struct dm_exception_store *store, return 0; } -static int transient_status(struct dm_exception_store *store, - status_type_t status, char *result, - unsigned maxlen) +static unsigned transient_status(struct dm_exception_store *store, + status_type_t status, char *result, + unsigned maxlen) { - int sz = 0; + unsigned sz = 0; + + switch (status) { + case STATUSTYPE_INFO: + break; + case STATUSTYPE_TABLE: + DMEMIT(" %s N %llu", store->cow->name, + (unsigned long long)store->chunk_size); + } return sz; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 974916b..981a041 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -70,9 +70,6 @@ struct dm_snapshot { /* Origin writes don't trigger exceptions until this is set */ int active; - /* Used for display of table */ - char type; - mempool_t *pending_pool; atomic_t pending_exceptions_count; @@ -1166,9 +1163,8 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, * make sense. 
*/ DMEMIT("%s", snap->origin->name); - DMEMIT(" %s %s %llu", snap->store->cow->name, - snap->store->type->name, - (unsigned long long)snap->store->chunk_size); + snap->store->type->status(snap->store, type, result + sz, + maxlen - sz); break; } -- cgit v0.10.2 From a32079ce17899192a7b98a42753be467605a8b2f Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 2 Apr 2009 19:55:35 +0100 Subject: dm snapshot: persistent fix dtr cleanup The persistent exception store destructor does not properly account for all conditions in which it can be called. If it is called after 'ctr' but before 'read_metadata' (e.g. if something else in 'snapshot_ctr' fails) then it will attempt to free areas of memory that haven't been allocated yet. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index 1799205..e75c6dd 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -162,9 +162,12 @@ static int alloc_area(struct pstore *ps) static void free_area(struct pstore *ps) { - vfree(ps->area); + if (ps->area) + vfree(ps->area); ps->area = NULL; - vfree(ps->zero_area); + + if (ps->zero_area) + vfree(ps->zero_area); ps->zero_area = NULL; } @@ -482,9 +485,16 @@ static void persistent_dtr(struct dm_exception_store *store) struct pstore *ps = get_info(store); destroy_workqueue(ps->metadata_wq); - dm_io_client_destroy(ps->io_client); - vfree(ps->callbacks); + + /* Created in read_header */ + if (ps->io_client) + dm_io_client_destroy(ps->io_client); free_area(ps); + + /* Allocated in persistent_read_metadata */ + if (ps->callbacks) + vfree(ps->callbacks); + kfree(ps); } @@ -661,7 +671,7 @@ static int persistent_ctr(struct dm_exception_store *store, struct pstore *ps; /* allocate the pstore */ - ps = kmalloc(sizeof(*ps), GFP_KERNEL); + ps = kzalloc(sizeof(*ps), GFP_KERNEL); if (!ps) return -ENOMEM; -- cgit v0.10.2 From 99c75e3130cff0c869c657ee97fb4a68958eeffc Mon Sep 17 
00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:36 +0100 Subject: dm: bio list add bio_list_add_head Introduce a function that adds a bio to the head of the list for use by the patch that will support barriers. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h index d4509be..345098b 100644 --- a/drivers/md/dm-bio-list.h +++ b/drivers/md/dm-bio-list.h @@ -52,6 +52,16 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio) bl->tail = bio; } +static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = bl->head; + + bl->head = bio; + + if (!bl->tail) + bl->tail = bio; +} + static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) { if (!bl2->head) -- cgit v0.10.2 From 143773965b4677bd72dbbf71f52bea0df2ed4e18 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:36 +0100 Subject: dm: remove unnecessary work queue type field Remove "type" field from struct dm_wq_req because we no longer need it to have more than one value. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8d40f27..bbc7ecf 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -100,9 +100,6 @@ union map_info *dm_get_mapinfo(struct bio *bio) * Work processed by per-device workqueue. 
*/ struct dm_wq_req { - enum { - DM_WQ_FLUSH_DEFERRED, - } type; struct work_struct work; struct mapped_device *md; void *context; @@ -1434,32 +1431,24 @@ static void dm_wq_work(struct work_struct *work) struct mapped_device *md = req->md; down_write(&md->io_lock); - switch (req->type) { - case DM_WQ_FLUSH_DEFERRED: - __flush_deferred_io(md); - break; - default: - DMERR("dm_wq_work: unrecognised work type %d", req->type); - BUG(); - } + __flush_deferred_io(md); up_write(&md->io_lock); } -static void dm_wq_queue(struct mapped_device *md, int type, void *context, +static void dm_wq_queue(struct mapped_device *md, void *context, struct dm_wq_req *req) { - req->type = type; req->md = md; req->context = context; INIT_WORK(&req->work, dm_wq_work); queue_work(md->wq, &req->work); } -static void dm_queue_flush(struct mapped_device *md, int type, void *context) +static void dm_queue_flush(struct mapped_device *md, void *context) { struct dm_wq_req req; - dm_wq_queue(md, type, context, &req); + dm_wq_queue(md, context, &req); flush_workqueue(md->wq); } @@ -1605,7 +1594,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) /* were we interrupted ? */ if (r < 0) { - dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); + dm_queue_flush(md, NULL); unlock_fs(md); goto out; /* pushback list is already flushed, so skip flush */ @@ -1645,7 +1634,7 @@ int dm_resume(struct mapped_device *md) if (r) goto out; - dm_queue_flush(md, DM_WQ_FLUSH_DEFERRED, NULL); + dm_queue_flush(md, NULL); unlock_fs(md); -- cgit v0.10.2 From 9a1fb46448cac50e93115322ad28f417936f7852 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:36 +0100 Subject: dm: remove unnecessary work queue context field Remove the context field from struct dm_wq_req because we will no longer need it. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index bbc7ecf..f913b50 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -102,7 +102,6 @@ union map_info *dm_get_mapinfo(struct bio *bio) struct dm_wq_req { struct work_struct work; struct mapped_device *md; - void *context; }; struct mapped_device { @@ -1435,20 +1434,18 @@ static void dm_wq_work(struct work_struct *work) up_write(&md->io_lock); } -static void dm_wq_queue(struct mapped_device *md, void *context, - struct dm_wq_req *req) +static void dm_wq_queue(struct mapped_device *md, struct dm_wq_req *req) { req->md = md; - req->context = context; INIT_WORK(&req->work, dm_wq_work); queue_work(md->wq, &req->work); } -static void dm_queue_flush(struct mapped_device *md, void *context) +static void dm_queue_flush(struct mapped_device *md) { struct dm_wq_req req; - dm_wq_queue(md, context, &req); + dm_wq_queue(md, &req); flush_workqueue(md->wq); } @@ -1594,7 +1591,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) /* were we interrupted ? */ if (r < 0) { - dm_queue_flush(md, NULL); + dm_queue_flush(md); unlock_fs(md); goto out; /* pushback list is already flushed, so skip flush */ @@ -1634,7 +1631,7 @@ int dm_resume(struct mapped_device *md) if (r) goto out; - dm_queue_flush(md, NULL); + dm_queue_flush(md); unlock_fs(md); -- cgit v0.10.2 From 53d5914f288b67ddc4d594d6a09568fe114bb909 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:37 +0100 Subject: dm: remove unnecessary struct dm_wq_req Remove struct dm_wq_req and move "work" directly into struct mapped_device. In the revised implementation, the thread will do just one type of work (processing the queue). 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f913b50..dac79d1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -99,11 +99,6 @@ union map_info *dm_get_mapinfo(struct bio *bio) /* * Work processed by per-device workqueue. */ -struct dm_wq_req { - struct work_struct work; - struct mapped_device *md; -}; - struct mapped_device { struct rw_semaphore io_lock; struct mutex suspend_lock; @@ -125,6 +120,7 @@ struct mapped_device { */ atomic_t pending; wait_queue_head_t wait; + struct work_struct work; struct bio_list deferred; struct bio_list pushback; @@ -1070,6 +1066,8 @@ out: static struct block_device_operations dm_blk_dops; +static void dm_wq_work(struct work_struct *work); + /* * Allocate and initialise a blank device with a given minor. */ @@ -1136,6 +1134,7 @@ static struct mapped_device *alloc_dev(int minor) atomic_set(&md->pending, 0); init_waitqueue_head(&md->wait); + INIT_WORK(&md->work, dm_wq_work); init_waitqueue_head(&md->eventq); md->disk->major = _major; @@ -1426,26 +1425,17 @@ static void __merge_pushback_list(struct mapped_device *md) static void dm_wq_work(struct work_struct *work) { - struct dm_wq_req *req = container_of(work, struct dm_wq_req, work); - struct mapped_device *md = req->md; + struct mapped_device *md = container_of(work, struct mapped_device, + work); down_write(&md->io_lock); __flush_deferred_io(md); up_write(&md->io_lock); } -static void dm_wq_queue(struct mapped_device *md, struct dm_wq_req *req) -{ - req->md = md; - INIT_WORK(&req->work, dm_wq_work); - queue_work(md->wq, &req->work); -} - static void dm_queue_flush(struct mapped_device *md) { - struct dm_wq_req req; - - dm_wq_queue(md, &req); + queue_work(md->wq, &md->work); flush_workqueue(md->wq); } -- cgit v0.10.2 From 8a53c28db42853591edbe8103e2ce3c4f2917f42 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:37 +0100 Subject: dm: rename __split_bio Rename __split_bio() to 
__split_and_process_bio() because it not only splits the bio into several parts, but also submits them to target drivers. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index dac79d1..75d7104 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -826,9 +826,9 @@ static int __clone_and_map(struct clone_info *ci) } /* - * Split the bio into several clones. + * Split the bio into several clones and submit it to targets.
*/ -static int __split_and_process_bio(struct mapped_device *md, struct bio *bio) +static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) { struct clone_info ci; int error = 0; ci.map = dm_get_table(md); - if (unlikely(!ci.map)) - return -EIO; + if (unlikely(!ci.map)) { + bio_io_error(bio); + return; + } if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { dm_table_put(ci.map); bio_endio(bio, -EOPNOTSUPP); - return 0; + return; } ci.md = md; ci.bio = bio; @@ -859,8 +861,6 @@ static int __split_and_process_bio(struct mapped_device *md, struct bio *bio) /* drop the extra reference count */ dec_pending(ci.io, error); dm_table_put(ci.map); - - return 0; } /*----------------------------------------------------------------- * CRUD END @@ -951,8 +951,9 @@ static int dm_request(struct request_queue *q, struct bio *bio) down_read(&md->io_lock); } - r = __split_and_process_bio(md, bio); + __split_and_process_bio(md, bio); up_read(&md->io_lock); + return 0; out_req: if (r < 0) @@ -1404,10 +1405,8 @@ static void __flush_deferred_io(struct mapped_device *md) { struct bio *c; - while ((c = bio_list_pop(&md->deferred))) { - if (__split_and_process_bio(md, c)) - bio_io_error(c); - } + while ((c = bio_list_pop(&md->deferred))) + __split_and_process_bio(md, c); clear_bit(DMF_BLOCK_IO, &md->flags); } -- cgit v0.10.2 From ef2085870ea448b3c19160d899cf4f948da6a384 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:38 +0100 Subject: dm: merge __flush_deferred_io into caller Merge __flush_deferred_io() into the only caller, dm_wq_work(). There's no need to have a function that has only one caller. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 385c2e8..4ba0811 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1401,14 +1401,20 @@ static int dm_wait_for_completion(struct mapped_device *md) /* * Process the deferred bios */ -static void __flush_deferred_io(struct mapped_device *md) +static void dm_wq_work(struct work_struct *work) { + struct mapped_device *md = container_of(work, struct mapped_device, + work); struct bio *c; + down_write(&md->io_lock); + while ((c = bio_list_pop(&md->deferred))) __split_and_process_bio(md, c); clear_bit(DMF_BLOCK_IO, &md->flags); + + up_write(&md->io_lock); } static void __merge_pushback_list(struct mapped_device *md) @@ -1422,16 +1428,6 @@ static void __merge_pushback_list(struct mapped_device *md) spin_unlock_irqrestore(&md->pushback_lock, flags); } -static void dm_wq_work(struct work_struct *work) -{ - struct mapped_device *md = container_of(work, struct mapped_device, - work); - - down_write(&md->io_lock); - __flush_deferred_io(md); - up_write(&md->io_lock); -} - static void dm_queue_flush(struct mapped_device *md) { queue_work(md->wq, &md->work); -- cgit v0.10.2 From 401600dfd368305e641d79db16d514f55c084544 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:38 +0100 Subject: dm: allow uninterruptible wait for pending io Allow uninterruptible wait for pending IOs. Add argument "interruptible" to dm_wait_for_completion that specifies either interruptible or uninterruptible waiting. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4ba0811..ae21833 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1375,18 +1375,19 @@ void dm_put(struct mapped_device *md) } EXPORT_SYMBOL_GPL(dm_put); -static int dm_wait_for_completion(struct mapped_device *md) +static int dm_wait_for_completion(struct mapped_device *md, int interruptible) { int r = 0; while (1) { - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(interruptible); smp_mb(); if (!atomic_read(&md->pending)) break; - if (signal_pending(current)) { + if (interruptible == TASK_INTERRUPTIBLE && + signal_pending(current)) { r = -EINTR; break; } @@ -1565,7 +1566,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) /* * Wait for the already-mapped ios to complete. */ - r = dm_wait_for_completion(md); + r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); down_write(&md->io_lock); remove_wait_queue(&md->wait, &wait); -- cgit v0.10.2 From 022c261100e15652d720395b17ce76304fb2f97f Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:39 +0100 Subject: dm: merge pushback and deferred bio lists Merge pushback and deferred lists into one list - use deferred list for both deferred and pushed-back bios. This will be needed for proper support of barrier bios: it is impossible to support ordering correctly with two lists because the requests on both lists will be mixed up. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ae21833..f570372 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -102,7 +102,6 @@ union map_info *dm_get_mapinfo(struct bio *bio) struct mapped_device { struct rw_semaphore io_lock; struct mutex suspend_lock; - spinlock_t pushback_lock; rwlock_t map_lock; atomic_t holders; atomic_t open_count; @@ -122,7 +121,7 @@ struct mapped_device { wait_queue_head_t wait; struct work_struct work; struct bio_list deferred; - struct bio_list pushback; + spinlock_t deferred_lock; /* * Processing queue (flush/barriers) @@ -445,7 +444,9 @@ static int queue_io(struct mapped_device *md, struct bio *bio) return 1; } + spin_lock_irq(&md->deferred_lock); bio_list_add(&md->deferred, bio); + spin_unlock_irq(&md->deferred_lock); up_write(&md->io_lock); return 0; /* deferred successfully */ @@ -529,16 +530,14 @@ static void dec_pending(struct dm_io *io, int error) if (io->error == DM_ENDIO_REQUEUE) { /* * Target requested pushing back the I/O. - * This must be handled before the sleeper on - * suspend queue merges the pushback list. */ - spin_lock_irqsave(&md->pushback_lock, flags); + spin_lock_irqsave(&md->deferred_lock, flags); if (__noflush_suspending(md)) - bio_list_add(&md->pushback, io->bio); + bio_list_add(&md->deferred, io->bio); else /* noflush suspend was interrupted. 
*/ io->error = -EIO; - spin_unlock_irqrestore(&md->pushback_lock, flags); + spin_unlock_irqrestore(&md->deferred_lock, flags); } end_io_acct(io); @@ -1096,7 +1095,7 @@ static struct mapped_device *alloc_dev(int minor) init_rwsem(&md->io_lock); mutex_init(&md->suspend_lock); - spin_lock_init(&md->pushback_lock); + spin_lock_init(&md->deferred_lock); rwlock_init(&md->map_lock); atomic_set(&md->holders, 1); atomic_set(&md->open_count, 0); @@ -1410,25 +1409,21 @@ static void dm_wq_work(struct work_struct *work) down_write(&md->io_lock); - while ((c = bio_list_pop(&md->deferred))) +next_bio: + spin_lock_irq(&md->deferred_lock); + c = bio_list_pop(&md->deferred); + spin_unlock_irq(&md->deferred_lock); + + if (c) { __split_and_process_bio(md, c); + goto next_bio; + } clear_bit(DMF_BLOCK_IO, &md->flags); up_write(&md->io_lock); } -static void __merge_pushback_list(struct mapped_device *md) -{ - unsigned long flags; - - spin_lock_irqsave(&md->pushback_lock, flags); - clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); - bio_list_merge_head(&md->deferred, &md->pushback); - bio_list_init(&md->pushback); - spin_unlock_irqrestore(&md->pushback_lock, flags); -} - static void dm_queue_flush(struct mapped_device *md) { queue_work(md->wq, &md->work); @@ -1572,7 +1567,7 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) remove_wait_queue(&md->wait, &wait); if (noflush) - __merge_pushback_list(md); + clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); up_write(&md->io_lock); /* were we interrupted ? */ -- cgit v0.10.2 From b44ebeb017b8a5fe5439e1259708b68cf83a8921 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:39 +0100 Subject: dm: move wait queue declaration Move wait queue declaration and unplug to dm_wait_for_completion. The purpose is to minimize duplicate code in the further patches. The patch reorders functions a little bit. It doesn't change any functionality. 
For proper non-deadlock operation, add_wait_queue must happen before set_current_state(interruptible) and before the test for !atomic_read(&md->pending). Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index f570372..7867d90 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1377,6 +1377,11 @@ EXPORT_SYMBOL_GPL(dm_put); static int dm_wait_for_completion(struct mapped_device *md, int interruptible) { int r = 0; + DECLARE_WAITQUEUE(wait, current); + + dm_unplug_all(md->queue); + + add_wait_queue(&md->wait, &wait); while (1) { set_current_state(interruptible); @@ -1395,6 +1400,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) } set_current_state(TASK_RUNNING); + remove_wait_queue(&md->wait, &wait); + return r; } @@ -1501,7 +1508,6 @@ static void unlock_fs(struct mapped_device *md) int dm_suspend(struct mapped_device *md, unsigned suspend_flags) { struct dm_table *map = NULL; - DECLARE_WAITQUEUE(wait, current); int r = 0; int do_lockfs = suspend_flags & DM_SUSPEND_LOCKFS_FLAG ? 1 : 0; int noflush = suspend_flags & DM_SUSPEND_NOFLUSH_FLAG ? 1 : 0; @@ -1551,20 +1557,14 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) down_write(&md->io_lock); set_bit(DMF_BLOCK_IO, &md->flags); - add_wait_queue(&md->wait, &wait); up_write(&md->io_lock); - /* unplug */ - if (map) - dm_table_unplug_all(map); - /* * Wait for the already-mapped ios to complete. */ r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); down_write(&md->io_lock); - remove_wait_queue(&md->wait, &wait); if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); -- cgit v0.10.2 From 99360b4c18f7675b50d283301d46d755affe75fd Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 2 Apr 2009 19:55:39 +0100 Subject: dm: set queue ordered mode Set queue ordered mode. It doesn't really matter what we set here because we don't ever put any requests on the queue. 
But we need to set something other than QUEUE_ORDERED_NONE so that __generic_make_request passes barrier requests to us. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7867d90..788ba96 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1112,6 +1112,7 @@ static struct mapped_device *alloc_dev(int minor) md->queue->backing_dev_info.congested_fn = dm_any_congested; md->queue->backing_dev_info.congested_data = md; blk_queue_make_request(md->queue, dm_request); + blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL); blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY); md->queue->unplug_fn = dm_unplug_all; blk_queue_merge_bvec(md->queue, dm_merge_bvec); -- cgit v0.10.2