From 6d45d93ead319423099b82a4efd775bc0f159121 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 16 Oct 2009 23:18:14 +0100 Subject: dm snapshot: sort by chunk size to fix race Avoid a race causing corruption when snapshots of the same origin have different chunk sizes by sorting the internal list of snapshots by chunk size, largest first. https://bugzilla.redhat.com/show_bug.cgi?id=182659 For example, let's have two snapshots with different chunk sizes. The first snapshot (1) has a small chunk size and the second snapshot (2) has a large chunk size. Let's have chunks A, B, C in these snapshots: snapshot1: ====A==== ====B==== snapshot2: ==========C========== (Chunk size is a power of 2. Chunks are aligned.) A write to the origin at a position within A and C comes along. It triggers reallocation of A, then reallocation of C and links them together using A as the 'primary' exception. Then another write to the origin comes along at a position within B and C. It creates a pending exception for B. C already has a reallocation in progress and it already has a primary exception (A), so nothing is done to it: B and C are not linked. If the reallocation of B finishes before the reallocation of C, then, because there is no link with the pending exception for C, it does not know to wait for it, and the second write is dispatched to the origin and causes data corruption in the chunk C in snapshot2. To avoid this situation, we maintain snapshots sorted in descending order of chunk size. This leads to a guaranteed ordering on the links between the pending exceptions and avoids the problem explained above - both A and B now get linked to C. 
Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 57f1bf7..3a53a5a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -296,6 +296,7 @@ static void __insert_origin(struct origin *o) */ static int register_snapshot(struct dm_snapshot *snap) { + struct dm_snapshot *l; struct origin *o, *new_o; struct block_device *bdev = snap->origin->bdev; @@ -319,7 +320,11 @@ static int register_snapshot(struct dm_snapshot *snap) __insert_origin(o); } - list_add_tail(&snap->list, &o->snapshots); + /* Sort the list according to chunk size, largest-first smallest-last */ + list_for_each_entry(l, &o->snapshots, list) + if (l->store->chunk_size < snap->store->chunk_size) + break; + list_add_tail(&snap->list, &l->list); up_write(&_origins_lock); return 0; -- cgit v0.10.2 From 034a186d29dbcef099e57ab23ec39440596be911 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 16 Oct 2009 23:18:14 +0100 Subject: dm snapshot: free exception store on init failure While initializing the snapshot module, if we fail to register the snapshot target then we must back-out the exception store module initialization. 
Cc: stable@kernel.org Signed-off-by: Jonathan Brassow Reviewed-by: Mikulas Patocka Reviewed-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 3a53a5a..53f4063 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1470,7 +1470,7 @@ static int __init dm_snapshot_init(void) r = dm_register_target(&snapshot_target); if (r) { DMERR("snapshot target register failed %d", r); - return r; + goto bad_register_snapshot_target; } r = dm_register_target(&origin_target); @@ -1527,6 +1527,9 @@ bad2: dm_unregister_target(&origin_target); bad1: dm_unregister_target(&snapshot_target); + +bad_register_snapshot_target: + dm_exception_store_exit(); return r; } -- cgit v0.10.2 From bca915aae803cf01fde4461fc9c093cf5a86d7fc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 16 Oct 2009 23:18:15 +0100 Subject: dm log: userspace fix incorrect luid cast in userspace_ctr mips: drivers/md/dm-log-userspace-base.c: In function `userspace_ctr': drivers/md/dm-log-userspace-base.c:159: warning: cast from pointer to integer of different size Cc: stable@kernel.org Cc: Jonathan Brassow Signed-off-by: Andrew Morton Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-log-userspace-base.c b/drivers/md/dm-log-userspace-base.c index 652bd33..7ac2c14 100644 --- a/drivers/md/dm-log-userspace-base.c +++ b/drivers/md/dm-log-userspace-base.c @@ -156,7 +156,7 @@ static int userspace_ctr(struct dm_dirty_log *log, struct dm_target *ti, } /* The ptr value is sufficient for local unique id */ - lc->luid = (uint64_t)lc; + lc->luid = (unsigned long)lc; lc->ti = ti; -- cgit v0.10.2 From 03022c54b9725026c0370a810168975c387ad04c Mon Sep 17 00:00:00 2001 From: Zdenek Kabelac Date: Fri, 16 Oct 2009 23:18:15 +0100 Subject: dm: add missing del_gendisk to alloc_dev error path Add missing del_gendisk() to error path when creation of workqueue fails. 
Otherwise there is a resource leak and the following warning is shown: WARNING: at fs/sysfs/dir.c:487 sysfs_add_one+0xc5/0x160() sysfs: cannot create duplicate filename '/devices/virtual/block/dm-0' Cc: stable@kernel.org Signed-off-by: Zdenek Kabelac Reviewed-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 376f1ab..c5f9918 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1822,6 +1822,7 @@ static struct mapped_device *alloc_dev(int minor) bad_bdev: destroy_workqueue(md->wq); bad_thread: + del_gendisk(md->disk); put_disk(md->disk); bad_disk: blk_cleanup_queue(md->queue); -- cgit v0.10.2 From f88fb981183e71daf40bbd84bc8251bbf7b59e19 Mon Sep 17 00:00:00 2001 From: Kiyoshi Ueda Date: Fri, 16 Oct 2009 23:18:15 +0100 Subject: dm: dec_pending needs locking to save error value Multiple instances of dec_pending() can run concurrently so a lock is needed when it saves the first error code. I have never experienced an actual problem without locking and just found this during code inspection while implementing the barrier support patch for request-based dm. This patch adds the locking. I've done compile, boot and basic I/O testing. 
Cc: stable@kernel.org Signed-off-by: Kiyoshi Ueda Signed-off-by: Jun'ichi Nomura Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c5f9918..724efc6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -47,6 +47,7 @@ struct dm_io { atomic_t io_count; struct bio *bio; unsigned long start_time; + spinlock_t endio_lock; }; /* @@ -578,8 +579,12 @@ static void dec_pending(struct dm_io *io, int error) struct mapped_device *md = io->md; /* Push-back supersedes any I/O errors */ - if (error && !(io->error > 0 && __noflush_suspending(md))) - io->error = error; + if (unlikely(error)) { + spin_lock_irqsave(&io->endio_lock, flags); + if (!(io->error > 0 && __noflush_suspending(md))) + io->error = error; + spin_unlock_irqrestore(&io->endio_lock, flags); + } if (atomic_dec_and_test(&io->io_count)) { if (io->error == DM_ENDIO_REQUEUE) { @@ -1226,6 +1231,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) atomic_set(&ci.io->io_count, 1); ci.io->bio = bio; ci.io->md = md; + spin_lock_init(&ci.io->endio_lock); ci.sector = bio->bi_sector; ci.sector_count = bio_sectors(bio); if (unlikely(bio_empty_barrier(bio))) -- cgit v0.10.2 From 3f2412dc85260e5aae7ebb03bf50d5b1407e3083 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 16 Oct 2009 23:18:16 +0100 Subject: dm snapshot: require non zero chunk size by end of ctr If we are creating a snapshot with a memory-stored exception store, fail if the user didn't specify a chunk size. A zero chunk size would probably crash a lot of places in the rest of the snapshot code. 
Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Reviewed-by: Jonathan Brassow Reviewed-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 53f4063..9cb392b 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -673,6 +673,11 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) bio_list_init(&s->queued_bios); INIT_WORK(&s->queued_bios_work, flush_queued_bios); + if (!s->store->chunk_size) { + ti->error = "Chunk size not set"; + goto bad_load_and_register; + } + /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ if (register_snapshot(s)) { -- cgit v0.10.2 From 0e8c4e4e3ebb15756ddc4170a88149a2cd323cfe Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 16 Oct 2009 23:18:16 +0100 Subject: dm exception store: fix failed set_chunk_size error path Properly close the device if failing because of an invalid chunk size. Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 556acff..e5de762 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -251,7 +251,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, r = set_chunk_size(tmp_store, argv[2], &ti->error); if (r) - goto bad_cow; + goto bad_ctr; r = type->ctr(tmp_store, 0, NULL); if (r) { -- cgit v0.10.2 From 4c6fff445d7aa753957856278d4d93bcad6e2c14 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 16 Oct 2009 23:18:16 +0100 Subject: dm snapshot: lock snapshot while supplying status This patch locks the snapshot when returning status. It fixes a race when it could return an invalid number of free chunks if someone was simultaneously modifying it. 
Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 9cb392b..9bc814a 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1152,6 +1152,8 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, unsigned sz = 0; struct dm_snapshot *snap = ti->private; + down_write(&snap->lock); + switch (type) { case STATUSTYPE_INFO: if (!snap->valid) @@ -1183,6 +1185,8 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, break; } + up_write(&snap->lock); + return 0; } -- cgit v0.10.2 From df96eee679ba28c98cf722fa7c9f4286ee1ed0bd Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 16 Oct 2009 23:18:17 +0100 Subject: dm snapshot: use unsigned integer chunk size Use an unsigned integer chunk size. The maximum chunk size is 512kB and there will never be a need to use a 4GB chunk size, so the number can be 32-bit. This fixes a compiler failure on 32-bit systems with large block devices. 
Cc: stable@kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Reviewed-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index e5de762..932d1b1 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -155,7 +155,8 @@ static int set_chunk_size(struct dm_exception_store *store, char *value; chunk_size_ulong = simple_strtoul(chunk_size_arg, &value, 10); - if (*chunk_size_arg == '\0' || *value != '\0') { + if (*chunk_size_arg == '\0' || *value != '\0' || + chunk_size_ulong > UINT_MAX) { *error = "Invalid chunk size"; return -EINVAL; } @@ -171,34 +172,35 @@ static int set_chunk_size(struct dm_exception_store *store, */ chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9); - return dm_exception_store_set_chunk_size(store, chunk_size_ulong, + return dm_exception_store_set_chunk_size(store, + (unsigned) chunk_size_ulong, error); } int dm_exception_store_set_chunk_size(struct dm_exception_store *store, - unsigned long chunk_size_ulong, + unsigned chunk_size, char **error) { /* Check chunk_size is a power of 2 */ - if (!is_power_of_2(chunk_size_ulong)) { + if (!is_power_of_2(chunk_size)) { *error = "Chunk size is not a power of 2"; return -EINVAL; } /* Validate the chunk size against the device block size */ - if (chunk_size_ulong % (bdev_logical_block_size(store->cow->bdev) >> 9)) { + if (chunk_size % (bdev_logical_block_size(store->cow->bdev) >> 9)) { *error = "Chunk size is not a multiple of device blocksize"; return -EINVAL; } - if (chunk_size_ulong > INT_MAX >> SECTOR_SHIFT) { + if (chunk_size > INT_MAX >> SECTOR_SHIFT) { *error = "Chunk size is too high"; return -EINVAL; } - store->chunk_size = chunk_size_ulong; - store->chunk_mask = chunk_size_ulong - 1; - store->chunk_shift = ffs(chunk_size_ulong) - 1; + store->chunk_size = chunk_size; + store->chunk_mask = chunk_size - 1; + store->chunk_shift = ffs(chunk_size) - 1; return 0; } 
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h index 812c718..8a223a4 100644 --- a/drivers/md/dm-exception-store.h +++ b/drivers/md/dm-exception-store.h @@ -101,9 +101,9 @@ struct dm_exception_store { struct dm_dev *cow; /* Size of data blocks saved - must be a power of 2 */ - chunk_t chunk_size; - chunk_t chunk_mask; - chunk_t chunk_shift; + unsigned chunk_size; + unsigned chunk_mask; + unsigned chunk_shift; void *context; }; @@ -169,7 +169,7 @@ int dm_exception_store_type_register(struct dm_exception_store_type *type); int dm_exception_store_type_unregister(struct dm_exception_store_type *type); int dm_exception_store_set_chunk_size(struct dm_exception_store *store, - unsigned long chunk_size_ulong, + unsigned chunk_size, char **error); int dm_exception_store_create(struct dm_target *ti, int argc, char **argv, diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c index d5b2e08..0c74642 100644 --- a/drivers/md/dm-snap-persistent.c +++ b/drivers/md/dm-snap-persistent.c @@ -284,12 +284,13 @@ static int read_header(struct pstore *ps, int *new_snapshot) { int r; struct disk_header *dh; - chunk_t chunk_size; + unsigned chunk_size; int chunk_size_supplied = 1; char *chunk_err; /* - * Use default chunk size (or hardsect_size, if larger) if none supplied + * Use default chunk size (or logical_block_size, if larger) + * if none supplied */ if (!ps->store->chunk_size) { ps->store->chunk_size = max(DM_CHUNK_SIZE_DEFAULT_SECTORS, @@ -334,10 +335,9 @@ static int read_header(struct pstore *ps, int *new_snapshot) return 0; if (chunk_size_supplied) - DMWARN("chunk size %llu in device metadata overrides " - "table chunk size of %llu.", - (unsigned long long)chunk_size, - (unsigned long long)ps->store->chunk_size); + DMWARN("chunk size %u in device metadata overrides " + "table chunk size of %u.", + chunk_size, ps->store->chunk_size); /* We had a bogus chunk_size. Fix stuff up. 
*/ free_area(ps); @@ -345,8 +345,8 @@ static int read_header(struct pstore *ps, int *new_snapshot) r = dm_exception_store_set_chunk_size(ps->store, chunk_size, &chunk_err); if (r) { - DMERR("invalid on-disk chunk size %llu: %s.", - (unsigned long long)chunk_size, chunk_err); + DMERR("invalid on-disk chunk size %u: %s.", + chunk_size, chunk_err); return r; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 9bc814a..3a3ba46 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -961,7 +961,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) src.bdev = bdev; src.sector = chunk_to_sector(s->store, pe->e.old_chunk); - src.count = min(s->store->chunk_size, dev_size - src.sector); + src.count = min((sector_t)s->store->chunk_size, dev_size - src.sector); dest.bdev = s->store->cow->bdev; dest.sector = chunk_to_sector(s->store, pe->e.new_chunk); @@ -1402,7 +1402,7 @@ static void origin_resume(struct dm_target *ti) struct dm_dev *dev = ti->private; struct dm_snapshot *snap; struct origin *o; - chunk_t chunk_size = 0; + unsigned chunk_size = 0; down_read(&_origins_lock); o = __lookup_origin(dev->bdev); -- cgit v0.10.2 From c1cc65caa19bb8a1b2e371000ef2719581db1691 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 16 Oct 2009 23:18:22 +0100 Subject: dm snapshot: allow chunk size to be less than page size Allow the snapshot chunk size to be smaller than the page size. The code is now capable of handling this due to some previous fixes and enhancements. As the page size varies between computers, prior to this patch, the chunk size of a snapshot dictated which machines could read it: Snapshots created on one machine might not be readable on another. 
Signed-off-by: Mikulas Patocka Reviewed-by: Mike Snitzer Reviewed-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 932d1b1..7dbe652 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -138,16 +138,6 @@ int dm_exception_store_type_unregister(struct dm_exception_store_type *type) } EXPORT_SYMBOL(dm_exception_store_type_unregister); -/* - * Round a number up to the nearest 'size' boundary. size must - * be a power of 2. - */ -static ulong round_up(ulong n, ulong size) -{ - size--; - return (n + size) & ~size; -} - static int set_chunk_size(struct dm_exception_store *store, const char *chunk_size_arg, char **error) { @@ -166,12 +156,6 @@ static int set_chunk_size(struct dm_exception_store *store, return 0; } - /* - * Chunk size must be multiple of page size. Silently - * round up if it's not. - */ - chunk_size_ulong = round_up(chunk_size_ulong, PAGE_SIZE >> 9); - return dm_exception_store_set_chunk_size(store, (unsigned) chunk_size_ulong, error); -- cgit v0.10.2