From 924362629bf5645aee5f49f8a0d0d5b193e65997 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 24 Apr 2008 21:41:50 +0100 Subject: dm snapshot: fix chunksize sector conversion If a snapshot has a smaller chunksize than the page size the conversion to pages currently returns 0 instead of 1, causing: kernel BUG in mempool_resize. Signed-off-by: Mikulas Patocka Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon Cc: stable@kernel.org diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 5bbce29..c7d305b 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -131,7 +131,7 @@ struct pstore { static unsigned sectors_to_pages(unsigned sectors) { - return sectors / (PAGE_SIZE >> 9); + return DIV_ROUND_UP(sectors, PAGE_SIZE >> 9); } static int alloc_area(struct pstore *ps) -- cgit v0.10.2 From 8ee2767a5903fde549fd3597690f64c8d951051a Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 24 Apr 2008 21:42:36 +0100 Subject: dm snapshot: reduce default memory allocation Limit the amount of memory allocated per snapshot on systems with a large page size. (The larger default chunk size on these systems compensates for the smaller number of pages reserved.) Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 4dc8a43..08047fb 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -36,9 +36,9 @@ #define SNAPSHOT_COPY_PRIORITY 2 /* - * Each snapshot reserves this many pages for io + * Reserve 1MB for each snapshot initially (with minimum of 1 page). */ -#define SNAPSHOT_PAGES 256 +#define SNAPSHOT_PAGES (((1UL << 20) >> PAGE_SHIFT) ? : 1) static struct workqueue_struct *ksnapd; static void flush_queued_bios(struct work_struct *work); -- cgit v0.10.2 From c12bfc923ee02de5611730ddec087c11b3947038 Mon Sep 17 00:00:00 2001 From: "Robert P. J. Day" Date: Thu, 24 Apr 2008 21:42:44 +0100 Subject: dm raid1: use list_split_init Use shorter list_splice_init() for brevity. Signed-off-by: Robert P. J. Day Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 762cb08..64489e7 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -405,24 +405,22 @@ static void rh_update_states(struct region_hash *rh) write_lock_irq(&rh->hash_lock); spin_lock(&rh->region_lock); if (!list_empty(&rh->clean_regions)) { - list_splice(&rh->clean_regions, &clean); - INIT_LIST_HEAD(&rh->clean_regions); + list_splice_init(&rh->clean_regions, &clean); list_for_each_entry(reg, &clean, list) list_del(®->hash_list); } if (!list_empty(&rh->recovered_regions)) { - list_splice(&rh->recovered_regions, &recovered); - INIT_LIST_HEAD(&rh->recovered_regions); + list_splice_init(&rh->recovered_regions, &recovered); list_for_each_entry (reg, &recovered, list) list_del(®->hash_list); } if (!list_empty(&rh->failed_recovered_regions)) { - list_splice(&rh->failed_recovered_regions, &failed_recovered); - INIT_LIST_HEAD(&rh->failed_recovered_regions); + list_splice_init(&rh->failed_recovered_regions, + &failed_recovered); list_for_each_entry(reg, &failed_recovered, list) list_del(®->hash_list); -- cgit v0.10.2 From b7fd54a70f99061721e604d72d940541e5b2b168 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 24 Apr 2008 21:43:06 +0100 Subject: dm log: generalise name in messages Change dm-log.c messages from "mirror log" to "dirty region log" as a new dm target wants to share this code. Signed-off-by: Heinz Mauelshagen Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 2a74b21..34c25b0 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -1,5 +1,6 @@ /* * Copyright (C) 2003 Sistina Software + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. * * This file is released under the LGPL. */ @@ -12,7 +13,7 @@ #include "dm-log.h" #include "dm-io.h" -#define DM_MSG_PREFIX "mirror log" +#define DM_MSG_PREFIX "dirty region log" static LIST_HEAD(_log_types); static DEFINE_SPINLOCK(_lock); @@ -315,7 +316,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti, int r; if (argc < 1 || argc > 2) { - DMWARN("wrong number of arguments to mirror log"); + DMWARN("wrong number of arguments to dirty region log"); return -EINVAL; } @@ -325,8 +326,8 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti, else if (!strcmp(argv[1], "nosync")) sync = NOSYNC; else { - DMWARN("unrecognised sync argument to mirror log: %s", - argv[1]); + DMWARN("unrecognised sync argument to " + "dirty region log: %s", argv[1]); return -EINVAL; } } @@ -467,7 +468,7 @@ static int disk_ctr(struct dirty_log *log, struct dm_target *ti, struct dm_dev *dev; if (argc < 2 || argc > 3) { - DMWARN("wrong number of arguments to disk mirror log"); + DMWARN("wrong number of arguments to disk dirty region log"); return -EINVAL; } @@ -524,7 +525,7 @@ static int disk_resume(struct dirty_log *log) /* read the disk header */ r = read_header(lc); if (r) { - DMWARN("%s: Failed to read header on mirror log device", + DMWARN("%s: Failed to read header on dirty region log device", lc->log_dev->name); fail_log_device(lc); /* @@ -562,7 +563,7 @@ static int disk_resume(struct dirty_log *log) /* write the new header */ r = write_header(lc); if (r) { - DMWARN("%s: Failed to write header on mirror log device", + DMWARN("%s: Failed to write header on dirty region log device", lc->log_dev->name); fail_log_device(lc); } -- cgit v0.10.2 From 769aef30f0f505c44bbe9fcd2c911a052a386139 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 24 Apr 2008 21:43:09 +0100 Subject: dm log: move dirty region log code into separate module Move the dirty region log code into a separate module so other targets can share the code. Signed-off-by: Heinz Mauelshagen Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/Makefile b/drivers/md/Makefile index d9aa7ed..be4b069 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -6,7 +6,7 @@ dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ dm-ioctl.o dm-io.o kcopyd.o dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o dm-snapshot-objs := dm-snap.o dm-exception-store.o -dm-mirror-objs := dm-log.o dm-raid1.o +dm-mirror-objs := dm-raid1.o dm-rdac-objs := dm-mpath-rdac.o dm-hp-sw-objs := dm-mpath-hp-sw.o md-mod-objs := md.o bitmap.o @@ -39,7 +39,7 @@ obj-$(CONFIG_DM_MULTIPATH_EMC) += dm-emc.o obj-$(CONFIG_DM_MULTIPATH_HP) += dm-hp-sw.o obj-$(CONFIG_DM_MULTIPATH_RDAC) += dm-rdac.o obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o -obj-$(CONFIG_DM_MIRROR) += dm-mirror.o +obj-$(CONFIG_DM_MIRROR) += dm-mirror.o dm-log.o obj-$(CONFIG_DM_ZERO) += dm-zero.o quiet_cmd_unroll = UNROLL $@ diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 34c25b0..f6b20de 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -777,7 +777,7 @@ int __init dm_dirty_log_init(void) return r; } -void dm_dirty_log_exit(void) +void __exit dm_dirty_log_exit(void) { dm_unregister_dirty_log_type(&_disk_type); dm_unregister_dirty_log_type(&_core_type); @@ -787,3 +787,10 @@ EXPORT_SYMBOL(dm_register_dirty_log_type); EXPORT_SYMBOL(dm_unregister_dirty_log_type); EXPORT_SYMBOL(dm_create_dirty_log); EXPORT_SYMBOL(dm_destroy_dirty_log); + +module_init(dm_dirty_log_init); +module_exit(dm_dirty_log_exit); + +MODULE_DESCRIPTION(DM_NAME " dirty region log"); +MODULE_AUTHOR("Joe Thornber, Heinz Mauelshagen "); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 64489e7..c4ce011 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1862,15 +1862,9 @@ static int __init dm_mirror_init(void) { int r; - r = dm_dirty_log_init(); - if (r) - return r; - r = dm_register_target(&mirror_target); - if (r < 0) { + if (r < 0) DMERR("Failed to register mirror target"); - dm_dirty_log_exit(); - } return r; } @@ -1882,8 +1876,6 @@ static void __exit dm_mirror_exit(void) r = dm_unregister_target(&mirror_target); if (r < 0) DMERR("unregister failed %d", r); - - dm_dirty_log_exit(); } /* Module hooks */ -- cgit v0.10.2 From 72727bad544b4ce0a3f7853bfd7ae939f398007d Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 24 Apr 2008 21:43:11 +0100 Subject: dm snapshot: store pointer to target instance Save pointer to dm_target in dm_snapshot structure. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 08047fb..08a8cbd 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -536,7 +536,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) s->last_percent = 0; init_rwsem(&s->lock); spin_lock_init(&s->pe_lock); - s->table = ti->table; + s->ti = ti; /* Allocate hash table for COW data */ if (init_hash_tables(s)) { @@ -699,7 +699,7 @@ static void __invalidate_snapshot(struct dm_snapshot *s, int err) s->valid = 0; - dm_table_event(s->table); + dm_table_event(s->ti->table); } static void get_pending_exception(struct dm_snap_pending_exception *pe) @@ -1060,7 +1060,7 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) goto next_snapshot; /* Nothing to do if writing beyond end of snapshot */ - if (bio->bi_sector >= dm_table_get_size(snap->table)) + if (bio->bi_sector >= dm_table_get_size(snap->ti->table)) goto next_snapshot; /* diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 93bce5d..434d1db 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -132,7 +132,7 @@ struct exception_store { struct dm_snapshot { struct rw_semaphore lock; - struct dm_table *table; + struct dm_target *ti; struct dm_dev *origin; struct dm_dev *cow; -- cgit v0.10.2 From e01fd7eeb00f8078103f4ed3e8ef64474c11f300 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 24 Apr 2008 21:43:14 +0100 Subject: dm io: rename error to error_bits Rename 'error' to 'error_bits' for clarity. Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 8f25f62..c56c7eb 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -20,7 +20,7 @@ struct dm_io_client { /* FIXME: can we shrink this ? */ struct io { - unsigned long error; + unsigned long error_bits; atomic_t count; struct task_struct *sleeper; struct dm_io_client *client; @@ -107,14 +107,14 @@ static inline unsigned bio_get_region(struct bio *bio) static void dec_count(struct io *io, unsigned int region, int error) { if (error) - set_bit(region, &io->error); + set_bit(region, &io->error_bits); if (atomic_dec_and_test(&io->count)) { if (io->sleeper) wake_up_process(io->sleeper); else { - unsigned long r = io->error; + unsigned long r = io->error_bits; io_notify_fn fn = io->callback; void *context = io->context; @@ -357,7 +357,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, return -EIO; } - io.error = 0; + io.error_bits = 0; atomic_set(&io.count, 1); /* see dispatch_io() */ io.sleeper = current; io.client = client; @@ -378,9 +378,9 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, return -EINTR; if (error_bits) - *error_bits = io.error; + *error_bits = io.error_bits; - return io.error ? -EIO : 0; + return io.error_bits ? -EIO : 0; } static int async_io(struct dm_io_client *client, unsigned int num_regions, @@ -396,7 +396,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, } io = mempool_alloc(client->pool, GFP_NOIO); - io->error = 0; + io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ io->sleeper = NULL; io->client = client; -- cgit v0.10.2 From 22a1ceb1e6a7fbce95a1531ff10bb4fb036d4a37 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 24 Apr 2008 21:43:17 +0100 Subject: dm io: clean interface Clean up the dm-io interface to prepare for publishing it in include/linux. Signed-off-by: Heinz Mauelshagen Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index c7d305b..6933301 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -159,7 +159,7 @@ static void free_area(struct pstore *ps) } struct mdata_req { - struct io_region *where; + struct dm_io_region *where; struct dm_io_request *io_req; struct work_struct work; int result; @@ -177,7 +177,7 @@ static void do_metadata(struct work_struct *work) */ static int chunk_io(struct pstore *ps, uint32_t chunk, int rw, int metadata) { - struct io_region where = { + struct dm_io_region where = { .bdev = ps->snap->cow->bdev, .sector = ps->snap->chunk_size * chunk, .count = ps->snap->chunk_size, diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index c56c7eb..978c041 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -6,6 +6,7 @@ */ #include "dm-io.h" +#include "dm.h" #include #include @@ -271,7 +272,7 @@ static void km_dp_init(struct dpages *dp, void *data) /*----------------------------------------------------------------- * IO routines that accept a list of pages. *---------------------------------------------------------------*/ -static void do_region(int rw, unsigned int region, struct io_region *where, +static void do_region(int rw, unsigned region, struct dm_io_region *where, struct dpages *dp, struct io *io) { struct bio *bio; @@ -320,7 +321,7 @@ static void do_region(int rw, unsigned int region, struct io_region *where, } static void dispatch_io(int rw, unsigned int num_regions, - struct io_region *where, struct dpages *dp, + struct dm_io_region *where, struct dpages *dp, struct io *io, int sync) { int i; @@ -347,7 +348,7 @@ static void dispatch_io(int rw, unsigned int num_regions, } static int sync_io(struct dm_io_client *client, unsigned int num_regions, - struct io_region *where, int rw, struct dpages *dp, + struct dm_io_region *where, int rw, struct dpages *dp, unsigned long *error_bits) { struct io io; @@ -384,7 +385,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, } static int async_io(struct dm_io_client *client, unsigned int num_regions, - struct io_region *where, int rw, struct dpages *dp, + struct dm_io_region *where, int rw, struct dpages *dp, io_notify_fn fn, void *context) { struct io *io; @@ -438,7 +439,7 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp) * New collapsed (a)synchronous interface */ int dm_io(struct dm_io_request *io_req, unsigned num_regions, - struct io_region *where, unsigned long *sync_error_bits) + struct dm_io_region *where, unsigned long *sync_error_bits) { int r; struct dpages dp; diff --git a/drivers/md/dm-io.h b/drivers/md/dm-io.h index f647e2c..b6bf17e 100644 --- a/drivers/md/dm-io.h +++ b/drivers/md/dm-io.h @@ -1,15 +1,20 @@ /* * Copyright (C) 2003 Sistina Software + * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. + * + * Device-Mapper low-level I/O. * * This file is released under the GPL. */ -#ifndef _DM_IO_H -#define _DM_IO_H +#ifndef _LINUX_DM_IO_H +#define _LINUX_DM_IO_H + +#ifdef __KERNEL__ -#include "dm.h" +#include -struct io_region { +struct dm_io_region { struct block_device *bdev; sector_t sector; sector_t count; /* If this is zero the region is ignored. */ @@ -74,6 +79,7 @@ void dm_io_client_destroy(struct dm_io_client *client); * error occurred doing io to the corresponding region. */ int dm_io(struct dm_io_request *io_req, unsigned num_regions, - struct io_region *region, unsigned long *sync_error_bits); + struct dm_io_region *region, unsigned long *sync_error_bits); -#endif +#endif /* __KERNEL__ */ +#endif /* _LINUX_DM_IO_H */ diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index f6b20de..14f785f 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -208,7 +208,7 @@ struct log_c { struct dm_dev *log_dev; struct log_header header; - struct io_region header_location; + struct dm_io_region header_location; struct log_header *disk_header; }; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index c4ce011..0d4e7e9 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -788,7 +788,7 @@ static int recover(struct mirror_set *ms, struct region *reg) { int r; unsigned int i; - struct io_region from, to[KCOPYD_MAX_REGIONS], *dest; + struct dm_io_region from, to[KCOPYD_MAX_REGIONS], *dest; struct mirror *m; unsigned long flags = 0; @@ -907,7 +907,7 @@ static void map_bio(struct mirror *m, struct bio *bio) bio->bi_sector = map_sector(m, bio); } -static void map_region(struct io_region *io, struct mirror *m, +static void map_region(struct dm_io_region *io, struct mirror *m, struct bio *bio) { io->bdev = m->dev->bdev; @@ -949,7 +949,7 @@ static void read_callback(unsigned long error, void *context) /* Asynchronous read. */ static void read_async_bio(struct mirror *m, struct bio *bio) { - struct io_region io; + struct dm_io_region io; struct dm_io_request io_req = { .bi_rw = READ, .mem.type = DM_IO_BVEC, @@ -1105,7 +1105,7 @@ out: static void do_write(struct mirror_set *ms, struct bio *bio) { unsigned int i; - struct io_region io[ms->nr_mirrors], *dest = io; + struct dm_io_region io[ms->nr_mirrors], *dest = io; struct mirror *m; struct dm_io_request io_req = { .bi_rw = WRITE, diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 08a8cbd..bab159b 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -824,7 +824,7 @@ static void copy_callback(int read_err, unsigned long write_err, void *context) static void start_copy(struct dm_snap_pending_exception *pe) { struct dm_snapshot *s = pe->snap; - struct io_region src, dest; + struct dm_io_region src, dest; struct block_device *bdev = s->origin->bdev; sector_t dev_size; diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index e76b52a..21fea42 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -25,6 +25,7 @@ #include #include "kcopyd.h" +#include "dm.h" static struct workqueue_struct *_kcopyd_wq; static struct work_struct _kcopyd_work; @@ -175,13 +176,13 @@ struct kcopyd_job { * Either READ or WRITE */ int rw; - struct io_region source; + struct dm_io_region source; /* * The destinations for the transfer. */ unsigned int num_dests; - struct io_region dests[KCOPYD_MAX_REGIONS]; + struct dm_io_region dests[KCOPYD_MAX_REGIONS]; sector_t offset; unsigned int nr_pages; @@ -526,8 +527,8 @@ static void split_job(struct kcopyd_job *job) segment_complete(0, 0u, job); } -int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, - unsigned int num_dests, struct io_region *dests, +int kcopyd_copy(struct kcopyd_client *kc, struct dm_io_region *from, + unsigned int num_dests, struct dm_io_region *dests, unsigned int flags, kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; diff --git a/drivers/md/kcopyd.h b/drivers/md/kcopyd.h index 4845f2a..588f05d 100644 --- a/drivers/md/kcopyd.h +++ b/drivers/md/kcopyd.h @@ -35,8 +35,8 @@ void kcopyd_client_destroy(struct kcopyd_client *kc); typedef void (*kcopyd_notify_fn)(int read_err, unsigned long write_err, void *context); -int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from, - unsigned int num_dests, struct io_region *dests, +int kcopyd_copy(struct kcopyd_client *kc, struct dm_io_region *from, + unsigned num_dests, struct dm_io_region *dests, unsigned int flags, kcopyd_notify_fn fn, void *context); #endif -- cgit v0.10.2 From eb69aca5d3370b81450d68edeebc2bb9a3eb9689 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 24 Apr 2008 21:43:19 +0100 Subject: dm kcopyd: clean interface Clean up the kcopyd interface to prepare for publishing it in include/linux. Signed-off-by: Heinz Mauelshagen Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 0d4e7e9..32c7c6d 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -133,7 +133,7 @@ struct mirror_set { struct dm_target *ti; struct list_head list; struct region_hash rh; - struct kcopyd_client *kcopyd_client; + struct dm_kcopyd_client *kcopyd_client; uint64_t features; spinlock_t lock; /* protects the lists */ @@ -788,7 +788,7 @@ static int recover(struct mirror_set *ms, struct region *reg) { int r; unsigned int i; - struct dm_io_region from, to[KCOPYD_MAX_REGIONS], *dest; + struct dm_io_region from, to[DM_KCOPYD_MAX_REGIONS], *dest; struct mirror *m; unsigned long flags = 0; @@ -820,9 +820,9 @@ static int recover(struct mirror_set *ms, struct region *reg) } /* hand to kcopyd */ - set_bit(KCOPYD_IGNORE_ERROR, &flags); - r = kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, flags, - recovery_complete, reg); + set_bit(DM_KCOPYD_IGNORE_ERROR, &flags); + r = dm_kcopyd_copy(ms->kcopyd_client, &from, ms->nr_mirrors - 1, to, + flags, recovery_complete, reg); return r; } @@ -1504,7 +1504,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) argc -= args_used; if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 || - nr_mirrors < 2 || nr_mirrors > KCOPYD_MAX_REGIONS + 1) { + nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) { ti->error = "Invalid number of mirrors"; dm_destroy_dirty_log(dl); return -EINVAL; @@ -1569,7 +1569,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - r = kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); + r = dm_kcopyd_client_create(DM_IO_PAGES, &ms->kcopyd_client); if (r) goto err_destroy_wq; @@ -1588,7 +1588,7 @@ static void mirror_dtr(struct dm_target *ti) struct mirror_set *ms = (struct mirror_set *) ti->private; flush_workqueue(ms->kmirrord_wq); - kcopyd_client_destroy(ms->kcopyd_client); + dm_kcopyd_client_destroy(ms->kcopyd_client); destroy_workqueue(ms->kmirrord_wq); free_context(ms, ti, ms->nr_mirrors); } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index bab159b..e1dcca9 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -558,7 +558,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad4; } - r = kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); + r = dm_kcopyd_client_create(SNAPSHOT_PAGES, &s->kcopyd_client); if (r) { ti->error = "Could not create kcopyd client"; goto bad5; @@ -591,7 +591,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) return 0; bad6: - kcopyd_client_destroy(s->kcopyd_client); + dm_kcopyd_client_destroy(s->kcopyd_client); bad5: s->store.destroy(&s->store); @@ -613,7 +613,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) static void __free_exceptions(struct dm_snapshot *s) { - kcopyd_client_destroy(s->kcopyd_client); + dm_kcopyd_client_destroy(s->kcopyd_client); s->kcopyd_client = NULL; exit_exception_table(&s->pending, pending_cache); @@ -839,7 +839,7 @@ static void start_copy(struct dm_snap_pending_exception *pe) dest.count = src.count; /* Hand over to kcopyd */ - kcopyd_copy(s->kcopyd_client, + dm_kcopyd_copy(s->kcopyd_client, &src, 1, &dest, 0, copy_callback, pe); } diff --git a/drivers/md/dm-snap.h b/drivers/md/dm-snap.h index 434d1db..24f9fb7 100644 --- a/drivers/md/dm-snap.h +++ b/drivers/md/dm-snap.h @@ -169,7 +169,7 @@ struct dm_snapshot { /* The on disk metadata handler */ struct exception_store store; - struct kcopyd_client *kcopyd_client; + struct dm_kcopyd_client *kcopyd_client; /* Queue of snapshot writes for ksnapd to flush */ struct bio_list queued_bios; diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index 21fea42..4f2c61a 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -9,9 +9,8 @@ * completion notification. */ -#include +#include #include - #include #include #include @@ -39,7 +38,7 @@ static void wake(void) * Each kcopyd client has its own little pool of preallocated * pages for kcopyd io. *---------------------------------------------------------------*/ -struct kcopyd_client { +struct dm_kcopyd_client { struct list_head list; spinlock_t lock; @@ -76,7 +75,7 @@ static void free_pl(struct page_list *pl) kfree(pl); } -static int kcopyd_get_pages(struct kcopyd_client *kc, +static int kcopyd_get_pages(struct dm_kcopyd_client *kc, unsigned int nr, struct page_list **pages) { struct page_list *pl; @@ -99,7 +98,7 @@ static int kcopyd_get_pages(struct kcopyd_client *kc, return 0; } -static void kcopyd_put_pages(struct kcopyd_client *kc, struct page_list *pl) +static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) { struct page_list *cursor; @@ -127,7 +126,7 @@ static void drop_pages(struct page_list *pl) } } -static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr) +static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) { unsigned int i; struct page_list *pl = NULL, *next; @@ -148,7 +147,7 @@ static int client_alloc_pages(struct kcopyd_client *kc, unsigned int nr) return 0; } -static void client_free_pages(struct kcopyd_client *kc) +static void client_free_pages(struct dm_kcopyd_client *kc) { BUG_ON(kc->nr_free_pages != kc->nr_pages); drop_pages(kc->pages); @@ -162,7 +161,7 @@ static void client_free_pages(struct kcopyd_client *kc) * ever having to do io (which could cause a deadlock). *---------------------------------------------------------------*/ struct kcopyd_job { - struct kcopyd_client *kc; + struct dm_kcopyd_client *kc; struct list_head list; unsigned long flags; @@ -182,7 +181,7 @@ struct kcopyd_job { * The destinations for the transfer. */ unsigned int num_dests; - struct dm_io_region dests[KCOPYD_MAX_REGIONS]; + struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; sector_t offset; unsigned int nr_pages; @@ -192,7 +191,7 @@ struct kcopyd_job { * Set this to ensure you are notified when the job has * completed. 'context' is for callback to use. */ - kcopyd_notify_fn fn; + dm_kcopyd_notify_fn fn; void *context; /* @@ -295,8 +294,8 @@ static int run_complete_job(struct kcopyd_job *job) void *context = job->context; int read_err = job->read_err; unsigned long write_err = job->write_err; - kcopyd_notify_fn fn = job->fn; - struct kcopyd_client *kc = job->kc; + dm_kcopyd_notify_fn fn = job->fn; + struct dm_kcopyd_client *kc = job->kc; kcopyd_put_pages(kc, job->pages); mempool_free(job, _job_pool); @@ -318,7 +317,7 @@ static void complete_io(unsigned long error, void *context) else job->read_err = 1; - if (!test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) { + if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { push(&_complete_jobs, job); wake(); return; @@ -470,7 +469,7 @@ static void segment_complete(int read_err, unsigned long write_err, * Only dispatch more work if there hasn't been an error. */ if ((!job->read_err && !job->write_err) || - test_bit(KCOPYD_IGNORE_ERROR, &job->flags)) { + test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { /* get the next chunk of work */ progress = job->progress; count = job->source.count - progress; @@ -527,9 +526,9 @@ static void split_job(struct kcopyd_job *job) segment_complete(0, 0u, job); } -int kcopyd_copy(struct kcopyd_client *kc, struct dm_io_region *from, - unsigned int num_dests, struct dm_io_region *dests, - unsigned int flags, kcopyd_notify_fn fn, void *context) +int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, + unsigned int num_dests, struct dm_io_region *dests, + unsigned int flags, dm_kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; @@ -570,6 +569,7 @@ int kcopyd_copy(struct kcopyd_client *kc, struct dm_io_region *from, return 0; } +EXPORT_SYMBOL(dm_kcopyd_copy); /* * Cancels a kcopyd job, eg. someone might be deactivating a @@ -589,14 +589,14 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) static DEFINE_MUTEX(_client_lock); static LIST_HEAD(_clients); -static void client_add(struct kcopyd_client *kc) +static void client_add(struct dm_kcopyd_client *kc) { mutex_lock(&_client_lock); list_add(&kc->list, &_clients); mutex_unlock(&_client_lock); } -static void client_del(struct kcopyd_client *kc) +static void client_del(struct dm_kcopyd_client *kc) { mutex_lock(&_client_lock); list_del(&kc->list); @@ -650,10 +650,11 @@ static void kcopyd_exit(void) mutex_unlock(&kcopyd_init_lock); } -int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result) +int dm_kcopyd_client_create(unsigned int nr_pages, + struct dm_kcopyd_client **result) { int r = 0; - struct kcopyd_client *kc; + struct dm_kcopyd_client *kc; r = kcopyd_init(); if (r) @@ -691,8 +692,9 @@ int kcopyd_client_create(unsigned int nr_pages, struct kcopyd_client **result) *result = kc; return 0; } +EXPORT_SYMBOL(dm_kcopyd_client_create); -void kcopyd_client_destroy(struct kcopyd_client *kc) +void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) { /* Wait for completion of all jobs submitted by this client. */ wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); @@ -703,7 +705,4 @@ void kcopyd_client_destroy(struct kcopyd_client *kc) kfree(kc); kcopyd_exit(); } - -EXPORT_SYMBOL(kcopyd_client_create); -EXPORT_SYMBOL(kcopyd_client_destroy); -EXPORT_SYMBOL(kcopyd_copy); +EXPORT_SYMBOL(dm_kcopyd_client_destroy); diff --git a/drivers/md/kcopyd.h b/drivers/md/kcopyd.h index 588f05d..d305784 100644 --- a/drivers/md/kcopyd.h +++ b/drivers/md/kcopyd.h @@ -1,29 +1,33 @@ /* - * Copyright (C) 2001 Sistina Software + * Copyright (C) 2001 - 2003 Sistina Software + * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. * - * This file is released under the GPL. + * kcopyd provides a simple interface for copying an area of one + * block-device to one or more other block-devices, either synchronous + * or with an asynchronous completion notification. * - * Kcopyd provides a simple interface for copying an area of one - * block-device to one or more other block-devices, with an asynchronous - * completion notification. + * This file is released under the GPL. */ -#ifndef DM_KCOPYD_H -#define DM_KCOPYD_H +#ifndef _LINUX_DM_KCOPYD_H +#define _LINUX_DM_KCOPYD_H + +#ifdef __KERNEL__ #include "dm-io.h" /* FIXME: make this configurable */ -#define KCOPYD_MAX_REGIONS 8 +#define DM_KCOPYD_MAX_REGIONS 8 -#define KCOPYD_IGNORE_ERROR 1 +#define DM_KCOPYD_IGNORE_ERROR 1 /* - * To use kcopyd you must first create a kcopyd client object. + * To use kcopyd you must first create a dm_kcopyd_client object. */ -struct kcopyd_client; -int kcopyd_client_create(unsigned int num_pages, struct kcopyd_client **result); -void kcopyd_client_destroy(struct kcopyd_client *kc); +struct dm_kcopyd_client; +int dm_kcopyd_client_create(unsigned num_pages, + struct dm_kcopyd_client **result); +void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* * Submit a copy job to kcopyd. This is built on top of the @@ -32,11 +36,12 @@ void kcopyd_client_destroy(struct kcopyd_client *kc); * read_err is a boolean, * write_err is a bitset, with 1 bit for each destination region */ -typedef void (*kcopyd_notify_fn)(int read_err, unsigned long write_err, - void *context); +typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err, + void *context); -int kcopyd_copy(struct kcopyd_client *kc, struct dm_io_region *from, - unsigned num_dests, struct dm_io_region *dests, - unsigned int flags, kcopyd_notify_fn fn, void *context); +int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, + unsigned num_dests, struct dm_io_region *dests, + unsigned flags, dm_kcopyd_notify_fn fn, void *context); -#endif +#endif /* __KERNEL__ */ +#endif /* _LINUX_DM_KCOPYD_H */ -- cgit v0.10.2 From 416cd17b1982217bca3dc41b9f00b0b38fdaadad Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Thu, 24 Apr 2008 21:43:35 +0100 Subject: dm log: clean interface Clean up the dm-log interface to prepare for publishing it in include/linux. Signed-off-by: Heinz Mauelshagen Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 14f785f..b776701 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -12,13 +12,14 @@ #include "dm-log.h" #include "dm-io.h" +#include "dm.h" #define DM_MSG_PREFIX "dirty region log" static LIST_HEAD(_log_types); static DEFINE_SPINLOCK(_lock); -int dm_register_dirty_log_type(struct dirty_log_type *type) +int dm_dirty_log_type_register(struct dm_dirty_log_type *type) { spin_lock(&_lock); type->use_count = 0; @@ -27,8 +28,9 @@ int dm_register_dirty_log_type(struct dirty_log_type *type) return 0; } +EXPORT_SYMBOL(dm_dirty_log_type_register); -int dm_unregister_dirty_log_type(struct dirty_log_type *type) +int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type) { spin_lock(&_lock); @@ -41,10 +43,11 @@ int dm_unregister_dirty_log_type(struct dirty_log_type *type) return 0; } +EXPORT_SYMBOL(dm_dirty_log_type_unregister); -static struct dirty_log_type *_get_type(const char *type_name) +static struct dm_dirty_log_type *_get_type(const char *type_name) { - struct dirty_log_type *type; + struct dm_dirty_log_type *type; spin_lock(&_lock); list_for_each_entry (type, &_log_types, list) @@ -79,10 +82,10 @@ static struct dirty_log_type *_get_type(const char *type_name) * * Returns: dirty_log_type* on success, NULL on failure */ -static struct dirty_log_type *get_type(const char *type_name) +static struct dm_dirty_log_type *get_type(const char *type_name) { char *p, *type_name_dup; - struct dirty_log_type *type; + struct dm_dirty_log_type *type; type = _get_type(type_name); if (type) @@ -111,7 +114,7 @@ static struct dirty_log_type *get_type(const char *type_name) return type; } -static void put_type(struct dirty_log_type *type) +static void put_type(struct dm_dirty_log_type *type) { spin_lock(&_lock); if (!--type->use_count) @@ -119,11 +122,12 @@ static void put_type(struct dirty_log_type *type) spin_unlock(&_lock); } -struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti, - unsigned int argc, char **argv) +struct dm_dirty_log *dm_dirty_log_create(const char *type_name, + struct dm_target *ti, + unsigned int argc, char **argv) { - struct dirty_log_type *type; - struct dirty_log *log; + struct dm_dirty_log_type *type; + struct dm_dirty_log *log; log = kmalloc(sizeof(*log), GFP_KERNEL); if (!log) @@ -144,13 +148,15 @@ struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *t return log; } +EXPORT_SYMBOL(dm_dirty_log_create); -void dm_destroy_dirty_log(struct dirty_log *log) +void dm_dirty_log_destroy(struct dm_dirty_log *log) { log->type->dtr(log); put_type(log->type); kfree(log); } +EXPORT_SYMBOL(dm_dirty_log_destroy); /*----------------------------------------------------------------- * Persistent and core logs share a lot of their implementation. @@ -216,7 +222,7 @@ struct log_c { * The touched member needs to be updated every time we access * one of the bitsets. */ -static inline int log_test_bit(uint32_t *bs, unsigned bit) +static inline int log_test_bit(uint32_t *bs, unsigned bit) { return ext2_test_bit(bit, (unsigned long *) bs) ? 1 : 0; } @@ -303,7 +309,7 @@ static inline int write_header(struct log_c *log) * argv contains region_size followed optionally by [no]sync *--------------------------------------------------------------*/ #define BYTE_SHIFT 3 -static int create_log_context(struct dirty_log *log, struct dm_target *ti, +static int create_log_context(struct dm_dirty_log *log, struct dm_target *ti, unsigned int argc, char **argv, struct dm_dev *dev) { @@ -435,7 +441,7 @@ static int create_log_context(struct dirty_log *log, struct dm_target *ti, return 0; } -static int core_ctr(struct dirty_log *log, struct dm_target *ti, +static int core_ctr(struct dm_dirty_log *log, struct dm_target *ti, unsigned int argc, char **argv) { return create_log_context(log, ti, argc, argv, NULL); @@ -448,7 +454,7 @@ static void destroy_log_context(struct log_c *lc) kfree(lc); } -static void core_dtr(struct dirty_log *log) +static void core_dtr(struct dm_dirty_log *log) { struct log_c *lc = (struct log_c *) log->context; @@ -461,7 +467,7 @@ static void core_dtr(struct dirty_log *log) * * argv contains log_device region_size followed optionally by [no]sync *--------------------------------------------------------------*/ -static int disk_ctr(struct dirty_log *log, struct dm_target *ti, +static int disk_ctr(struct dm_dirty_log *log, struct dm_target *ti, unsigned int argc, char **argv) { int r; @@ -486,7 +492,7 @@ static int disk_ctr(struct dirty_log *log, struct dm_target *ti, return 0; } -static void disk_dtr(struct dirty_log *log) +static void disk_dtr(struct dm_dirty_log *log) { struct log_c *lc = (struct log_c *) log->context; @@ -515,7 +521,7 @@ static void fail_log_device(struct log_c *lc) dm_table_event(lc->ti->table); } -static int disk_resume(struct dirty_log *log) +static int disk_resume(struct dm_dirty_log *log) { int r; unsigned i; @@ -571,38 +577,38 @@ static int disk_resume(struct dirty_log *log) return r; } -static uint32_t core_get_region_size(struct dirty_log *log) +static uint32_t core_get_region_size(struct dm_dirty_log *log) { struct log_c *lc = (struct log_c *) log->context; return lc->region_size; } -static int core_resume(struct dirty_log *log) +static int core_resume(struct dm_dirty_log *log) { struct log_c *lc = (struct log_c *) log->context; lc->sync_search = 0; return 0; } -static int core_is_clean(struct dirty_log *log, region_t region) +static int core_is_clean(struct dm_dirty_log *log, region_t region) { struct log_c *lc = (struct log_c *) log->context; return log_test_bit(lc->clean_bits, region); } -static int core_in_sync(struct dirty_log *log, region_t region, int block) +static int core_in_sync(struct dm_dirty_log *log, region_t region, int block) { struct log_c *lc = (struct log_c *) log->context; return log_test_bit(lc->sync_bits, region); } -static int core_flush(struct dirty_log *log) +static int core_flush(struct dm_dirty_log *log) { /* no op */ return 0; } -static int disk_flush(struct dirty_log *log) +static int disk_flush(struct dm_dirty_log *log) { int r; struct log_c *lc = (struct log_c *) log->context; @@ -620,19 +626,19 @@ static int disk_flush(struct dirty_log *log) return r; } -static void core_mark_region(struct dirty_log *log, region_t region) +static void core_mark_region(struct dm_dirty_log *log, region_t region) { struct log_c *lc = (struct log_c *) log->context; log_clear_bit(lc, lc->clean_bits, region); } -static void core_clear_region(struct dirty_log *log, region_t region) +static void core_clear_region(struct dm_dirty_log *log, region_t region) { struct log_c *lc = (struct log_c *) log->context; log_set_bit(lc, lc->clean_bits, region); } -static int core_get_resync_work(struct dirty_log *log, region_t *region) +static int core_get_resync_work(struct dm_dirty_log *log, region_t *region) { struct log_c *lc = (struct log_c *) log->context; @@ -655,7 +661,7 @@ static int core_get_resync_work(struct dirty_log *log, region_t *region) return 1; } -static void core_set_region_sync(struct dirty_log *log, region_t region, +static void core_set_region_sync(struct dm_dirty_log *log, region_t region, int in_sync) { struct log_c *lc = (struct log_c *) log->context; @@ -670,7 +676,7 @@ static void core_set_region_sync(struct dirty_log *log, region_t region, } } -static region_t core_get_sync_count(struct dirty_log *log) +static region_t core_get_sync_count(struct dm_dirty_log *log) { struct log_c *lc = (struct log_c *) log->context; @@ -681,7 +687,7 @@ static region_t core_get_sync_count(struct dirty_log *log) if (lc->sync != DEFAULTSYNC) \ DMEMIT("%ssync ", lc->sync == NOSYNC ? "no" : "") -static int core_status(struct dirty_log *log, status_type_t status, +static int core_status(struct dm_dirty_log *log, status_type_t status, char *result, unsigned int maxlen) { int sz = 0; @@ -701,7 +707,7 @@ static int core_status(struct dirty_log *log, status_type_t status, return sz; } -static int disk_status(struct dirty_log *log, status_type_t status, +static int disk_status(struct dm_dirty_log *log, status_type_t status, char *result, unsigned int maxlen) { int sz = 0; @@ -723,7 +729,7 @@ static int disk_status(struct dirty_log *log, status_type_t status, return sz; } -static struct dirty_log_type _core_type = { +static struct dm_dirty_log_type _core_type = { .name = "core", .module = THIS_MODULE, .ctr = core_ctr, @@ -741,7 +747,7 @@ static struct dirty_log_type _core_type = { .status = core_status, }; -static struct dirty_log_type _disk_type = { +static struct dm_dirty_log_type _disk_type = { .name = "disk", .module = THIS_MODULE, .ctr = disk_ctr, @@ -764,14 +770,14 @@ int __init dm_dirty_log_init(void) { int r; - r = dm_register_dirty_log_type(&_core_type); + r = dm_dirty_log_type_register(&_core_type); if (r) DMWARN("couldn't register core log"); - r = dm_register_dirty_log_type(&_disk_type); + r = dm_dirty_log_type_register(&_disk_type); if (r) { DMWARN("couldn't register disk type"); - dm_unregister_dirty_log_type(&_core_type); + dm_dirty_log_type_unregister(&_core_type); } return r; @@ -779,15 +785,10 @@ int __init dm_dirty_log_init(void) void __exit dm_dirty_log_exit(void) { - dm_unregister_dirty_log_type(&_disk_type); - dm_unregister_dirty_log_type(&_core_type); + dm_dirty_log_type_unregister(&_disk_type); + dm_dirty_log_type_unregister(&_core_type); } -EXPORT_SYMBOL(dm_register_dirty_log_type); -EXPORT_SYMBOL(dm_unregister_dirty_log_type); -EXPORT_SYMBOL(dm_create_dirty_log); -EXPORT_SYMBOL(dm_destroy_dirty_log); - module_init(dm_dirty_log_init); module_exit(dm_dirty_log_exit); diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h index 3fae87e..2da48a8 100644 --- a/drivers/md/dm-log.h +++ b/drivers/md/dm-log.h @@ -1,52 +1,58 @@ /* * Copyright (C) 2003 Sistina Software + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. + * + * Device-Mapper dirty region log. * * This file is released under the LGPL. */ -#ifndef DM_DIRTY_LOG -#define DM_DIRTY_LOG +#ifndef _LINUX_DM_DIRTY_LOG +#define _LINUX_DM_DIRTY_LOG + +#ifdef __KERNEL__ -#include "dm.h" +#include +#include typedef sector_t region_t; -struct dirty_log_type; +struct dm_dirty_log_type; -struct dirty_log { - struct dirty_log_type *type; +struct dm_dirty_log { + struct dm_dirty_log_type *type; void *context; }; -struct dirty_log_type { +struct dm_dirty_log_type { struct list_head list; const char *name; struct module *module; - unsigned int use_count; + unsigned use_count; - int (*ctr)(struct dirty_log *log, struct dm_target *ti, - unsigned int argc, char **argv); - void (*dtr)(struct dirty_log *log); + int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, + unsigned argc, char **argv); + void (*dtr)(struct dm_dirty_log *log); /* * There are times when we don't want the log to touch * the disk. */ - int (*presuspend)(struct dirty_log *log); - int (*postsuspend)(struct dirty_log *log); - int (*resume)(struct dirty_log *log); + int (*presuspend)(struct dm_dirty_log *log); + int (*postsuspend)(struct dm_dirty_log *log); + int (*resume)(struct dm_dirty_log *log); /* * Retrieves the smallest size of region that the log can * deal with. */ - uint32_t (*get_region_size)(struct dirty_log *log); + uint32_t (*get_region_size)(struct dm_dirty_log *log); - /* + /* * A predicate to say whether a region is clean or not. * May block. */ - int (*is_clean)(struct dirty_log *log, region_t region); + int (*is_clean)(struct dm_dirty_log *log, region_t region); /* * Returns: 0, 1, -EWOULDBLOCK, < 0 @@ -59,13 +65,14 @@ struct dirty_log_type { * passed to a daemon to deal with, since a daemon is * allowed to block. */ - int (*in_sync)(struct dirty_log *log, region_t region, int can_block); + int (*in_sync)(struct dm_dirty_log *log, region_t region, + int can_block); /* * Flush the current log state (eg, to disk). This * function may block. */ - int (*flush)(struct dirty_log *log); + int (*flush)(struct dm_dirty_log *log); /* * Mark an area as clean or dirty. These functions may @@ -73,8 +80,8 @@ struct dirty_log_type { * be extremely rare (eg, allocating another chunk of * memory for some reason). */ - void (*mark_region)(struct dirty_log *log, region_t region); - void (*clear_region)(struct dirty_log *log, region_t region); + void (*mark_region)(struct dm_dirty_log *log, region_t region); + void (*clear_region)(struct dm_dirty_log *log, region_t region); /* * Returns: <0 (error), 0 (no region), 1 (region) @@ -88,44 +95,39 @@ struct dirty_log_type { * tells you if an area is synchronised, the other * assigns recovery work. */ - int (*get_resync_work)(struct dirty_log *log, region_t *region); + int (*get_resync_work)(struct dm_dirty_log *log, region_t *region); /* * This notifies the log that the resync status of a region * has changed. It also clears the region from the recovering * list (if present). */ - void (*set_region_sync)(struct dirty_log *log, + void (*set_region_sync)(struct dm_dirty_log *log, region_t region, int in_sync); - /* + /* * Returns the number of regions that are in sync. - */ - region_t (*get_sync_count)(struct dirty_log *log); + */ + region_t (*get_sync_count)(struct dm_dirty_log *log); /* * Support function for mirror status requests. */ - int (*status)(struct dirty_log *log, status_type_t status_type, - char *result, unsigned int maxlen); + int (*status)(struct dm_dirty_log *log, status_type_t status_type, + char *result, unsigned maxlen); }; -int dm_register_dirty_log_type(struct dirty_log_type *type); -int dm_unregister_dirty_log_type(struct dirty_log_type *type); - +int dm_dirty_log_type_register(struct dm_dirty_log_type *type); +int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type); /* * Make sure you use these two functions, rather than calling * type->constructor/destructor() directly. */ -struct dirty_log *dm_create_dirty_log(const char *type_name, struct dm_target *ti, - unsigned int argc, char **argv); -void dm_destroy_dirty_log(struct dirty_log *log); - -/* - * init/exit functions. - */ -int dm_dirty_log_init(void); -void dm_dirty_log_exit(void); +struct dm_dirty_log *dm_dirty_log_create(const char *type_name, + struct dm_target *ti, + unsigned argc, char **argv); +void dm_dirty_log_destroy(struct dm_dirty_log *log); -#endif +#endif /* __KERNEL__ */ +#endif /* _LINUX_DM_DIRTY_LOG_H */ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 32c7c6d..5beeced 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -74,7 +74,7 @@ struct region_hash { unsigned region_shift; /* holds persistent region state */ - struct dirty_log *log; + struct dm_dirty_log *log; /* hash table */ rwlock_t hash_lock; @@ -184,7 +184,7 @@ static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); #define MIN_REGIONS 64 #define MAX_RECOVERY 1 static int rh_init(struct region_hash *rh, struct mirror_set *ms, - struct dirty_log *log, uint32_t region_size, + struct dm_dirty_log *log, uint32_t region_size, region_t nr_regions) { unsigned int nr_buckets, max_buckets; @@ -249,7 +249,7 @@ static void rh_exit(struct region_hash *rh) } if (rh->log) - dm_destroy_dirty_log(rh->log); + dm_dirty_log_destroy(rh->log); if (rh->region_pool) mempool_destroy(rh->region_pool); vfree(rh->buckets); @@ -831,7 +831,7 @@ static void do_recovery(struct mirror_set *ms) { int r; struct region *reg; - struct dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = ms->rh.log; /* * Start quiescing some regions. @@ -1017,7 +1017,7 @@ static void __bio_mark_nosync(struct mirror_set *ms, { unsigned long flags; struct region_hash *rh = &ms->rh; - struct dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = ms->rh.log; struct region *reg; region_t region = bio_to_region(rh, bio); int recovering = 0; @@ -1301,7 +1301,7 @@ static void do_mirror(struct work_struct *work) static struct mirror_set *alloc_context(unsigned int nr_mirrors, uint32_t region_size, struct dm_target *ti, - struct dirty_log *dl) + struct dm_dirty_log *dl) { size_t len; struct mirror_set *ms = NULL; @@ -1401,12 +1401,12 @@ static int get_mirror(struct mirror_set *ms, struct dm_target *ti, /* * Create dirty log: log_type #log_params */ -static struct dirty_log *create_dirty_log(struct dm_target *ti, +static struct dm_dirty_log *create_dirty_log(struct dm_target *ti, unsigned int argc, char **argv, unsigned int *args_used) { unsigned int param_count; - struct dirty_log *dl; + struct dm_dirty_log *dl; if (argc < 2) { ti->error = "Insufficient mirror log arguments"; @@ -1425,7 +1425,7 @@ static struct dirty_log *create_dirty_log(struct dm_target *ti, return NULL; } - dl = dm_create_dirty_log(argv[0], ti, param_count, argv + 2); + dl = dm_dirty_log_create(argv[0], ti, param_count, argv + 2); if (!dl) { ti->error = "Error creating mirror dirty log"; return NULL; @@ -1433,7 +1433,7 @@ static struct dirty_log *create_dirty_log(struct dm_target *ti, if (!_check_region_size(ti, dl->type->get_region_size(dl))) { ti->error = "Invalid region size"; - dm_destroy_dirty_log(dl); + dm_dirty_log_destroy(dl); return NULL; } @@ -1494,7 +1494,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) int r; unsigned int nr_mirrors, m, args_used; struct mirror_set *ms; - struct dirty_log *dl; + struct dm_dirty_log *dl; dl = create_dirty_log(ti, argc, argv, &args_used); if (!dl) @@ -1506,7 +1506,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (!argc || sscanf(argv[0], "%u", &nr_mirrors) != 1 || nr_mirrors < 2 || nr_mirrors > DM_KCOPYD_MAX_REGIONS + 1) { ti->error = "Invalid number of mirrors"; - dm_destroy_dirty_log(dl); + dm_dirty_log_destroy(dl); return -EINVAL; } @@ -1514,13 +1514,13 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (argc < nr_mirrors * 2) { ti->error = "Too few mirror arguments"; - dm_destroy_dirty_log(dl); + dm_dirty_log_destroy(dl); return -EINVAL; } ms = alloc_context(nr_mirrors, dl->type->get_region_size(dl), ti, dl); if (!ms) { - dm_destroy_dirty_log(dl); + dm_dirty_log_destroy(dl); return -ENOMEM; } @@ -1732,7 +1732,7 @@ out: static void mirror_presuspend(struct dm_target *ti) { struct mirror_set *ms = (struct mirror_set *) ti->private; - struct dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = ms->rh.log; atomic_set(&ms->suspend, 1); @@ -1761,7 +1761,7 @@ static void mirror_presuspend(struct dm_target *ti) static void mirror_postsuspend(struct dm_target *ti) { struct mirror_set *ms = ti->private; - struct dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = ms->rh.log; if (log->type->postsuspend && log->type->postsuspend(log)) /* FIXME: need better error handling */ @@ -1771,7 +1771,7 @@ static void mirror_postsuspend(struct dm_target *ti) static void mirror_resume(struct dm_target *ti) { struct mirror_set *ms = ti->private; - struct dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = ms->rh.log; atomic_set(&ms->suspend, 0); if (log->type->resume && log->type->resume(log)) @@ -1809,7 +1809,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type, { unsigned int m, sz = 0; struct mirror_set *ms = (struct mirror_set *) ti->private; - struct dirty_log *log = ms->rh.log; + struct dm_dirty_log *log = ms->rh.log; char buffer[ms->nr_mirrors + 1]; switch (type) { diff --git a/drivers/md/dm.h b/drivers/md/dm.h index b4584a3..17f2d6a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -189,4 +189,10 @@ int dm_lock_for_deletion(struct mapped_device *md); void dm_kobject_uevent(struct mapped_device *md); +/* + * Dirty log + */ +int dm_dirty_log_init(void); +void dm_dirty_log_exit(void); + #endif diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index cb78457..4db2337 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -10,6 +10,8 @@ #ifdef __KERNEL__ +#include + struct dm_target; struct dm_table; struct dm_dev; -- cgit v0.10.2 From b8206bc3de0b0665d47655d270c18ea46aff5372 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 24 Apr 2008 21:43:38 +0100 Subject: dm log: move register functions Reorder a couple of functions in the file so the next patch is readable. Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index b776701..82df73f 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -19,32 +19,6 @@ static LIST_HEAD(_log_types); static DEFINE_SPINLOCK(_lock); -int dm_dirty_log_type_register(struct dm_dirty_log_type *type) -{ - spin_lock(&_lock); - type->use_count = 0; - list_add(&type->list, &_log_types); - spin_unlock(&_lock); - - return 0; -} -EXPORT_SYMBOL(dm_dirty_log_type_register); - -int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type) -{ - spin_lock(&_lock); - - if (type->use_count) - DMWARN("Attempt to unregister a log type that is still in use"); - else - list_del(&type->list); - - spin_unlock(&_lock); - - return 0; -} -EXPORT_SYMBOL(dm_dirty_log_type_unregister); - static struct dm_dirty_log_type *_get_type(const char *type_name) { struct dm_dirty_log_type *type; @@ -122,6 +96,32 @@ static void put_type(struct dm_dirty_log_type *type) spin_unlock(&_lock); } +int dm_dirty_log_type_register(struct dm_dirty_log_type *type) +{ + spin_lock(&_lock); + type->use_count = 0; + list_add(&type->list, &_log_types); + spin_unlock(&_lock); + + return 0; +} +EXPORT_SYMBOL(dm_dirty_log_type_register); + +int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type) +{ + spin_lock(&_lock); + + if (type->use_count) + DMWARN("Attempt to unregister a log type that is still in use"); + else + list_del(&type->list); + + spin_unlock(&_lock); + + return 0; +} +EXPORT_SYMBOL(dm_dirty_log_type_unregister); + struct dm_dirty_log *dm_dirty_log_create(const char *type_name, struct dm_target *ti, unsigned int argc, char **argv) -- cgit v0.10.2 From 2a23aa1ddb1f0c9eef2c929c89565c387f6bf68b Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Thu, 24 Apr 2008 21:43:41 +0100 Subject: dm log: make module use tracking internal Remove internal module reference fields from the interface. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 82df73f..e6b6a9d 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -16,34 +16,51 @@ #define DM_MSG_PREFIX "dirty region log" +struct dm_dirty_log_internal { + struct dm_dirty_log_type *type; + + struct list_head list; + long use; +}; + static LIST_HEAD(_log_types); static DEFINE_SPINLOCK(_lock); -static struct dm_dirty_log_type *_get_type(const char *type_name) +static struct dm_dirty_log_internal *__find_dirty_log_type(const char *name) { - struct dm_dirty_log_type *type; + struct dm_dirty_log_internal *log_type; + + list_for_each_entry(log_type, &_log_types, list) + if (!strcmp(name, log_type->type->name)) + return log_type; + + return NULL; +} + +static struct dm_dirty_log_internal *_get_dirty_log_type(const char *name) +{ + struct dm_dirty_log_internal *log_type; spin_lock(&_lock); - list_for_each_entry (type, &_log_types, list) - if (!strcmp(type_name, type->name)) { - if (!type->use_count && !try_module_get(type->module)){ - spin_unlock(&_lock); - return NULL; - } - type->use_count++; - spin_unlock(&_lock); - return type; - } + + log_type = __find_dirty_log_type(name); + if (log_type) { + if (!log_type->use && !try_module_get(log_type->type->module)) + log_type = NULL; + else + log_type->use++; + } spin_unlock(&_lock); - return NULL; + + return log_type; } /* * get_type * @type_name * - * Attempt to retrieve the dirty_log_type by name. If not already + * Attempt to retrieve the dm_dirty_log_type by name. If not already * available, attempt to load the appropriate module. * * Log modules are named "dm-log-" followed by the 'type_name'. @@ -59,11 +76,14 @@ static struct dm_dirty_log_type *_get_type(const char *type_name) static struct dm_dirty_log_type *get_type(const char *type_name) { char *p, *type_name_dup; - struct dm_dirty_log_type *type; + struct dm_dirty_log_internal *log_type; - type = _get_type(type_name); - if (type) - return type; + if (!type_name) + return NULL; + + log_type = _get_dirty_log_type(type_name); + if (log_type) + return log_type->type; type_name_dup = kstrdup(type_name, GFP_KERNEL); if (!type_name_dup) { @@ -73,50 +93,95 @@ static struct dm_dirty_log_type *get_type(const char *type_name) } while (request_module("dm-log-%s", type_name_dup) || - !(type = _get_type(type_name))) { + !(log_type = _get_dirty_log_type(type_name))) { p = strrchr(type_name_dup, '-'); if (!p) break; p[0] = '\0'; } - if (!type) + if (!log_type) DMWARN("Module for logging type \"%s\" not found.", type_name); kfree(type_name_dup); - return type; + return log_type ? log_type->type : NULL; } static void put_type(struct dm_dirty_log_type *type) { + struct dm_dirty_log_internal *log_type; + + if (!type) + return; + spin_lock(&_lock); - if (!--type->use_count) + log_type = __find_dirty_log_type(type->name); + if (!log_type) + goto out; + + if (!--log_type->use) module_put(type->module); + + BUG_ON(log_type->use < 0); + +out: spin_unlock(&_lock); } +static struct dm_dirty_log_internal *_alloc_dirty_log_type(struct dm_dirty_log_type *type) +{ + struct dm_dirty_log_internal *log_type = kzalloc(sizeof(*log_type), + GFP_KERNEL); + + if (log_type) + log_type->type = type; + + return log_type; +} + int dm_dirty_log_type_register(struct dm_dirty_log_type *type) { + struct dm_dirty_log_internal *log_type = _alloc_dirty_log_type(type); + int r = 0; + + if (!log_type) + return -ENOMEM; + spin_lock(&_lock); - type->use_count = 0; - list_add(&type->list, &_log_types); + if (!__find_dirty_log_type(type->name)) + list_add(&log_type->list, &_log_types); + else { + kfree(log_type); + r = -EEXIST; + } spin_unlock(&_lock); - return 0; + return r; } EXPORT_SYMBOL(dm_dirty_log_type_register); int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type) { + struct dm_dirty_log_internal *log_type; + spin_lock(&_lock); - if (type->use_count) - DMWARN("Attempt to unregister a log type that is still in use"); - else - list_del(&type->list); + log_type = __find_dirty_log_type(type->name); + if (!log_type) { + spin_unlock(&_lock); + return -EINVAL; + } + + if (log_type->use) { + spin_unlock(&_lock); + return -ETXTBSY; + } + + list_del(&log_type->list); spin_unlock(&_lock); + kfree(log_type); return 0; } diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h index 2da48a8..600c5fb 100644 --- a/drivers/md/dm-log.h +++ b/drivers/md/dm-log.h @@ -25,10 +25,8 @@ struct dm_dirty_log { }; struct dm_dirty_log_type { - struct list_head list; const char *name; struct module *module; - unsigned use_count; int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, unsigned argc, char **argv); -- cgit v0.10.2 From 8c0cbc2f79bb222d21b466422fde71fcc9bd37e3 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 24 Apr 2008 21:43:44 +0100 Subject: dm kcopyd: per device Make one kcopyd thread per device. The original shared kcopyd could deadlock. Configuration: diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index 4f2c61a..3fb6c83 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -26,14 +26,6 @@ #include "kcopyd.h" #include "dm.h" -static struct workqueue_struct *_kcopyd_wq; -static struct work_struct _kcopyd_work; - -static void wake(void) -{ - queue_work(_kcopyd_wq, &_kcopyd_work); -} - /*----------------------------------------------------------------- * Each kcopyd client has its own little pool of preallocated * pages for kcopyd io. @@ -50,8 +42,30 @@ struct dm_kcopyd_client { wait_queue_head_t destroyq; atomic_t nr_jobs; + + struct workqueue_struct *kcopyd_wq; + struct work_struct kcopyd_work; + +/* + * We maintain three lists of jobs: + * + * i) jobs waiting for pages + * ii) jobs that have pages, and are waiting for the io to be issued. + * iii) jobs that have completed. + * + * All three of these are protected by job_lock. + */ + spinlock_t job_lock; + struct list_head complete_jobs; + struct list_head io_jobs; + struct list_head pages_jobs; }; +static void wake(struct dm_kcopyd_client *kc) +{ + queue_work(kc->kcopyd_wq, &kc->kcopyd_work); +} + static struct page_list *alloc_pl(void) { struct page_list *pl; @@ -209,21 +223,6 @@ struct kcopyd_job { static struct kmem_cache *_job_cache; static mempool_t *_job_pool; -/* - * We maintain three lists of jobs: - * - * i) jobs waiting for pages - * ii) jobs that have pages, and are waiting for the io to be issued. - * iii) jobs that have completed. - * - * All three of these are protected by job_lock. - */ -static DEFINE_SPINLOCK(_job_lock); - -static LIST_HEAD(_complete_jobs); -static LIST_HEAD(_io_jobs); -static LIST_HEAD(_pages_jobs); - static int jobs_init(void) { _job_cache = KMEM_CACHE(kcopyd_job, 0); @@ -241,10 +240,6 @@ static int jobs_init(void) static void jobs_exit(void) { - BUG_ON(!list_empty(&_complete_jobs)); - BUG_ON(!list_empty(&_io_jobs)); - BUG_ON(!list_empty(&_pages_jobs)); - mempool_destroy(_job_pool); kmem_cache_destroy(_job_cache); _job_pool = NULL; @@ -255,18 +250,19 @@ static void jobs_exit(void) * Functions to push and pop a job onto the head of a given job * list. */ -static struct kcopyd_job *pop(struct list_head *jobs) +static struct kcopyd_job *pop(struct list_head *jobs, + struct dm_kcopyd_client *kc) { struct kcopyd_job *job = NULL; unsigned long flags; - spin_lock_irqsave(&_job_lock, flags); + spin_lock_irqsave(&kc->job_lock, flags); if (!list_empty(jobs)) { job = list_entry(jobs->next, struct kcopyd_job, list); list_del(&job->list); } - spin_unlock_irqrestore(&_job_lock, flags); + spin_unlock_irqrestore(&kc->job_lock, flags); return job; } @@ -274,10 +270,11 @@ static struct kcopyd_job *pop(struct list_head *jobs) static void push(struct list_head *jobs, struct kcopyd_job *job) { unsigned long flags; + struct dm_kcopyd_client *kc = job->kc; - spin_lock_irqsave(&_job_lock, flags); + spin_lock_irqsave(&kc->job_lock, flags); list_add_tail(&job->list, jobs); - spin_unlock_irqrestore(&_job_lock, flags); + spin_unlock_irqrestore(&kc->job_lock, flags); } /* @@ -310,6 +307,7 @@ static int run_complete_job(struct kcopyd_job *job) static void complete_io(unsigned long error, void *context) { struct kcopyd_job *job = (struct kcopyd_job *) context; + struct dm_kcopyd_client *kc = job->kc; if (error) { if (job->rw == WRITE) @@ -318,21 +316,21 @@ static void complete_io(unsigned long error, void *context) job->read_err = 1; if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { - push(&_complete_jobs, job); - wake(); + push(&kc->complete_jobs, job); + wake(kc); return; } } if (job->rw == WRITE) - push(&_complete_jobs, job); + push(&kc->complete_jobs, job); else { job->rw = WRITE; - push(&_io_jobs, job); + push(&kc->io_jobs, job); } - wake(); + wake(kc); } /* @@ -369,7 +367,7 @@ static int run_pages_job(struct kcopyd_job *job) r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); if (!r) { /* this job is ready for io */ - push(&_io_jobs, job); + push(&job->kc->io_jobs, job); return 0; } @@ -384,12 +382,13 @@ static int run_pages_job(struct kcopyd_job *job) * Run through a list for as long as possible. Returns the count * of successful jobs. */ -static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) +static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, + int (*fn) (struct kcopyd_job *)) { struct kcopyd_job *job; int r, count = 0; - while ((job = pop(jobs))) { + while ((job = pop(jobs, kc))) { r = fn(job); @@ -399,7 +398,7 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) job->write_err = (unsigned long) -1L; else job->read_err = 1; - push(&_complete_jobs, job); + push(&kc->complete_jobs, job); break; } @@ -421,8 +420,11 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *)) /* * kcopyd does this every time it's woken up. */ -static void do_work(struct work_struct *ignored) +static void do_work(struct work_struct *work) { + struct dm_kcopyd_client *kc = container_of(work, + struct dm_kcopyd_client, kcopyd_work); + /* * The order that these are called is *very* important. * complete jobs can free some pages for pages jobs. @@ -430,9 +432,9 @@ static void do_work(struct work_struct *ignored) * list. io jobs call wake when they complete and it all * starts again. */ - process_jobs(&_complete_jobs, run_complete_job); - process_jobs(&_pages_jobs, run_pages_job); - process_jobs(&_io_jobs, run_io_job); + process_jobs(&kc->complete_jobs, kc, run_complete_job); + process_jobs(&kc->pages_jobs, kc, run_pages_job); + process_jobs(&kc->io_jobs, kc, run_io_job); } /* @@ -442,9 +444,10 @@ static void do_work(struct work_struct *ignored) */ static void dispatch_job(struct kcopyd_job *job) { - atomic_inc(&job->kc->nr_jobs); - push(&_pages_jobs, job); - wake(); + struct dm_kcopyd_client *kc = job->kc; + atomic_inc(&kc->nr_jobs); + push(&kc->pages_jobs, job); + wake(kc); } #define SUB_JOB_SIZE 128 @@ -625,15 +628,7 @@ static int kcopyd_init(void) return r; } - _kcopyd_wq = create_singlethread_workqueue("kcopyd"); - if (!_kcopyd_wq) { - jobs_exit(); - mutex_unlock(&kcopyd_init_lock); - return -ENOMEM; - } - kcopyd_clients++; - INIT_WORK(&_kcopyd_work, do_work); mutex_unlock(&kcopyd_init_lock); return 0; } @@ -644,8 +639,6 @@ static void kcopyd_exit(void) kcopyd_clients--; if (!kcopyd_clients) { jobs_exit(); - destroy_workqueue(_kcopyd_wq); - _kcopyd_wq = NULL; } mutex_unlock(&kcopyd_init_lock); } @@ -662,15 +655,31 @@ int dm_kcopyd_client_create(unsigned int nr_pages, kc = kmalloc(sizeof(*kc), GFP_KERNEL); if (!kc) { + r = -ENOMEM; kcopyd_exit(); - return -ENOMEM; + return r; } spin_lock_init(&kc->lock); + spin_lock_init(&kc->job_lock); + INIT_LIST_HEAD(&kc->complete_jobs); + INIT_LIST_HEAD(&kc->io_jobs); + INIT_LIST_HEAD(&kc->pages_jobs); + + INIT_WORK(&kc->kcopyd_work, do_work); + kc->kcopyd_wq = create_singlethread_workqueue("kcopyd"); + if (!kc->kcopyd_wq) { + r = -ENOMEM; + kfree(kc); + kcopyd_exit(); + return r; + } + kc->pages = NULL; kc->nr_pages = kc->nr_free_pages = 0; r = client_alloc_pages(kc, nr_pages); if (r) { + destroy_workqueue(kc->kcopyd_wq); kfree(kc); kcopyd_exit(); return r; @@ -680,6 +689,7 @@ int dm_kcopyd_client_create(unsigned int nr_pages, if (IS_ERR(kc->io_client)) { r = PTR_ERR(kc->io_client); client_free_pages(kc); + destroy_workqueue(kc->kcopyd_wq); kfree(kc); kcopyd_exit(); return r; @@ -699,6 +709,10 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) /* Wait for completion of all jobs submitted by this client. */ wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); + BUG_ON(!list_empty(&kc->complete_jobs)); + BUG_ON(!list_empty(&kc->io_jobs)); + BUG_ON(!list_empty(&kc->pages_jobs)); + destroy_workqueue(kc->kcopyd_wq); dm_io_client_destroy(kc->io_client); client_free_pages(kc); client_del(kc); -- cgit v0.10.2 From 08d8757a4d52d21d825b9170af36f2696d1da1a8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 24 Apr 2008 21:43:46 +0100 Subject: dm kcopyd: private mempool Change the global mempool in kcopyd into a per-device mempool to avoid deadlock possibilities. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index 3fb6c83..0b2907d 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -43,6 +43,8 @@ struct dm_kcopyd_client { wait_queue_head_t destroyq; atomic_t nr_jobs; + mempool_t *job_pool; + struct workqueue_struct *kcopyd_wq; struct work_struct kcopyd_work; @@ -221,7 +223,6 @@ struct kcopyd_job { #define MIN_JOBS 512 static struct kmem_cache *_job_cache; -static mempool_t *_job_pool; static int jobs_init(void) { @@ -229,20 +230,12 @@ static int jobs_init(void) if (!_job_cache) return -ENOMEM; - _job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); - if (!_job_pool) { - kmem_cache_destroy(_job_cache); - return -ENOMEM; - } - return 0; } static void jobs_exit(void) { - mempool_destroy(_job_pool); kmem_cache_destroy(_job_cache); - _job_pool = NULL; _job_cache = NULL; } @@ -295,7 +288,7 @@ static int run_complete_job(struct kcopyd_job *job) struct dm_kcopyd_client *kc = job->kc; kcopyd_put_pages(kc, job->pages); - mempool_free(job, _job_pool); + mempool_free(job, kc->job_pool); fn(read_err, write_err, context); if (atomic_dec_and_test(&kc->nr_jobs)) @@ -487,7 +480,8 @@ static void segment_complete(int read_err, unsigned long write_err, if (count) { int i; - struct kcopyd_job *sub_job = mempool_alloc(_job_pool, GFP_NOIO); + struct kcopyd_job *sub_job = mempool_alloc(job->kc->job_pool, + GFP_NOIO); *sub_job = *job; sub_job->source.sector += progress; @@ -511,7 +505,7 @@ static void segment_complete(int read_err, unsigned long write_err, * after we've completed. */ job->fn(read_err, write_err, job->context); - mempool_free(job, _job_pool); + mempool_free(job, job->kc->job_pool); } } @@ -538,7 +532,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, /* * Allocate a new job. */ - job = mempool_alloc(_job_pool, GFP_NOIO); + job = mempool_alloc(kc->job_pool, GFP_NOIO); /* * set up for the read. @@ -666,10 +660,19 @@ int dm_kcopyd_client_create(unsigned int nr_pages, INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); + kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); + if (!kc->job_pool) { + r = -ENOMEM; + kfree(kc); + kcopyd_exit(); + return r; + } + INIT_WORK(&kc->kcopyd_work, do_work); kc->kcopyd_wq = create_singlethread_workqueue("kcopyd"); if (!kc->kcopyd_wq) { r = -ENOMEM; + mempool_destroy(kc->job_pool); kfree(kc); kcopyd_exit(); return r; @@ -680,6 +683,7 @@ int dm_kcopyd_client_create(unsigned int nr_pages, r = client_alloc_pages(kc, nr_pages); if (r) { destroy_workqueue(kc->kcopyd_wq); + mempool_destroy(kc->job_pool); kfree(kc); kcopyd_exit(); return r; @@ -690,6 +694,7 @@ int dm_kcopyd_client_create(unsigned int nr_pages, r = PTR_ERR(kc->io_client); client_free_pages(kc); destroy_workqueue(kc->kcopyd_wq); + mempool_destroy(kc->job_pool); kfree(kc); kcopyd_exit(); return r; @@ -716,6 +721,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) dm_io_client_destroy(kc->io_client); client_free_pages(kc); client_del(kc); + mempool_destroy(kc->job_pool); kfree(kc); kcopyd_exit(); } -- cgit v0.10.2 From 945fa4d283a3a472186c11028f6fea1e77a91d14 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 24 Apr 2008 21:43:49 +0100 Subject: dm kcopyd: remove redundant client counting Remove client counting code that is no longer needed. Initialization and destruction is made globally from dm_init and dm_exit and is not based on client counts. Initialization allocates only one empty slab cache, so there is no negative impact from performing the initialization always, regardless of whether some client uses kcopyd or not. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 6617ce4..11f4ffe 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -204,6 +204,7 @@ static int (*_inits[])(void) __initdata = { dm_target_init, dm_linear_init, dm_stripe_init, + dm_kcopyd_init, dm_interface_init, }; @@ -212,6 +213,7 @@ static void (*_exits[])(void) = { dm_target_exit, dm_linear_exit, dm_stripe_exit, + dm_kcopyd_exit, dm_interface_exit, }; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 17f2d6a..9a6023c 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -195,4 +195,7 @@ void dm_kobject_uevent(struct mapped_device *md); int dm_dirty_log_init(void); void dm_dirty_log_exit(void); +int dm_kcopyd_init(void); +void dm_kcopyd_exit(void); + #endif diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c index 0b2907d..1734584 100644 --- a/drivers/md/kcopyd.c +++ b/drivers/md/kcopyd.c @@ -31,8 +31,6 @@ * pages for kcopyd io. *---------------------------------------------------------------*/ struct dm_kcopyd_client { - struct list_head list; - spinlock_t lock; struct page_list *pages; unsigned int nr_pages; @@ -224,7 +222,7 @@ struct kcopyd_job { static struct kmem_cache *_job_cache; -static int jobs_init(void) +int __init dm_kcopyd_init(void) { _job_cache = KMEM_CACHE(kcopyd_job, 0); if (!_job_cache) @@ -233,7 +231,7 @@ static int jobs_init(void) return 0; } -static void jobs_exit(void) +void dm_kcopyd_exit(void) { kmem_cache_destroy(_job_cache); _job_cache = NULL; @@ -581,78 +579,17 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) #endif /* 0 */ /*----------------------------------------------------------------- - * Unit setup + * Client setup *---------------------------------------------------------------*/ -static DEFINE_MUTEX(_client_lock); -static LIST_HEAD(_clients); - -static void client_add(struct dm_kcopyd_client *kc) -{ - mutex_lock(&_client_lock); - list_add(&kc->list, &_clients); - mutex_unlock(&_client_lock); -} - -static void client_del(struct dm_kcopyd_client *kc) -{ - mutex_lock(&_client_lock); - list_del(&kc->list); - mutex_unlock(&_client_lock); -} - -static DEFINE_MUTEX(kcopyd_init_lock); -static int kcopyd_clients = 0; - -static int kcopyd_init(void) -{ - int r; - - mutex_lock(&kcopyd_init_lock); - - if (kcopyd_clients) { - /* Already initialized. */ - kcopyd_clients++; - mutex_unlock(&kcopyd_init_lock); - return 0; - } - - r = jobs_init(); - if (r) { - mutex_unlock(&kcopyd_init_lock); - return r; - } - - kcopyd_clients++; - mutex_unlock(&kcopyd_init_lock); - return 0; -} - -static void kcopyd_exit(void) -{ - mutex_lock(&kcopyd_init_lock); - kcopyd_clients--; - if (!kcopyd_clients) { - jobs_exit(); - } - mutex_unlock(&kcopyd_init_lock); -} - int dm_kcopyd_client_create(unsigned int nr_pages, struct dm_kcopyd_client **result) { - int r = 0; + int r = -ENOMEM; struct dm_kcopyd_client *kc; - r = kcopyd_init(); - if (r) - return r; - kc = kmalloc(sizeof(*kc), GFP_KERNEL); - if (!kc) { - r = -ENOMEM; - kcopyd_exit(); - return r; - } + if (!kc) + return -ENOMEM; spin_lock_init(&kc->lock); spin_lock_init(&kc->job_lock); @@ -661,51 +598,42 @@ int dm_kcopyd_client_create(unsigned int nr_pages, INIT_LIST_HEAD(&kc->pages_jobs); kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); - if (!kc->job_pool) { - r = -ENOMEM; - kfree(kc); - kcopyd_exit(); - return r; - } + if (!kc->job_pool) + goto bad_slab; INIT_WORK(&kc->kcopyd_work, do_work); kc->kcopyd_wq = create_singlethread_workqueue("kcopyd"); - if (!kc->kcopyd_wq) { - r = -ENOMEM; - mempool_destroy(kc->job_pool); - kfree(kc); - kcopyd_exit(); - return r; - } + if (!kc->kcopyd_wq) + goto bad_workqueue; kc->pages = NULL; kc->nr_pages = kc->nr_free_pages = 0; r = client_alloc_pages(kc, nr_pages); - if (r) { - destroy_workqueue(kc->kcopyd_wq); - mempool_destroy(kc->job_pool); - kfree(kc); - kcopyd_exit(); - return r; - } + if (r) + goto bad_client_pages; kc->io_client = dm_io_client_create(nr_pages); if (IS_ERR(kc->io_client)) { r = PTR_ERR(kc->io_client); - client_free_pages(kc); - destroy_workqueue(kc->kcopyd_wq); - mempool_destroy(kc->job_pool); - kfree(kc); - kcopyd_exit(); - return r; + goto bad_io_client; } init_waitqueue_head(&kc->destroyq); atomic_set(&kc->nr_jobs, 0); - client_add(kc); *result = kc; return 0; + +bad_io_client: + client_free_pages(kc); +bad_client_pages: + destroy_workqueue(kc->kcopyd_wq); +bad_workqueue: + mempool_destroy(kc->job_pool); +bad_slab: + kfree(kc); + + return r; } EXPORT_SYMBOL(dm_kcopyd_client_create); @@ -720,9 +648,7 @@ void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) destroy_workqueue(kc->kcopyd_wq); dm_io_client_destroy(kc->io_client); client_free_pages(kc); - client_del(kc); mempool_destroy(kc->job_pool); kfree(kc); - kcopyd_exit(); } EXPORT_SYMBOL(dm_kcopyd_client_destroy); -- cgit v0.10.2 From 0da336e5fab75c712ba8c67f3135d5a20528465f Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 24 Apr 2008 21:43:52 +0100 Subject: dm: expose macros Make dm.h macros and inlines available in include/linux/device-mapper.h Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 9a6023c..8c03b63 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -16,67 +16,6 @@ #include #include -#define DM_NAME "device-mapper" - -#define DMERR(f, arg...) \ - printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMERR_LIMIT(f, arg...) \ - do { \ - if (printk_ratelimit()) \ - printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " \ - f "\n", ## arg); \ - } while (0) - -#define DMWARN(f, arg...) \ - printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMWARN_LIMIT(f, arg...) \ - do { \ - if (printk_ratelimit()) \ - printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " \ - f "\n", ## arg); \ - } while (0) - -#define DMINFO(f, arg...) \ - printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMINFO_LIMIT(f, arg...) \ - do { \ - if (printk_ratelimit()) \ - printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f \ - "\n", ## arg); \ - } while (0) - -#ifdef CONFIG_DM_DEBUG -# define DMDEBUG(f, arg...) \ - printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg) -# define DMDEBUG_LIMIT(f, arg...) \ - do { \ - if (printk_ratelimit()) \ - printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX ": " f \ - "\n", ## arg); \ - } while (0) -#else -# define DMDEBUG(f, arg...) do {} while (0) -# define DMDEBUG_LIMIT(f, arg...) do {} while (0) -#endif - -#define DMEMIT(x...) sz += ((sz >= maxlen) ? \ - 0 : scnprintf(result + sz, maxlen - sz, x)) - -#define SECTOR_SHIFT 9 - -/* - * Definitions of return values from target end_io function. - */ -#define DM_ENDIO_INCOMPLETE 1 -#define DM_ENDIO_REQUEUE 2 - -/* - * Definitions of return values from target map function. - */ -#define DM_MAPIO_SUBMITTED 0 -#define DM_MAPIO_REMAPPED 1 -#define DM_MAPIO_REQUEUE DM_ENDIO_REQUEUE - /* * Suspend feature flags */ @@ -136,34 +75,6 @@ static inline int array_too_big(unsigned long fixed, unsigned long obj, return (num > (ULONG_MAX - fixed) / obj); } -/* - * Ceiling(n / sz) - */ -#define dm_div_up(n, sz) (((n) + (sz) - 1) / (sz)) - -#define dm_sector_div_up(n, sz) ( \ -{ \ - sector_t _r = ((n) + (sz) - 1); \ - sector_div(_r, (sz)); \ - _r; \ -} \ -) - -/* - * ceiling(n / size) * size - */ -#define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz)) - -static inline sector_t to_sector(unsigned long n) -{ - return (n >> 9); -} - -static inline unsigned long to_bytes(sector_t n) -{ - return (n << 9); -} - int dm_split_args(int *argc, char ***argvp, char *input); /* diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 4db2337..a68829e 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2001 Sistina Software (UK) Limited. - * Copyright (C) 2004 Red Hat, Inc. All rights reserved. + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. * * This file is released under the LGPL. */ @@ -258,5 +258,97 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *t); */ int dm_create_error_table(struct dm_table **result, struct mapped_device *md); +/*----------------------------------------------------------------- + * Macros. + *---------------------------------------------------------------*/ +#define DM_NAME "device-mapper" + +#define DMERR(f, arg...) \ + printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) +#define DMERR_LIMIT(f, arg...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " \ + f "\n", ## arg); \ + } while (0) + +#define DMWARN(f, arg...) \ + printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) +#define DMWARN_LIMIT(f, arg...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " \ + f "\n", ## arg); \ + } while (0) + +#define DMINFO(f, arg...) \ + printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) +#define DMINFO_LIMIT(f, arg...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f \ + "\n", ## arg); \ + } while (0) + +#ifdef CONFIG_DM_DEBUG +# define DMDEBUG(f, arg...) \ + printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg) +# define DMDEBUG_LIMIT(f, arg...) \ + do { \ + if (printk_ratelimit()) \ + printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX ": " f \ + "\n", ## arg); \ + } while (0) +#else +# define DMDEBUG(f, arg...) do {} while (0) +# define DMDEBUG_LIMIT(f, arg...) do {} while (0) +#endif + +#define DMEMIT(x...) sz += ((sz >= maxlen) ? \ + 0 : scnprintf(result + sz, maxlen - sz, x)) + +#define SECTOR_SHIFT 9 + +/* + * Definitions of return values from target end_io function. + */ +#define DM_ENDIO_INCOMPLETE 1 +#define DM_ENDIO_REQUEUE 2 + +/* + * Definitions of return values from target map function. + */ +#define DM_MAPIO_SUBMITTED 0 +#define DM_MAPIO_REMAPPED 1 +#define DM_MAPIO_REQUEUE DM_ENDIO_REQUEUE + +/* + * Ceiling(n / sz) + */ +#define dm_div_up(n, sz) (((n) + (sz) - 1) / (sz)) + +#define dm_sector_div_up(n, sz) ( \ +{ \ + sector_t _r = ((n) + (sz) - 1); \ + sector_div(_r, (sz)); \ + _r; \ +} \ +) + +/* + * ceiling(n / size) * size + */ +#define dm_round_up(n, sz) (dm_div_up((n), (sz)) * (sz)) + +static inline sector_t to_sector(unsigned long n) +{ + return (n >> SECTOR_SHIFT); +} + +static inline unsigned long to_bytes(sector_t n) +{ + return (n << SECTOR_SHIFT); +} + #endif /* __KERNEL__ */ #endif /* _LINUX_DEVICE_MAPPER_H */ -- cgit v0.10.2 From 2d1e580afe23287871529ce54429e249809525a1 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 24 Apr 2008 21:55:00 +0100 Subject: dm kcopyd: rename Rename kcopyd.[ch] to dm-kcopyd.[ch]. Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c new file mode 100644 index 0000000..1734584 --- /dev/null +++ b/drivers/md/dm-kcopyd.c @@ -0,0 +1,654 @@ +/* + * Copyright (C) 2002 Sistina Software (UK) Limited. + * Copyright (C) 2006 Red Hat GmbH + * + * This file is released under the GPL. + * + * Kcopyd provides a simple interface for copying an area of one + * block-device to one or more other block-devices, with an asynchronous + * completion notification. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kcopyd.h" +#include "dm.h" + +/*----------------------------------------------------------------- + * Each kcopyd client has its own little pool of preallocated + * pages for kcopyd io. + *---------------------------------------------------------------*/ +struct dm_kcopyd_client { + spinlock_t lock; + struct page_list *pages; + unsigned int nr_pages; + unsigned int nr_free_pages; + + struct dm_io_client *io_client; + + wait_queue_head_t destroyq; + atomic_t nr_jobs; + + mempool_t *job_pool; + + struct workqueue_struct *kcopyd_wq; + struct work_struct kcopyd_work; + +/* + * We maintain three lists of jobs: + * + * i) jobs waiting for pages + * ii) jobs that have pages, and are waiting for the io to be issued. + * iii) jobs that have completed. + * + * All three of these are protected by job_lock. + */ + spinlock_t job_lock; + struct list_head complete_jobs; + struct list_head io_jobs; + struct list_head pages_jobs; +}; + +static void wake(struct dm_kcopyd_client *kc) +{ + queue_work(kc->kcopyd_wq, &kc->kcopyd_work); +} + +static struct page_list *alloc_pl(void) +{ + struct page_list *pl; + + pl = kmalloc(sizeof(*pl), GFP_KERNEL); + if (!pl) + return NULL; + + pl->page = alloc_page(GFP_KERNEL); + if (!pl->page) { + kfree(pl); + return NULL; + } + + return pl; +} + +static void free_pl(struct page_list *pl) +{ + __free_page(pl->page); + kfree(pl); +} + +static int kcopyd_get_pages(struct dm_kcopyd_client *kc, + unsigned int nr, struct page_list **pages) +{ + struct page_list *pl; + + spin_lock(&kc->lock); + if (kc->nr_free_pages < nr) { + spin_unlock(&kc->lock); + return -ENOMEM; + } + + kc->nr_free_pages -= nr; + for (*pages = pl = kc->pages; --nr; pl = pl->next) + ; + + kc->pages = pl->next; + pl->next = NULL; + + spin_unlock(&kc->lock); + + return 0; +} + +static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) +{ + struct page_list *cursor; + + spin_lock(&kc->lock); + for (cursor = pl; cursor->next; cursor = cursor->next) + kc->nr_free_pages++; + + kc->nr_free_pages++; + cursor->next = kc->pages; + kc->pages = pl; + spin_unlock(&kc->lock); +} + +/* + * These three functions resize the page pool. + */ +static void drop_pages(struct page_list *pl) +{ + struct page_list *next; + + while (pl) { + next = pl->next; + free_pl(pl); + pl = next; + } +} + +static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) +{ + unsigned int i; + struct page_list *pl = NULL, *next; + + for (i = 0; i < nr; i++) { + next = alloc_pl(); + if (!next) { + if (pl) + drop_pages(pl); + return -ENOMEM; + } + next->next = pl; + pl = next; + } + + kcopyd_put_pages(kc, pl); + kc->nr_pages += nr; + return 0; +} + +static void client_free_pages(struct dm_kcopyd_client *kc) +{ + BUG_ON(kc->nr_free_pages != kc->nr_pages); + drop_pages(kc->pages); + kc->pages = NULL; + kc->nr_free_pages = kc->nr_pages = 0; +} + +/*----------------------------------------------------------------- + * kcopyd_jobs need to be allocated by the *clients* of kcopyd, + * for this reason we use a mempool to prevent the client from + * ever having to do io (which could cause a deadlock). + *---------------------------------------------------------------*/ +struct kcopyd_job { + struct dm_kcopyd_client *kc; + struct list_head list; + unsigned long flags; + + /* + * Error state of the job. + */ + int read_err; + unsigned long write_err; + + /* + * Either READ or WRITE + */ + int rw; + struct dm_io_region source; + + /* + * The destinations for the transfer. + */ + unsigned int num_dests; + struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; + + sector_t offset; + unsigned int nr_pages; + struct page_list *pages; + + /* + * Set this to ensure you are notified when the job has + * completed. 'context' is for callback to use. + */ + dm_kcopyd_notify_fn fn; + void *context; + + /* + * These fields are only used if the job has been split + * into more manageable parts. + */ + struct mutex lock; + atomic_t sub_jobs; + sector_t progress; +}; + +/* FIXME: this should scale with the number of pages */ +#define MIN_JOBS 512 + +static struct kmem_cache *_job_cache; + +int __init dm_kcopyd_init(void) +{ + _job_cache = KMEM_CACHE(kcopyd_job, 0); + if (!_job_cache) + return -ENOMEM; + + return 0; +} + +void dm_kcopyd_exit(void) +{ + kmem_cache_destroy(_job_cache); + _job_cache = NULL; +} + +/* + * Functions to push and pop a job onto the head of a given job + * list. + */ +static struct kcopyd_job *pop(struct list_head *jobs, + struct dm_kcopyd_client *kc) +{ + struct kcopyd_job *job = NULL; + unsigned long flags; + + spin_lock_irqsave(&kc->job_lock, flags); + + if (!list_empty(jobs)) { + job = list_entry(jobs->next, struct kcopyd_job, list); + list_del(&job->list); + } + spin_unlock_irqrestore(&kc->job_lock, flags); + + return job; +} + +static void push(struct list_head *jobs, struct kcopyd_job *job) +{ + unsigned long flags; + struct dm_kcopyd_client *kc = job->kc; + + spin_lock_irqsave(&kc->job_lock, flags); + list_add_tail(&job->list, jobs); + spin_unlock_irqrestore(&kc->job_lock, flags); +} + +/* + * These three functions process 1 item from the corresponding + * job list. + * + * They return: + * < 0: error + * 0: success + * > 0: can't process yet. + */ +static int run_complete_job(struct kcopyd_job *job) +{ + void *context = job->context; + int read_err = job->read_err; + unsigned long write_err = job->write_err; + dm_kcopyd_notify_fn fn = job->fn; + struct dm_kcopyd_client *kc = job->kc; + + kcopyd_put_pages(kc, job->pages); + mempool_free(job, kc->job_pool); + fn(read_err, write_err, context); + + if (atomic_dec_and_test(&kc->nr_jobs)) + wake_up(&kc->destroyq); + + return 0; +} + +static void complete_io(unsigned long error, void *context) +{ + struct kcopyd_job *job = (struct kcopyd_job *) context; + struct dm_kcopyd_client *kc = job->kc; + + if (error) { + if (job->rw == WRITE) + job->write_err |= error; + else + job->read_err = 1; + + if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { + push(&kc->complete_jobs, job); + wake(kc); + return; + } + } + + if (job->rw == WRITE) + push(&kc->complete_jobs, job); + + else { + job->rw = WRITE; + push(&kc->io_jobs, job); + } + + wake(kc); +} + +/* + * Request io on as many buffer heads as we can currently get for + * a particular job. + */ +static int run_io_job(struct kcopyd_job *job) +{ + int r; + struct dm_io_request io_req = { + .bi_rw = job->rw, + .mem.type = DM_IO_PAGE_LIST, + .mem.ptr.pl = job->pages, + .mem.offset = job->offset, + .notify.fn = complete_io, + .notify.context = job, + .client = job->kc->io_client, + }; + + if (job->rw == READ) + r = dm_io(&io_req, 1, &job->source, NULL); + else + r = dm_io(&io_req, job->num_dests, job->dests, NULL); + + return r; +} + +static int run_pages_job(struct kcopyd_job *job) +{ + int r; + + job->nr_pages = dm_div_up(job->dests[0].count + job->offset, + PAGE_SIZE >> 9); + r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); + if (!r) { + /* this job is ready for io */ + push(&job->kc->io_jobs, job); + return 0; + } + + if (r == -ENOMEM) + /* can't complete now */ + return 1; + + return r; +} + +/* + * Run through a list for as long as possible. Returns the count + * of successful jobs. + */ +static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, + int (*fn) (struct kcopyd_job *)) +{ + struct kcopyd_job *job; + int r, count = 0; + + while ((job = pop(jobs, kc))) { + + r = fn(job); + + if (r < 0) { + /* error this rogue job */ + if (job->rw == WRITE) + job->write_err = (unsigned long) -1L; + else + job->read_err = 1; + push(&kc->complete_jobs, job); + break; + } + + if (r > 0) { + /* + * We couldn't service this job ATM, so + * push this job back onto the list. + */ + push(jobs, job); + break; + } + + count++; + } + + return count; +} + +/* + * kcopyd does this every time it's woken up. + */ +static void do_work(struct work_struct *work) +{ + struct dm_kcopyd_client *kc = container_of(work, + struct dm_kcopyd_client, kcopyd_work); + + /* + * The order that these are called is *very* important. + * complete jobs can free some pages for pages jobs. + * Pages jobs when successful will jump onto the io jobs + * list. io jobs call wake when they complete and it all + * starts again. + */ + process_jobs(&kc->complete_jobs, kc, run_complete_job); + process_jobs(&kc->pages_jobs, kc, run_pages_job); + process_jobs(&kc->io_jobs, kc, run_io_job); +} + +/* + * If we are copying a small region we just dispatch a single job + * to do the copy, otherwise the io has to be split up into many + * jobs. + */ +static void dispatch_job(struct kcopyd_job *job) +{ + struct dm_kcopyd_client *kc = job->kc; + atomic_inc(&kc->nr_jobs); + push(&kc->pages_jobs, job); + wake(kc); +} + +#define SUB_JOB_SIZE 128 +static void segment_complete(int read_err, unsigned long write_err, + void *context) +{ + /* FIXME: tidy this function */ + sector_t progress = 0; + sector_t count = 0; + struct kcopyd_job *job = (struct kcopyd_job *) context; + + mutex_lock(&job->lock); + + /* update the error */ + if (read_err) + job->read_err = 1; + + if (write_err) + job->write_err |= write_err; + + /* + * Only dispatch more work if there hasn't been an error. + */ + if ((!job->read_err && !job->write_err) || + test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { + /* get the next chunk of work */ + progress = job->progress; + count = job->source.count - progress; + if (count) { + if (count > SUB_JOB_SIZE) + count = SUB_JOB_SIZE; + + job->progress += count; + } + } + mutex_unlock(&job->lock); + + if (count) { + int i; + struct kcopyd_job *sub_job = mempool_alloc(job->kc->job_pool, + GFP_NOIO); + + *sub_job = *job; + sub_job->source.sector += progress; + sub_job->source.count = count; + + for (i = 0; i < job->num_dests; i++) { + sub_job->dests[i].sector += progress; + sub_job->dests[i].count = count; + } + + sub_job->fn = segment_complete; + sub_job->context = job; + dispatch_job(sub_job); + + } else if (atomic_dec_and_test(&job->sub_jobs)) { + + /* + * To avoid a race we must keep the job around + * until after the notify function has completed. + * Otherwise the client may try and stop the job + * after we've completed. + */ + job->fn(read_err, write_err, job->context); + mempool_free(job, job->kc->job_pool); + } +} + +/* + * Create some little jobs that will do the move between + * them. + */ +#define SPLIT_COUNT 8 +static void split_job(struct kcopyd_job *job) +{ + int i; + + atomic_set(&job->sub_jobs, SPLIT_COUNT); + for (i = 0; i < SPLIT_COUNT; i++) + segment_complete(0, 0u, job); +} + +int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, + unsigned int num_dests, struct dm_io_region *dests, + unsigned int flags, dm_kcopyd_notify_fn fn, void *context) +{ + struct kcopyd_job *job; + + /* + * Allocate a new job. + */ + job = mempool_alloc(kc->job_pool, GFP_NOIO); + + /* + * set up for the read. + */ + job->kc = kc; + job->flags = flags; + job->read_err = 0; + job->write_err = 0; + job->rw = READ; + + job->source = *from; + + job->num_dests = num_dests; + memcpy(&job->dests, dests, sizeof(*dests) * num_dests); + + job->offset = 0; + job->nr_pages = 0; + job->pages = NULL; + + job->fn = fn; + job->context = context; + + if (job->source.count < SUB_JOB_SIZE) + dispatch_job(job); + + else { + mutex_init(&job->lock); + job->progress = 0; + split_job(job); + } + + return 0; +} +EXPORT_SYMBOL(dm_kcopyd_copy); + +/* + * Cancels a kcopyd job, eg. someone might be deactivating a + * mirror. + */ +#if 0 +int kcopyd_cancel(struct kcopyd_job *job, int block) +{ + /* FIXME: finish */ + return -1; +} +#endif /* 0 */ + +/*----------------------------------------------------------------- + * Client setup + *---------------------------------------------------------------*/ +int dm_kcopyd_client_create(unsigned int nr_pages, + struct dm_kcopyd_client **result) +{ + int r = -ENOMEM; + struct dm_kcopyd_client *kc; + + kc = kmalloc(sizeof(*kc), GFP_KERNEL); + if (!kc) + return -ENOMEM; + + spin_lock_init(&kc->lock); + spin_lock_init(&kc->job_lock); + INIT_LIST_HEAD(&kc->complete_jobs); + INIT_LIST_HEAD(&kc->io_jobs); + INIT_LIST_HEAD(&kc->pages_jobs); + + kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); + if (!kc->job_pool) + goto bad_slab; + + INIT_WORK(&kc->kcopyd_work, do_work); + kc->kcopyd_wq = create_singlethread_workqueue("kcopyd"); + if (!kc->kcopyd_wq) + goto bad_workqueue; + + kc->pages = NULL; + kc->nr_pages = kc->nr_free_pages = 0; + r = client_alloc_pages(kc, nr_pages); + if (r) + goto bad_client_pages; + + kc->io_client = dm_io_client_create(nr_pages); + if (IS_ERR(kc->io_client)) { + r = PTR_ERR(kc->io_client); + goto bad_io_client; + } + + init_waitqueue_head(&kc->destroyq); + atomic_set(&kc->nr_jobs, 0); + + *result = kc; + return 0; + +bad_io_client: + client_free_pages(kc); +bad_client_pages: + destroy_workqueue(kc->kcopyd_wq); +bad_workqueue: + mempool_destroy(kc->job_pool); +bad_slab: + kfree(kc); + + return r; +} +EXPORT_SYMBOL(dm_kcopyd_client_create); + +void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) +{ + /* Wait for completion of all jobs submitted by this client. */ + wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); + + BUG_ON(!list_empty(&kc->complete_jobs)); + BUG_ON(!list_empty(&kc->io_jobs)); + BUG_ON(!list_empty(&kc->pages_jobs)); + destroy_workqueue(kc->kcopyd_wq); + dm_io_client_destroy(kc->io_client); + client_free_pages(kc); + mempool_destroy(kc->job_pool); + kfree(kc); +} +EXPORT_SYMBOL(dm_kcopyd_client_destroy); diff --git a/drivers/md/dm-kcopyd.h b/drivers/md/dm-kcopyd.h new file mode 100644 index 0000000..d305784 --- /dev/null +++ b/drivers/md/dm-kcopyd.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2001 - 2003 Sistina Software + * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. + * + * kcopyd provides a simple interface for copying an area of one + * block-device to one or more other block-devices, either synchronous + * or with an asynchronous completion notification. + * + * This file is released under the GPL. + */ + +#ifndef _LINUX_DM_KCOPYD_H +#define _LINUX_DM_KCOPYD_H + +#ifdef __KERNEL__ + +#include "dm-io.h" + +/* FIXME: make this configurable */ +#define DM_KCOPYD_MAX_REGIONS 8 + +#define DM_KCOPYD_IGNORE_ERROR 1 + +/* + * To use kcopyd you must first create a dm_kcopyd_client object. + */ +struct dm_kcopyd_client; +int dm_kcopyd_client_create(unsigned num_pages, + struct dm_kcopyd_client **result); +void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); + +/* + * Submit a copy job to kcopyd. This is built on top of the + * previous three fns. + * + * read_err is a boolean, + * write_err is a bitset, with 1 bit for each destination region + */ +typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err, + void *context); + +int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, + unsigned num_dests, struct dm_io_region *dests, + unsigned flags, dm_kcopyd_notify_fn fn, void *context); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_DM_KCOPYD_H */ diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c deleted file mode 100644 index 1734584..0000000 --- a/drivers/md/kcopyd.c +++ /dev/null @@ -1,654 +0,0 @@ -/* - * Copyright (C) 2002 Sistina Software (UK) Limited. - * Copyright (C) 2006 Red Hat GmbH - * - * This file is released under the GPL. - * - * Kcopyd provides a simple interface for copying an area of one - * block-device to one or more other block-devices, with an asynchronous - * completion notification. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "kcopyd.h" -#include "dm.h" - -/*----------------------------------------------------------------- - * Each kcopyd client has its own little pool of preallocated - * pages for kcopyd io. - *---------------------------------------------------------------*/ -struct dm_kcopyd_client { - spinlock_t lock; - struct page_list *pages; - unsigned int nr_pages; - unsigned int nr_free_pages; - - struct dm_io_client *io_client; - - wait_queue_head_t destroyq; - atomic_t nr_jobs; - - mempool_t *job_pool; - - struct workqueue_struct *kcopyd_wq; - struct work_struct kcopyd_work; - -/* - * We maintain three lists of jobs: - * - * i) jobs waiting for pages - * ii) jobs that have pages, and are waiting for the io to be issued. - * iii) jobs that have completed. - * - * All three of these are protected by job_lock. - */ - spinlock_t job_lock; - struct list_head complete_jobs; - struct list_head io_jobs; - struct list_head pages_jobs; -}; - -static void wake(struct dm_kcopyd_client *kc) -{ - queue_work(kc->kcopyd_wq, &kc->kcopyd_work); -} - -static struct page_list *alloc_pl(void) -{ - struct page_list *pl; - - pl = kmalloc(sizeof(*pl), GFP_KERNEL); - if (!pl) - return NULL; - - pl->page = alloc_page(GFP_KERNEL); - if (!pl->page) { - kfree(pl); - return NULL; - } - - return pl; -} - -static void free_pl(struct page_list *pl) -{ - __free_page(pl->page); - kfree(pl); -} - -static int kcopyd_get_pages(struct dm_kcopyd_client *kc, - unsigned int nr, struct page_list **pages) -{ - struct page_list *pl; - - spin_lock(&kc->lock); - if (kc->nr_free_pages < nr) { - spin_unlock(&kc->lock); - return -ENOMEM; - } - - kc->nr_free_pages -= nr; - for (*pages = pl = kc->pages; --nr; pl = pl->next) - ; - - kc->pages = pl->next; - pl->next = NULL; - - spin_unlock(&kc->lock); - - return 0; -} - -static void kcopyd_put_pages(struct dm_kcopyd_client *kc, struct page_list *pl) -{ - struct page_list *cursor; - - spin_lock(&kc->lock); - for (cursor = pl; cursor->next; cursor = cursor->next) - kc->nr_free_pages++; - - kc->nr_free_pages++; - cursor->next = kc->pages; - kc->pages = pl; - spin_unlock(&kc->lock); -} - -/* - * These three functions resize the page pool. - */ -static void drop_pages(struct page_list *pl) -{ - struct page_list *next; - - while (pl) { - next = pl->next; - free_pl(pl); - pl = next; - } -} - -static int client_alloc_pages(struct dm_kcopyd_client *kc, unsigned int nr) -{ - unsigned int i; - struct page_list *pl = NULL, *next; - - for (i = 0; i < nr; i++) { - next = alloc_pl(); - if (!next) { - if (pl) - drop_pages(pl); - return -ENOMEM; - } - next->next = pl; - pl = next; - } - - kcopyd_put_pages(kc, pl); - kc->nr_pages += nr; - return 0; -} - -static void client_free_pages(struct dm_kcopyd_client *kc) -{ - BUG_ON(kc->nr_free_pages != kc->nr_pages); - drop_pages(kc->pages); - kc->pages = NULL; - kc->nr_free_pages = kc->nr_pages = 0; -} - -/*----------------------------------------------------------------- - * kcopyd_jobs need to be allocated by the *clients* of kcopyd, - * for this reason we use a mempool to prevent the client from - * ever having to do io (which could cause a deadlock). - *---------------------------------------------------------------*/ -struct kcopyd_job { - struct dm_kcopyd_client *kc; - struct list_head list; - unsigned long flags; - - /* - * Error state of the job. - */ - int read_err; - unsigned long write_err; - - /* - * Either READ or WRITE - */ - int rw; - struct dm_io_region source; - - /* - * The destinations for the transfer. - */ - unsigned int num_dests; - struct dm_io_region dests[DM_KCOPYD_MAX_REGIONS]; - - sector_t offset; - unsigned int nr_pages; - struct page_list *pages; - - /* - * Set this to ensure you are notified when the job has - * completed. 'context' is for callback to use. - */ - dm_kcopyd_notify_fn fn; - void *context; - - /* - * These fields are only used if the job has been split - * into more manageable parts. - */ - struct mutex lock; - atomic_t sub_jobs; - sector_t progress; -}; - -/* FIXME: this should scale with the number of pages */ -#define MIN_JOBS 512 - -static struct kmem_cache *_job_cache; - -int __init dm_kcopyd_init(void) -{ - _job_cache = KMEM_CACHE(kcopyd_job, 0); - if (!_job_cache) - return -ENOMEM; - - return 0; -} - -void dm_kcopyd_exit(void) -{ - kmem_cache_destroy(_job_cache); - _job_cache = NULL; -} - -/* - * Functions to push and pop a job onto the head of a given job - * list. - */ -static struct kcopyd_job *pop(struct list_head *jobs, - struct dm_kcopyd_client *kc) -{ - struct kcopyd_job *job = NULL; - unsigned long flags; - - spin_lock_irqsave(&kc->job_lock, flags); - - if (!list_empty(jobs)) { - job = list_entry(jobs->next, struct kcopyd_job, list); - list_del(&job->list); - } - spin_unlock_irqrestore(&kc->job_lock, flags); - - return job; -} - -static void push(struct list_head *jobs, struct kcopyd_job *job) -{ - unsigned long flags; - struct dm_kcopyd_client *kc = job->kc; - - spin_lock_irqsave(&kc->job_lock, flags); - list_add_tail(&job->list, jobs); - spin_unlock_irqrestore(&kc->job_lock, flags); -} - -/* - * These three functions process 1 item from the corresponding - * job list. - * - * They return: - * < 0: error - * 0: success - * > 0: can't process yet. - */ -static int run_complete_job(struct kcopyd_job *job) -{ - void *context = job->context; - int read_err = job->read_err; - unsigned long write_err = job->write_err; - dm_kcopyd_notify_fn fn = job->fn; - struct dm_kcopyd_client *kc = job->kc; - - kcopyd_put_pages(kc, job->pages); - mempool_free(job, kc->job_pool); - fn(read_err, write_err, context); - - if (atomic_dec_and_test(&kc->nr_jobs)) - wake_up(&kc->destroyq); - - return 0; -} - -static void complete_io(unsigned long error, void *context) -{ - struct kcopyd_job *job = (struct kcopyd_job *) context; - struct dm_kcopyd_client *kc = job->kc; - - if (error) { - if (job->rw == WRITE) - job->write_err |= error; - else - job->read_err = 1; - - if (!test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { - push(&kc->complete_jobs, job); - wake(kc); - return; - } - } - - if (job->rw == WRITE) - push(&kc->complete_jobs, job); - - else { - job->rw = WRITE; - push(&kc->io_jobs, job); - } - - wake(kc); -} - -/* - * Request io on as many buffer heads as we can currently get for - * a particular job. - */ -static int run_io_job(struct kcopyd_job *job) -{ - int r; - struct dm_io_request io_req = { - .bi_rw = job->rw, - .mem.type = DM_IO_PAGE_LIST, - .mem.ptr.pl = job->pages, - .mem.offset = job->offset, - .notify.fn = complete_io, - .notify.context = job, - .client = job->kc->io_client, - }; - - if (job->rw == READ) - r = dm_io(&io_req, 1, &job->source, NULL); - else - r = dm_io(&io_req, job->num_dests, job->dests, NULL); - - return r; -} - -static int run_pages_job(struct kcopyd_job *job) -{ - int r; - - job->nr_pages = dm_div_up(job->dests[0].count + job->offset, - PAGE_SIZE >> 9); - r = kcopyd_get_pages(job->kc, job->nr_pages, &job->pages); - if (!r) { - /* this job is ready for io */ - push(&job->kc->io_jobs, job); - return 0; - } - - if (r == -ENOMEM) - /* can't complete now */ - return 1; - - return r; -} - -/* - * Run through a list for as long as possible. Returns the count - * of successful jobs. - */ -static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, - int (*fn) (struct kcopyd_job *)) -{ - struct kcopyd_job *job; - int r, count = 0; - - while ((job = pop(jobs, kc))) { - - r = fn(job); - - if (r < 0) { - /* error this rogue job */ - if (job->rw == WRITE) - job->write_err = (unsigned long) -1L; - else - job->read_err = 1; - push(&kc->complete_jobs, job); - break; - } - - if (r > 0) { - /* - * We couldn't service this job ATM, so - * push this job back onto the list. - */ - push(jobs, job); - break; - } - - count++; - } - - return count; -} - -/* - * kcopyd does this every time it's woken up. - */ -static void do_work(struct work_struct *work) -{ - struct dm_kcopyd_client *kc = container_of(work, - struct dm_kcopyd_client, kcopyd_work); - - /* - * The order that these are called is *very* important. - * complete jobs can free some pages for pages jobs. - * Pages jobs when successful will jump onto the io jobs - * list. io jobs call wake when they complete and it all - * starts again. - */ - process_jobs(&kc->complete_jobs, kc, run_complete_job); - process_jobs(&kc->pages_jobs, kc, run_pages_job); - process_jobs(&kc->io_jobs, kc, run_io_job); -} - -/* - * If we are copying a small region we just dispatch a single job - * to do the copy, otherwise the io has to be split up into many - * jobs. - */ -static void dispatch_job(struct kcopyd_job *job) -{ - struct dm_kcopyd_client *kc = job->kc; - atomic_inc(&kc->nr_jobs); - push(&kc->pages_jobs, job); - wake(kc); -} - -#define SUB_JOB_SIZE 128 -static void segment_complete(int read_err, unsigned long write_err, - void *context) -{ - /* FIXME: tidy this function */ - sector_t progress = 0; - sector_t count = 0; - struct kcopyd_job *job = (struct kcopyd_job *) context; - - mutex_lock(&job->lock); - - /* update the error */ - if (read_err) - job->read_err = 1; - - if (write_err) - job->write_err |= write_err; - - /* - * Only dispatch more work if there hasn't been an error. - */ - if ((!job->read_err && !job->write_err) || - test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) { - /* get the next chunk of work */ - progress = job->progress; - count = job->source.count - progress; - if (count) { - if (count > SUB_JOB_SIZE) - count = SUB_JOB_SIZE; - - job->progress += count; - } - } - mutex_unlock(&job->lock); - - if (count) { - int i; - struct kcopyd_job *sub_job = mempool_alloc(job->kc->job_pool, - GFP_NOIO); - - *sub_job = *job; - sub_job->source.sector += progress; - sub_job->source.count = count; - - for (i = 0; i < job->num_dests; i++) { - sub_job->dests[i].sector += progress; - sub_job->dests[i].count = count; - } - - sub_job->fn = segment_complete; - sub_job->context = job; - dispatch_job(sub_job); - - } else if (atomic_dec_and_test(&job->sub_jobs)) { - - /* - * To avoid a race we must keep the job around - * until after the notify function has completed. - * Otherwise the client may try and stop the job - * after we've completed. - */ - job->fn(read_err, write_err, job->context); - mempool_free(job, job->kc->job_pool); - } -} - -/* - * Create some little jobs that will do the move between - * them. - */ -#define SPLIT_COUNT 8 -static void split_job(struct kcopyd_job *job) -{ - int i; - - atomic_set(&job->sub_jobs, SPLIT_COUNT); - for (i = 0; i < SPLIT_COUNT; i++) - segment_complete(0, 0u, job); -} - -int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, - unsigned int num_dests, struct dm_io_region *dests, - unsigned int flags, dm_kcopyd_notify_fn fn, void *context) -{ - struct kcopyd_job *job; - - /* - * Allocate a new job. - */ - job = mempool_alloc(kc->job_pool, GFP_NOIO); - - /* - * set up for the read. - */ - job->kc = kc; - job->flags = flags; - job->read_err = 0; - job->write_err = 0; - job->rw = READ; - - job->source = *from; - - job->num_dests = num_dests; - memcpy(&job->dests, dests, sizeof(*dests) * num_dests); - - job->offset = 0; - job->nr_pages = 0; - job->pages = NULL; - - job->fn = fn; - job->context = context; - - if (job->source.count < SUB_JOB_SIZE) - dispatch_job(job); - - else { - mutex_init(&job->lock); - job->progress = 0; - split_job(job); - } - - return 0; -} -EXPORT_SYMBOL(dm_kcopyd_copy); - -/* - * Cancels a kcopyd job, eg. someone might be deactivating a - * mirror. - */ -#if 0 -int kcopyd_cancel(struct kcopyd_job *job, int block) -{ - /* FIXME: finish */ - return -1; -} -#endif /* 0 */ - -/*----------------------------------------------------------------- - * Client setup - *---------------------------------------------------------------*/ -int dm_kcopyd_client_create(unsigned int nr_pages, - struct dm_kcopyd_client **result) -{ - int r = -ENOMEM; - struct dm_kcopyd_client *kc; - - kc = kmalloc(sizeof(*kc), GFP_KERNEL); - if (!kc) - return -ENOMEM; - - spin_lock_init(&kc->lock); - spin_lock_init(&kc->job_lock); - INIT_LIST_HEAD(&kc->complete_jobs); - INIT_LIST_HEAD(&kc->io_jobs); - INIT_LIST_HEAD(&kc->pages_jobs); - - kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); - if (!kc->job_pool) - goto bad_slab; - - INIT_WORK(&kc->kcopyd_work, do_work); - kc->kcopyd_wq = create_singlethread_workqueue("kcopyd"); - if (!kc->kcopyd_wq) - goto bad_workqueue; - - kc->pages = NULL; - kc->nr_pages = kc->nr_free_pages = 0; - r = client_alloc_pages(kc, nr_pages); - if (r) - goto bad_client_pages; - - kc->io_client = dm_io_client_create(nr_pages); - if (IS_ERR(kc->io_client)) { - r = PTR_ERR(kc->io_client); - goto bad_io_client; - } - - init_waitqueue_head(&kc->destroyq); - atomic_set(&kc->nr_jobs, 0); - - *result = kc; - return 0; - -bad_io_client: - client_free_pages(kc); -bad_client_pages: - destroy_workqueue(kc->kcopyd_wq); -bad_workqueue: - mempool_destroy(kc->job_pool); -bad_slab: - kfree(kc); - - return r; -} -EXPORT_SYMBOL(dm_kcopyd_client_create); - -void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc) -{ - /* Wait for completion of all jobs submitted by this client. */ - wait_event(kc->destroyq, !atomic_read(&kc->nr_jobs)); - - BUG_ON(!list_empty(&kc->complete_jobs)); - BUG_ON(!list_empty(&kc->io_jobs)); - BUG_ON(!list_empty(&kc->pages_jobs)); - destroy_workqueue(kc->kcopyd_wq); - dm_io_client_destroy(kc->io_client); - client_free_pages(kc); - mempool_destroy(kc->job_pool); - kfree(kc); -} -EXPORT_SYMBOL(dm_kcopyd_client_destroy); diff --git a/drivers/md/kcopyd.h b/drivers/md/kcopyd.h deleted file mode 100644 index d305784..0000000 --- a/drivers/md/kcopyd.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (C) 2001 - 2003 Sistina Software - * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. - * - * kcopyd provides a simple interface for copying an area of one - * block-device to one or more other block-devices, either synchronous - * or with an asynchronous completion notification. - * - * This file is released under the GPL. - */ - -#ifndef _LINUX_DM_KCOPYD_H -#define _LINUX_DM_KCOPYD_H - -#ifdef __KERNEL__ - -#include "dm-io.h" - -/* FIXME: make this configurable */ -#define DM_KCOPYD_MAX_REGIONS 8 - -#define DM_KCOPYD_IGNORE_ERROR 1 - -/* - * To use kcopyd you must first create a dm_kcopyd_client object. - */ -struct dm_kcopyd_client; -int dm_kcopyd_client_create(unsigned num_pages, - struct dm_kcopyd_client **result); -void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); - -/* - * Submit a copy job to kcopyd. This is built on top of the - * previous three fns. - * - * read_err is a boolean, - * write_err is a bitset, with 1 bit for each destination region - */ -typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err, - void *context); - -int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, - unsigned num_dests, struct dm_io_region *dests, - unsigned flags, dm_kcopyd_notify_fn fn, void *context); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_DM_KCOPYD_H */ -- cgit v0.10.2 From a765e20eeb423d0fa6a02ffab51141e53bbd93cb Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 24 Apr 2008 22:02:01 +0100 Subject: dm: move include files Publish the dm-io, dm-log and dm-kcopyd headers in include/linux. Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/Makefile b/drivers/md/Makefile index be4b069..7be09ee 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -3,7 +3,7 @@ # dm-mod-objs := dm.o dm-table.o dm-target.o dm-linear.o dm-stripe.o \ - dm-ioctl.o dm-io.o kcopyd.o + dm-ioctl.o dm-io.o dm-kcopyd.o dm-multipath-objs := dm-hw-handler.o dm-path-selector.o dm-mpath.o dm-snapshot-objs := dm-snap.o dm-exception-store.o dm-mirror-objs := dm-raid1.o diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c index 6933301..41f4080 100644 --- a/drivers/md/dm-exception-store.c +++ b/drivers/md/dm-exception-store.c @@ -9,13 +9,13 @@ #include "dm.h" #include "dm-snap.h" -#include "dm-io.h" -#include "kcopyd.h" #include #include #include #include +#include +#include #define DM_MSG_PREFIX "snapshots" #define DM_CHUNK_SIZE_DEFAULT_SECTORS 32 /* 16KB */ diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 978c041..ed9c86c 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -5,7 +5,6 @@ * This file is released under the GPL. */ -#include "dm-io.h" #include "dm.h" #include @@ -13,6 +12,7 @@ #include #include #include +#include struct dm_io_client { mempool_t *pool; diff --git a/drivers/md/dm-io.h b/drivers/md/dm-io.h deleted file mode 100644 index b6bf17e..0000000 --- a/drivers/md/dm-io.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software - * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. - * - * Device-Mapper low-level I/O. - * - * This file is released under the GPL. - */ - -#ifndef _LINUX_DM_IO_H -#define _LINUX_DM_IO_H - -#ifdef __KERNEL__ - -#include - -struct dm_io_region { - struct block_device *bdev; - sector_t sector; - sector_t count; /* If this is zero the region is ignored. */ -}; - -struct page_list { - struct page_list *next; - struct page *page; -}; - -typedef void (*io_notify_fn)(unsigned long error, void *context); - -enum dm_io_mem_type { - DM_IO_PAGE_LIST,/* Page list */ - DM_IO_BVEC, /* Bio vector */ - DM_IO_VMA, /* Virtual memory area */ - DM_IO_KMEM, /* Kernel memory */ -}; - -struct dm_io_memory { - enum dm_io_mem_type type; - - union { - struct page_list *pl; - struct bio_vec *bvec; - void *vma; - void *addr; - } ptr; - - unsigned offset; -}; - -struct dm_io_notify { - io_notify_fn fn; /* Callback for asynchronous requests */ - void *context; /* Passed to callback */ -}; - -/* - * IO request structure - */ -struct dm_io_client; -struct dm_io_request { - int bi_rw; /* READ|WRITE - not READA */ - struct dm_io_memory mem; /* Memory to use for io */ - struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ - struct dm_io_client *client; /* Client memory handler */ -}; - -/* - * For async io calls, users can alternatively use the dm_io() function below - * and dm_io_client_create() to create private mempools for the client. - * - * Create/destroy may block. - */ -struct dm_io_client *dm_io_client_create(unsigned num_pages); -int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client); -void dm_io_client_destroy(struct dm_io_client *client); - -/* - * IO interface using private per-client pools. - * Each bit in the optional 'sync_error_bits' bitset indicates whether an - * error occurred doing io to the corresponding region. - */ -int dm_io(struct dm_io_request *io_req, unsigned num_regions, - struct dm_io_region *region, unsigned long *sync_error_bits); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_DM_IO_H */ diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 1734584..ee9583b 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -22,8 +22,8 @@ #include #include #include +#include -#include "kcopyd.h" #include "dm.h" /*----------------------------------------------------------------- diff --git a/drivers/md/dm-kcopyd.h b/drivers/md/dm-kcopyd.h deleted file mode 100644 index d305784..0000000 --- a/drivers/md/dm-kcopyd.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (C) 2001 - 2003 Sistina Software - * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. - * - * kcopyd provides a simple interface for copying an area of one - * block-device to one or more other block-devices, either synchronous - * or with an asynchronous completion notification. - * - * This file is released under the GPL. - */ - -#ifndef _LINUX_DM_KCOPYD_H -#define _LINUX_DM_KCOPYD_H - -#ifdef __KERNEL__ - -#include "dm-io.h" - -/* FIXME: make this configurable */ -#define DM_KCOPYD_MAX_REGIONS 8 - -#define DM_KCOPYD_IGNORE_ERROR 1 - -/* - * To use kcopyd you must first create a dm_kcopyd_client object. - */ -struct dm_kcopyd_client; -int dm_kcopyd_client_create(unsigned num_pages, - struct dm_kcopyd_client **result); -void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); - -/* - * Submit a copy job to kcopyd. This is built on top of the - * previous three fns. - * - * read_err is a boolean, - * write_err is a bitset, with 1 bit for each destination region - */ -typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err, - void *context); - -int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, - unsigned num_dests, struct dm_io_region *dests, - unsigned flags, dm_kcopyd_notify_fn fn, void *context); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_DM_KCOPYD_H */ diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index e6b6a9d..67a6f31 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -9,9 +9,9 @@ #include #include #include +#include +#include -#include "dm-log.h" -#include "dm-io.h" #include "dm.h" #define DM_MSG_PREFIX "dirty region log" diff --git a/drivers/md/dm-log.h b/drivers/md/dm-log.h deleted file mode 100644 index 600c5fb..0000000 --- a/drivers/md/dm-log.h +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Copyright (C) 2003 Sistina Software - * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. - * - * Device-Mapper dirty region log. - * - * This file is released under the LGPL. - */ - -#ifndef _LINUX_DM_DIRTY_LOG -#define _LINUX_DM_DIRTY_LOG - -#ifdef __KERNEL__ - -#include -#include - -typedef sector_t region_t; - -struct dm_dirty_log_type; - -struct dm_dirty_log { - struct dm_dirty_log_type *type; - void *context; -}; - -struct dm_dirty_log_type { - const char *name; - struct module *module; - - int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, - unsigned argc, char **argv); - void (*dtr)(struct dm_dirty_log *log); - - /* - * There are times when we don't want the log to touch - * the disk. - */ - int (*presuspend)(struct dm_dirty_log *log); - int (*postsuspend)(struct dm_dirty_log *log); - int (*resume)(struct dm_dirty_log *log); - - /* - * Retrieves the smallest size of region that the log can - * deal with. - */ - uint32_t (*get_region_size)(struct dm_dirty_log *log); - - /* - * A predicate to say whether a region is clean or not. - * May block. - */ - int (*is_clean)(struct dm_dirty_log *log, region_t region); - - /* - * Returns: 0, 1, -EWOULDBLOCK, < 0 - * - * A predicate function to check the area given by - * [sector, sector + len) is in sync. - * - * If -EWOULDBLOCK is returned the state of the region is - * unknown, typically this will result in a read being - * passed to a daemon to deal with, since a daemon is - * allowed to block. - */ - int (*in_sync)(struct dm_dirty_log *log, region_t region, - int can_block); - - /* - * Flush the current log state (eg, to disk). This - * function may block. - */ - int (*flush)(struct dm_dirty_log *log); - - /* - * Mark an area as clean or dirty. These functions may - * block, though for performance reasons blocking should - * be extremely rare (eg, allocating another chunk of - * memory for some reason). - */ - void (*mark_region)(struct dm_dirty_log *log, region_t region); - void (*clear_region)(struct dm_dirty_log *log, region_t region); - - /* - * Returns: <0 (error), 0 (no region), 1 (region) - * - * The mirrord will need perform recovery on regions of - * the mirror that are in the NOSYNC state. This - * function asks the log to tell the caller about the - * next region that this machine should recover. - * - * Do not confuse this function with 'in_sync()', one - * tells you if an area is synchronised, the other - * assigns recovery work. - */ - int (*get_resync_work)(struct dm_dirty_log *log, region_t *region); - - /* - * This notifies the log that the resync status of a region - * has changed. It also clears the region from the recovering - * list (if present). - */ - void (*set_region_sync)(struct dm_dirty_log *log, - region_t region, int in_sync); - - /* - * Returns the number of regions that are in sync. - */ - region_t (*get_sync_count)(struct dm_dirty_log *log); - - /* - * Support function for mirror status requests. - */ - int (*status)(struct dm_dirty_log *log, status_type_t status_type, - char *result, unsigned maxlen); -}; - -int dm_dirty_log_type_register(struct dm_dirty_log_type *type); -int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type); - -/* - * Make sure you use these two functions, rather than calling - * type->constructor/destructor() directly. - */ -struct dm_dirty_log *dm_dirty_log_create(const char *type_name, - struct dm_target *ti, - unsigned argc, char **argv); -void dm_dirty_log_destroy(struct dm_dirty_log *log); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_DM_DIRTY_LOG_H */ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 5beeced..6692e5a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -7,9 +7,6 @@ #include "dm.h" #include "dm-bio-list.h" #include "dm-bio-record.h" -#include "dm-io.h" -#include "dm-log.h" -#include "kcopyd.h" #include #include @@ -22,6 +19,9 @@ #include #include #include +#include +#include +#include #define DM_MSG_PREFIX "raid1" #define DM_IO_PAGES 64 diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index e1dcca9..1ba8a47 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -18,10 +18,10 @@ #include #include #include +#include #include "dm-snap.h" #include "dm-bio-list.h" -#include "kcopyd.h" #define DM_MSG_PREFIX "snapshots" diff --git a/include/linux/dm-dirty-log.h b/include/linux/dm-dirty-log.h new file mode 100644 index 0000000..600c5fb --- /dev/null +++ b/include/linux/dm-dirty-log.h @@ -0,0 +1,131 @@ +/* + * Copyright (C) 2003 Sistina Software + * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. + * + * Device-Mapper dirty region log. + * + * This file is released under the LGPL. + */ + +#ifndef _LINUX_DM_DIRTY_LOG +#define _LINUX_DM_DIRTY_LOG + +#ifdef __KERNEL__ + +#include +#include + +typedef sector_t region_t; + +struct dm_dirty_log_type; + +struct dm_dirty_log { + struct dm_dirty_log_type *type; + void *context; +}; + +struct dm_dirty_log_type { + const char *name; + struct module *module; + + int (*ctr)(struct dm_dirty_log *log, struct dm_target *ti, + unsigned argc, char **argv); + void (*dtr)(struct dm_dirty_log *log); + + /* + * There are times when we don't want the log to touch + * the disk. + */ + int (*presuspend)(struct dm_dirty_log *log); + int (*postsuspend)(struct dm_dirty_log *log); + int (*resume)(struct dm_dirty_log *log); + + /* + * Retrieves the smallest size of region that the log can + * deal with. + */ + uint32_t (*get_region_size)(struct dm_dirty_log *log); + + /* + * A predicate to say whether a region is clean or not. + * May block. + */ + int (*is_clean)(struct dm_dirty_log *log, region_t region); + + /* + * Returns: 0, 1, -EWOULDBLOCK, < 0 + * + * A predicate function to check the area given by + * [sector, sector + len) is in sync. + * + * If -EWOULDBLOCK is returned the state of the region is + * unknown, typically this will result in a read being + * passed to a daemon to deal with, since a daemon is + * allowed to block. + */ + int (*in_sync)(struct dm_dirty_log *log, region_t region, + int can_block); + + /* + * Flush the current log state (eg, to disk). This + * function may block. + */ + int (*flush)(struct dm_dirty_log *log); + + /* + * Mark an area as clean or dirty. These functions may + * block, though for performance reasons blocking should + * be extremely rare (eg, allocating another chunk of + * memory for some reason). + */ + void (*mark_region)(struct dm_dirty_log *log, region_t region); + void (*clear_region)(struct dm_dirty_log *log, region_t region); + + /* + * Returns: <0 (error), 0 (no region), 1 (region) + * + * The mirrord will need perform recovery on regions of + * the mirror that are in the NOSYNC state. This + * function asks the log to tell the caller about the + * next region that this machine should recover. + * + * Do not confuse this function with 'in_sync()', one + * tells you if an area is synchronised, the other + * assigns recovery work. + */ + int (*get_resync_work)(struct dm_dirty_log *log, region_t *region); + + /* + * This notifies the log that the resync status of a region + * has changed. It also clears the region from the recovering + * list (if present). + */ + void (*set_region_sync)(struct dm_dirty_log *log, + region_t region, int in_sync); + + /* + * Returns the number of regions that are in sync. + */ + region_t (*get_sync_count)(struct dm_dirty_log *log); + + /* + * Support function for mirror status requests. + */ + int (*status)(struct dm_dirty_log *log, status_type_t status_type, + char *result, unsigned maxlen); +}; + +int dm_dirty_log_type_register(struct dm_dirty_log_type *type); +int dm_dirty_log_type_unregister(struct dm_dirty_log_type *type); + +/* + * Make sure you use these two functions, rather than calling + * type->constructor/destructor() directly. + */ +struct dm_dirty_log *dm_dirty_log_create(const char *type_name, + struct dm_target *ti, + unsigned argc, char **argv); +void dm_dirty_log_destroy(struct dm_dirty_log *log); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_DM_DIRTY_LOG_H */ diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h new file mode 100644 index 0000000..b6bf17e --- /dev/null +++ b/include/linux/dm-io.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2003 Sistina Software + * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. + * + * Device-Mapper low-level I/O. + * + * This file is released under the GPL. + */ + +#ifndef _LINUX_DM_IO_H +#define _LINUX_DM_IO_H + +#ifdef __KERNEL__ + +#include + +struct dm_io_region { + struct block_device *bdev; + sector_t sector; + sector_t count; /* If this is zero the region is ignored. */ +}; + +struct page_list { + struct page_list *next; + struct page *page; +}; + +typedef void (*io_notify_fn)(unsigned long error, void *context); + +enum dm_io_mem_type { + DM_IO_PAGE_LIST,/* Page list */ + DM_IO_BVEC, /* Bio vector */ + DM_IO_VMA, /* Virtual memory area */ + DM_IO_KMEM, /* Kernel memory */ +}; + +struct dm_io_memory { + enum dm_io_mem_type type; + + union { + struct page_list *pl; + struct bio_vec *bvec; + void *vma; + void *addr; + } ptr; + + unsigned offset; +}; + +struct dm_io_notify { + io_notify_fn fn; /* Callback for asynchronous requests */ + void *context; /* Passed to callback */ +}; + +/* + * IO request structure + */ +struct dm_io_client; +struct dm_io_request { + int bi_rw; /* READ|WRITE - not READA */ + struct dm_io_memory mem; /* Memory to use for io */ + struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ + struct dm_io_client *client; /* Client memory handler */ +}; + +/* + * For async io calls, users can alternatively use the dm_io() function below + * and dm_io_client_create() to create private mempools for the client. + * + * Create/destroy may block. + */ +struct dm_io_client *dm_io_client_create(unsigned num_pages); +int dm_io_client_resize(unsigned num_pages, struct dm_io_client *client); +void dm_io_client_destroy(struct dm_io_client *client); + +/* + * IO interface using private per-client pools. + * Each bit in the optional 'sync_error_bits' bitset indicates whether an + * error occurred doing io to the corresponding region. + */ +int dm_io(struct dm_io_request *io_req, unsigned num_regions, + struct dm_io_region *region, unsigned long *sync_error_bits); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_DM_IO_H */ diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h new file mode 100644 index 0000000..5db2163 --- /dev/null +++ b/include/linux/dm-kcopyd.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2001 - 2003 Sistina Software + * Copyright (C) 2004 - 2008 Red Hat, Inc. All rights reserved. + * + * kcopyd provides a simple interface for copying an area of one + * block-device to one or more other block-devices, either synchronous + * or with an asynchronous completion notification. + * + * This file is released under the GPL. + */ + +#ifndef _LINUX_DM_KCOPYD_H +#define _LINUX_DM_KCOPYD_H + +#ifdef __KERNEL__ + +#include + +/* FIXME: make this configurable */ +#define DM_KCOPYD_MAX_REGIONS 8 + +#define DM_KCOPYD_IGNORE_ERROR 1 + +/* + * To use kcopyd you must first create a dm_kcopyd_client object. + */ +struct dm_kcopyd_client; +int dm_kcopyd_client_create(unsigned num_pages, + struct dm_kcopyd_client **result); +void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); + +/* + * Submit a copy job to kcopyd. This is built on top of the + * previous three fns. + * + * read_err is a boolean, + * write_err is a bitset, with 1 bit for each destination region + */ +typedef void (*dm_kcopyd_notify_fn)(int read_err, unsigned long write_err, + void *context); + +int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, + unsigned num_dests, struct dm_io_region *dests, + unsigned flags, dm_kcopyd_notify_fn fn, void *context); + +#endif /* __KERNEL__ */ +#endif /* _LINUX_DM_KCOPYD_H */ -- cgit v0.10.2 From a2aebe03be60ae4da03507a00d60211d5e0327c3 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 24 Apr 2008 22:10:42 +0100 Subject: dm raid1: use timer This patch replaces the schedule() in the main kmirrord thread with a timer. The schedule() could introduce an unwanted delay when work is ready to be processed. The code instead calls wake() when there's work to be done immediately, and delayed_wake() after a failure to give a short delay before retrying. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 6692e5a..3b9532f 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -154,6 +154,9 @@ struct mirror_set { struct workqueue_struct *kmirrord_wq; struct work_struct kmirrord_work; + struct timer_list timer; + unsigned long timer_pending; + struct work_struct trigger_event; unsigned int nr_mirrors; @@ -178,6 +181,25 @@ static void wake(struct mirror_set *ms) queue_work(ms->kmirrord_wq, &ms->kmirrord_work); } +static void delayed_wake_fn(unsigned long data) +{ + struct mirror_set *ms = (struct mirror_set *) data; + + clear_bit(0, &ms->timer_pending); + wake(ms); +} + +static void delayed_wake(struct mirror_set *ms) +{ + if (test_and_set_bit(0, &ms->timer_pending)) + return; + + ms->timer.expires = jiffies + HZ / 5; + ms->timer.data = (unsigned long) ms; + ms->timer.function = delayed_wake_fn; + add_timer(&ms->timer); +} + /* FIXME move this */ static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw); @@ -1180,6 +1202,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes) spin_lock_irq(&ms->lock); bio_list_merge(&ms->failures, &sync); spin_unlock_irq(&ms->lock); + wake(ms); } else while ((bio = bio_list_pop(&sync))) do_write(ms, bio); @@ -1239,7 +1262,7 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures) bio_list_merge(&ms->failures, failures); spin_unlock_irq(&ms->lock); - wake(ms); + delayed_wake(ms); } static void trigger_event(struct work_struct *work) @@ -1253,7 +1276,7 @@ static void trigger_event(struct work_struct *work) /*----------------------------------------------------------------- * kmirrord *---------------------------------------------------------------*/ -static int _do_mirror(struct work_struct *work) +static void do_mirror(struct work_struct *work) { struct mirror_set *ms =container_of(work, struct mirror_set, kmirrord_work); @@ -1274,24 +1297,6 @@ static int _do_mirror(struct work_struct *work) do_reads(ms, &reads); do_writes(ms, &writes); do_failures(ms, &failures); - - return (ms->failures.head) ? 1 : 0; -} - -static void do_mirror(struct work_struct *work) -{ - /* - * If _do_mirror returns 1, we give it - * another shot. This helps for cases like - * 'suspend' where we call flush_workqueue - * and expect all work to be finished. If - * a failure happens during a suspend, we - * couldn't issue a 'wake' because it would - * not be honored. Therefore, we return '1' - * from _do_mirror, and retry here. - */ - while (_do_mirror(work)) - schedule(); } @@ -1545,6 +1550,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_free_context; } INIT_WORK(&ms->kmirrord_work, do_mirror); + init_timer(&ms->timer); + ms->timer_pending = 0; INIT_WORK(&ms->trigger_event, trigger_event); r = parse_features(ms, argc, argv, &args_used); @@ -1587,6 +1594,7 @@ static void mirror_dtr(struct dm_target *ti) { struct mirror_set *ms = (struct mirror_set *) ti->private; + del_timer_sync(&ms->timer); flush_workqueue(ms->kmirrord_wq); dm_kcopyd_client_destroy(ms->kcopyd_client); destroy_workqueue(ms->kmirrord_wq); -- cgit v0.10.2 From 7ff14a36159d947872870e7a3e9dcaebc46b23eb Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 24 Apr 2008 22:10:47 +0100 Subject: dm: unplug queues in threads Remove an avoidable 3ms delay on some dm-raid1 and kcopyd I/O. It is specified that any submitted bio without BIO_RW_SYNC flag may plug the queue (i.e. block the requests from being dispatched to the physical device). The queue is unplugged when the caller calls blk_unplug() function. Usually, the sequence is that someone calls submit_bh to submit IO on a buffer. The IO plugs the queue and waits (to be possibly joined with other adjacent bios). Then, when the caller calls wait_on_buffer(), it unplugs the queue and submits the IOs to the disk. This was happenning: When doing O_SYNC writes, function fsync_buffers_list() submits a list of bios to dm_raid1, the bios are added to dm_raid1 write queue and kmirrord is woken up. fsync_buffers_list() calls wait_on_buffer(). That unplugs the queue, but there are no bios on the device queue as they are still in the dm_raid1 queue. wait_on_buffer() starts waiting until the IO is finished. kmirrord is scheduled, kmirrord takes bios and submits them to the devices. The submitted bio plugs the harddisk queue but there is no one to unplug it. (The process that called wait_on_buffer() is already sleeping.) So there is a 3ms timeout, after which the queues on the harddisks are unplugged and requests are processed. This 3ms timeout meant that in certain workloads (e.g. O_SYNC, 8kb writes), dm-raid1 is 10 times slower than md raid1. Every time we submit something asynchronously via dm_io, we must unplug the queue actually to send the request to the device. This patch adds an unplug call to kmirrord - while processing requests, it keeps the queue plugged (so that adjacent bios can be merged); when it finishes processing all the bios, it unplugs the queue to submit the bios. It also fixes kcopyd which has the same potential problem. All kcopyd requests are submitted with BIO_RW_SYNC. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon Acked-by: Jens Axboe diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index ed9c86c..4789c42 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -353,7 +353,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, { struct io io; - if (num_regions > 1 && rw != WRITE) { + if (num_regions > 1 && (rw & RW_MASK) != WRITE) { WARN_ON(1); return -EIO; } @@ -390,7 +390,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, { struct io *io; - if (num_regions > 1 && rw != WRITE) { + if (num_regions > 1 && (rw & RW_MASK) != WRITE) { WARN_ON(1); fn(1, context); return -EIO; @@ -436,7 +436,12 @@ static int dp_init(struct dm_io_request *io_req, struct dpages *dp) } /* - * New collapsed (a)synchronous interface + * New collapsed (a)synchronous interface. + * + * If the IO is asynchronous (i.e. it has notify.fn), you must either unplug + * the queue with blk_unplug() some time later or set the BIO_RW_SYNC bit in + * io_req->bi_rw. If you fail to do one of these, the IO will be submitted to + * the disk after q->unplug_delay, which defaults to 3ms in blk-settings.c. */ int dm_io(struct dm_io_request *io_req, unsigned num_regions, struct dm_io_region *where, unsigned long *sync_error_bits) diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index ee9583b..996802b 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -332,7 +332,7 @@ static int run_io_job(struct kcopyd_job *job) { int r; struct dm_io_request io_req = { - .bi_rw = job->rw, + .bi_rw = job->rw | (1 << BIO_RW_SYNC), .mem.type = DM_IO_PAGE_LIST, .mem.ptr.pl = job->pages, .mem.offset = job->offset, diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 3b9532f..ff05fe8 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1297,6 +1297,8 @@ static void do_mirror(struct work_struct *work) do_reads(ms, &reads); do_writes(ms, &writes); do_failures(ms, &failures); + + dm_table_unplug_all(ms->ti->table); } -- cgit v0.10.2 From e8488d08586e6df7fab3db7881631bb13619311b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 24 Apr 2008 22:10:51 +0100 Subject: dm table: drop void suspend_targets return void returning functions returned the return value of another void returning function... Spotted by sparse. Signed-off-by: Adrian Bunk Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e75b143..fc261c8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -954,7 +954,7 @@ void dm_table_presuspend_targets(struct dm_table *t) if (!t) return; - return suspend_targets(t, 0); + suspend_targets(t, 0); } void dm_table_postsuspend_targets(struct dm_table *t) @@ -962,7 +962,7 @@ void dm_table_postsuspend_targets(struct dm_table *t) if (!t) return; - return suspend_targets(t, 1); + suspend_targets(t, 1); } int dm_table_resume_targets(struct dm_table *t) -- cgit v0.10.2 From 4fdfe401e9d7e30029972d568c667234c0c1d828 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 24 Apr 2008 22:10:56 +0100 Subject: dm table: remove unused dm_create_error_table dm_create_error_table() was added in kernel 2.6.18 and never used... Signed-off-by: Adrian Bunk Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index fc261c8..51be533 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -245,44 +245,6 @@ int dm_table_create(struct dm_table **result, int mode, return 0; } -int dm_create_error_table(struct dm_table **result, struct mapped_device *md) -{ - struct dm_table *t; - sector_t dev_size = 1; - int r; - - /* - * Find current size of device. - * Default to 1 sector if inactive. - */ - t = dm_get_table(md); - if (t) { - dev_size = dm_table_get_size(t); - dm_table_put(t); - } - - r = dm_table_create(&t, FMODE_READ, 1, md); - if (r) - return r; - - r = dm_table_add_target(t, "error", 0, dev_size, NULL); - if (r) - goto out; - - r = dm_table_complete(t); - if (r) - goto out; - - *result = t; - -out: - if (r) - dm_table_put(t); - - return r; -} -EXPORT_SYMBOL_GPL(dm_create_error_table); - static void free_devices(struct list_head *devices) { struct list_head *tmp, *next; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a68829e..ad3b787 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -252,12 +252,6 @@ void dm_table_event(struct dm_table *t); */ int dm_swap_table(struct mapped_device *md, struct dm_table *t); -/* - * Prepare a table for a device that will error all I/O. - * To make it active, call dm_suspend(), dm_swap_table() then dm_resume(). - */ -int dm_create_error_table(struct dm_table **result, struct mapped_device *md); - /*----------------------------------------------------------------- * Macros. *---------------------------------------------------------------*/ -- cgit v0.10.2 From cf13ab8e02d452e2236d0b5fda9972b3b7f503cb Mon Sep 17 00:00:00 2001 From: Frederik Deweerdt Date: Thu, 24 Apr 2008 22:10:59 +0100 Subject: dm: remove md argument from specific_minor The small patch below: - Removes the unused md argument from both specific_minor() and next_free_minor() - Folds kmalloc + memset(0) into a single kzalloc call in alloc_dev() This has been compile tested on x86. Signed-off-by: Frederik Deweerdt Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 11f4ffe..372369b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -924,7 +924,7 @@ static void free_minor(int minor) /* * See if the device with a specific minor # is free. */ -static int specific_minor(struct mapped_device *md, int minor) +static int specific_minor(int minor) { int r, m; @@ -957,7 +957,7 @@ out: return r; } -static int next_free_minor(struct mapped_device *md, int *minor) +static int next_free_minor(int *minor) { int r, m; @@ -968,9 +968,8 @@ static int next_free_minor(struct mapped_device *md, int *minor) spin_lock(&_minor_lock); r = idr_get_new(&_minor_idr, MINOR_ALLOCED, &m); - if (r) { + if (r) goto out; - } if (m >= (1 << MINORBITS)) { idr_remove(&_minor_idr, m); @@ -993,7 +992,7 @@ static struct block_device_operations dm_blk_dops; static struct mapped_device *alloc_dev(int minor) { int r; - struct mapped_device *md = kmalloc(sizeof(*md), GFP_KERNEL); + struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL); void *old_md; if (!md) { @@ -1006,13 +1005,12 @@ static struct mapped_device *alloc_dev(int minor) /* get a minor number for the dev */ if (minor == DM_ANY_MINOR) - r = next_free_minor(md, &minor); + r = next_free_minor(&minor); else - r = specific_minor(md, minor); + r = specific_minor(minor); if (r < 0) goto bad_minor; - memset(md, 0, sizeof(*md)); init_rwsem(&md->io_lock); mutex_init(&md->suspend_lock); spin_lock_init(&md->pushback_lock); -- cgit v0.10.2 From e3dcc5a387fc38e9c3c6c4f857cd9a7f71a8553a Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 24 Apr 2008 22:11:03 +0100 Subject: dm crypt: add documentation Add description of dm-crypt to device-mapper documentation. Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon diff --git a/Documentation/device-mapper/dm-crypt.txt b/Documentation/device-mapper/dm-crypt.txt new file mode 100644 index 0000000..6680cab --- /dev/null +++ b/Documentation/device-mapper/dm-crypt.txt @@ -0,0 +1,52 @@ +dm-crypt +========= + +Device-Mapper's "crypt" target provides transparent encryption of block devices +using the kernel crypto API. + +Parameters: + + + Encryption cipher and an optional IV generation mode. + (In format cipher-chainmode-ivopts:ivmode). + Examples: + des + aes-cbc-essiv:sha256 + twofish-ecb + + /proc/crypto contains supported crypto modes + + + Key used for encryption. It is encoded as a hexadecimal number. + You can only use key sizes that are valid for the selected cipher. + + + The IV offset is a sector count that is added to the sector number + before creating the IV. + + + This is the device that is going to be used as backend and contains the + encrypted data. You can specify it as a path like /dev/xxx or a device + number :. + + + Starting sector within the device where the encrypted data begins. + +Example scripts +=============== +LUKS (Linux Unified Key Setup) is now the preferred way to set up disk +encryption with dm-crypt using the 'cryptsetup' utility, see +http://luks.endorphin.org/ + +[[ +#!/bin/sh +# Create a crypt device using dmsetup +dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0" +]] + +[[ +#!/bin/sh +# Create a crypt device using cryptsetup and LUKS header with default cipher +cryptsetup luksFormat $1 +cryptsetup luksOpen $1 crypt1 +]] -- cgit v0.10.2