From 550929faf89e2e2cdb3e9945ea87d383989274cf Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:30 +0000 Subject: dm persistent data: rename node to btree_node This patch fixes a compilation failure on sparc32 by renaming struct node. struct node is already defined in include/linux/node.h. On sparc32, it happens to be included through other dependencies and persistent-data doesn't compile because of conflicting declarations. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/persistent-data/dm-btree-internal.h b/drivers/md/persistent-data/dm-btree-internal.h index 5709bfe..accbb05 100644 --- a/drivers/md/persistent-data/dm-btree-internal.h +++ b/drivers/md/persistent-data/dm-btree-internal.h @@ -36,13 +36,13 @@ struct node_header { __le32 padding; } __packed; -struct node { +struct btree_node { struct node_header header; __le64 keys[0]; } __packed; -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt); int new_block(struct dm_btree_info *info, struct dm_block **result); @@ -64,7 +64,7 @@ struct ro_spine { void init_ro_spine(struct ro_spine *s, struct dm_btree_info *info); int exit_ro_spine(struct ro_spine *s); int ro_step(struct ro_spine *s, dm_block_t new_child); -struct node *ro_node(struct ro_spine *s); +struct btree_node *ro_node(struct ro_spine *s); struct shadow_spine { struct dm_btree_info *info; @@ -98,17 +98,17 @@ int shadow_root(struct shadow_spine *s); /* * Some inlines. */ -static inline __le64 *key_ptr(struct node *n, uint32_t index) +static inline __le64 *key_ptr(struct btree_node *n, uint32_t index) { return n->keys + index; } -static inline void *value_base(struct node *n) +static inline void *value_base(struct btree_node *n) { return &n->keys[le32_to_cpu(n->header.max_entries)]; } -static inline void *value_ptr(struct node *n, uint32_t index) +static inline void *value_ptr(struct btree_node *n, uint32_t index) { uint32_t value_size = le32_to_cpu(n->header.value_size); return value_base(n) + (value_size * index); @@ -117,7 +117,7 @@ static inline void *value_ptr(struct node *n, uint32_t index) /* * Assumes the values are suitably-aligned and converts to core format. */ -static inline uint64_t value64(struct node *n, uint32_t index) +static inline uint64_t value64(struct btree_node *n, uint32_t index) { __le64 *values_le = value_base(n); @@ -127,7 +127,7 @@ static inline uint64_t value64(struct node *n, uint32_t index) /* * Searching for a key within a single node. */ -int lower_bound(struct node *n, uint64_t key); +int lower_bound(struct btree_node *n, uint64_t key); extern struct dm_block_validator btree_node_validator; diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index aa71e23..c4f2813 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -53,7 +53,7 @@ /* * Some little utilities for moving node data around. 
*/ -static void node_shift(struct node *n, int shift) +static void node_shift(struct btree_node *n, int shift) { uint32_t nr_entries = le32_to_cpu(n->header.nr_entries); uint32_t value_size = le32_to_cpu(n->header.value_size); @@ -79,7 +79,7 @@ static void node_shift(struct node *n, int shift) } } -static void node_copy(struct node *left, struct node *right, int shift) +static void node_copy(struct btree_node *left, struct btree_node *right, int shift) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t value_size = le32_to_cpu(left->header.value_size); @@ -108,7 +108,7 @@ static void node_copy(struct node *left, struct node *right, int shift) /* * Delete a specific entry from a leaf node. */ -static void delete_at(struct node *n, unsigned index) +static void delete_at(struct btree_node *n, unsigned index) { unsigned nr_entries = le32_to_cpu(n->header.nr_entries); unsigned nr_to_copy = nr_entries - (index + 1); @@ -128,7 +128,7 @@ static void delete_at(struct node *n, unsigned index) n->header.nr_entries = cpu_to_le32(nr_entries - 1); } -static unsigned merge_threshold(struct node *n) +static unsigned merge_threshold(struct btree_node *n) { return le32_to_cpu(n->header.max_entries) / 3; } @@ -136,7 +136,7 @@ static unsigned merge_threshold(struct node *n) struct child { unsigned index; struct dm_block *block; - struct node *n; + struct btree_node *n; }; static struct dm_btree_value_type le64_type = { @@ -147,7 +147,7 @@ static struct dm_btree_value_type le64_type = { .equal = NULL }; -static int init_child(struct dm_btree_info *info, struct node *parent, +static int init_child(struct dm_btree_info *info, struct btree_node *parent, unsigned index, struct child *result) { int r, inc; @@ -177,7 +177,7 @@ static int exit_child(struct dm_btree_info *info, struct child *c) return dm_tm_unlock(info->tm, c->block); } -static void shift(struct node *left, struct node *right, int count) +static void shift(struct btree_node *left, struct btree_node *right, int count) { uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); @@ -203,11 +203,11 @@ static void shift(struct node *left, struct node *right, int count) right->header.nr_entries = cpu_to_le32(nr_right + count); } -static void __rebalance2(struct dm_btree_info *info, struct node *parent, +static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *r) { - struct node *left = l->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_right = le32_to_cpu(right->header.nr_entries); unsigned threshold = 2 * merge_threshold(left) + 1; @@ -239,7 +239,7 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent; + struct btree_node *parent; struct child left, right; parent = dm_block_data(shadow_current(s)); @@ -270,9 +270,9 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, * in right, then rebalance2. This wastes some cpu, but I want something * simple atm. 
*/ -static void delete_center_node(struct dm_btree_info *info, struct node *parent, +static void delete_center_node(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { uint32_t max_entries = le32_to_cpu(left->header.max_entries); @@ -301,9 +301,9 @@ static void delete_center_node(struct dm_btree_info *info, struct node *parent, /* * Redistributes entries among 3 sibling nodes. */ -static void redistribute3(struct dm_btree_info *info, struct node *parent, +static void redistribute3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r, - struct node *left, struct node *center, struct node *right, + struct btree_node *left, struct btree_node *center, struct btree_node *right, uint32_t nr_left, uint32_t nr_center, uint32_t nr_right) { int s; @@ -343,12 +343,12 @@ static void redistribute3(struct dm_btree_info *info, struct node *parent, *key_ptr(parent, r->index) = right->keys[0]; } -static void __rebalance3(struct dm_btree_info *info, struct node *parent, +static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, struct child *l, struct child *c, struct child *r) { - struct node *left = l->n; - struct node *center = c->n; - struct node *right = r->n; + struct btree_node *left = l->n; + struct btree_node *center = c->n; + struct btree_node *right = r->n; uint32_t nr_left = le32_to_cpu(left->header.nr_entries); uint32_t nr_center = le32_to_cpu(center->header.nr_entries); @@ -371,7 +371,7 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, unsigned left_index) { int r; - struct node *parent = dm_block_data(shadow_current(s)); + struct btree_node *parent = dm_block_data(shadow_current(s)); struct child left, center, right; /* @@ -421,7 +421,7 @@ static int get_nr_entries(struct dm_transaction_manager *tm, { int r; struct dm_block *block; - struct node *n; + struct btree_node *n; r = dm_tm_read_lock(tm, b, &btree_node_validator, &block); if (r) @@ -438,7 +438,7 @@ static int rebalance_children(struct shadow_spine *s, { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; - struct node *n; + struct btree_node *n; n = dm_block_data(shadow_current(s)); @@ -483,7 +483,7 @@ static int rebalance_children(struct shadow_spine *s, return r; } -static int do_leaf(struct node *n, uint64_t key, unsigned *index) +static int do_leaf(struct btree_node *n, uint64_t key, unsigned *index) { int i = lower_bound(n, key); @@ -506,7 +506,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, uint64_t key, unsigned *index) { int i = *index, r; - struct node *n; + struct btree_node *n; for (;;) { r = shadow_step(s, root, vt); @@ -556,7 +556,7 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, unsigned level, last_level = info->levels - 1; int index = 0, r = 0; struct shadow_spine spine; - struct node *n; + struct btree_node *n; init_shadow_spine(&spine, info); for (level = 0; level < info->levels; level++) { diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index d9a7912..2f0805c 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -23,7 +23,7 @@ static void node_prepare_for_write(struct dm_block_validator 
*v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; h->blocknr = cpu_to_le64(dm_block_location(b)); @@ -38,7 +38,7 @@ static int node_check(struct dm_block_validator *v, struct dm_block *b, size_t block_size) { - struct node *n = dm_block_data(b); + struct btree_node *n = dm_block_data(b); struct node_header *h = &n->header; size_t value_size; __le32 csum_disk; @@ -164,7 +164,7 @@ int ro_step(struct ro_spine *s, dm_block_t new_child) return r; } -struct node *ro_node(struct ro_spine *s) +struct btree_node *ro_node(struct ro_spine *s) { struct dm_block *block; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index d12b2cc..371f3d4 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -38,7 +38,7 @@ static void array_insert(void *base, size_t elt_size, unsigned nr_elts, /*----------------------------------------------------------------*/ /* makes the assumption that no two keys are the same. */ -static int bsearch(struct node *n, uint64_t key, int want_hi) +static int bsearch(struct btree_node *n, uint64_t key, int want_hi) { int lo = -1, hi = le32_to_cpu(n->header.nr_entries); @@ -58,12 +58,12 @@ static int bsearch(struct node *n, uint64_t key, int want_hi) return want_hi ? hi : lo; } -int lower_bound(struct node *n, uint64_t key) +int lower_bound(struct btree_node *n, uint64_t key) { return bsearch(n, key, 0); } -void inc_children(struct dm_transaction_manager *tm, struct node *n, +void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, struct dm_btree_value_type *vt) { unsigned i; @@ -77,7 +77,7 @@ void inc_children(struct dm_transaction_manager *tm, struct node *n, vt->inc(vt->context, value_ptr(n, i)); } -static int insert_at(size_t value_size, struct node *node, unsigned index, +static int insert_at(size_t value_size, struct btree_node *node, unsigned index, uint64_t key, void *value) __dm_written_to_disk(value) { @@ -122,7 +122,7 @@ int dm_btree_empty(struct dm_btree_info *info, dm_block_t *root) { int r; struct dm_block *b; - struct node *n; + struct btree_node *n; size_t block_size; uint32_t max_entries; @@ -154,7 +154,7 @@ EXPORT_SYMBOL_GPL(dm_btree_empty); #define MAX_SPINE_DEPTH 64 struct frame { struct dm_block *b; - struct node *n; + struct btree_node *n; unsigned level; unsigned nr_children; unsigned current_child; @@ -295,7 +295,7 @@ EXPORT_SYMBOL_GPL(dm_btree_del); /*----------------------------------------------------------------*/ static int btree_lookup_raw(struct ro_spine *s, dm_block_t block, uint64_t key, - int (*search_fn)(struct node *, uint64_t), + int (*search_fn)(struct btree_node *, uint64_t), uint64_t *result_key, void *v, size_t value_size) { int i, r; @@ -406,7 +406,7 @@ static int btree_split_sibling(struct shadow_spine *s, dm_block_t root, size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *parent; - struct node *ln, *rn, *pn; + struct btree_node *ln, *rn, *pn; __le64 location; left = shadow_current(s); @@ -491,7 +491,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key) size_t size; unsigned nr_left, nr_right; struct dm_block *left, *right, *new_parent; - struct node *pn, *ln, *rn; + struct btree_node *pn, *ln, *rn; __le64 val; new_parent = shadow_current(s); @@ -576,7 +576,7 @@ static int btree_insert_raw(struct shadow_spine *s, dm_block_t root, uint64_t key, unsigned *index) { int r, i = *index, top = 1; 
- struct node *node; + struct btree_node *node; for (;;) { r = shadow_step(s, root, vt); if (r < 0) goto out; @@ -643,7 +643,7 @@ static int insert(struct dm_btree_info *info, dm_block_t root, unsigned level, index = -1, last_level = info->levels - 1; dm_block_t block = root; struct shadow_spine spine; - struct node *n; + struct btree_node *n; struct dm_btree_value_type le64_type; le64_type.context = NULL; -- cgit v0.10.2 From e910d7ebecd1aac43125944a8641b6cb1a0dfabe Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 21 Dec 2012 20:23:30 +0000 Subject: dm ioctl: prevent unsafe change to dm_ioctl data_size Abort dm ioctl processing if userspace changes the data_size parameter after we validated it but before we finished copying the data buffer from userspace. The dm ioctl parameters are processed in the following sequence: 1. ctl_ioctl() calls copy_params(); 2. copy_params() makes a first copy of the fixed-sized portion of the userspace parameters into the local variable "tmp"; 3. copy_params() then validates tmp.data_size and allocates a new structure big enough to hold the complete data and copies the whole userspace buffer there; 4. ctl_ioctl() reads the userspace data a second time and copies the whole buffer into the pointer "param"; 5. ctl_ioctl() reads param->data_size without any validation and stores it in the variable "input_param_size"; 6. "input_param_size" is further used as the authoritative size of the kernel buffer. The problem is that userspace code could change the contents of user memory between steps 2 and 4. In particular, the data_size parameter can be changed to an invalid value after the kernel has validated it. This lets userspace force the kernel to access invalid kernel memory. The fix is to ensure that the size has not changed at step 4. This patch shouldn't have a security impact because CAP_SYS_ADMIN is required to run this code, but it should be fixed anyway. Reported-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon Cc: stable@kernel.org diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index afd9598..a651d52 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1566,6 +1566,14 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) if (copy_from_user(dmi, user, tmp.data_size)) goto bad; + /* + * Abort if something changed the ioctl data while it was being copied. + */ + if (dmi->data_size != tmp.data_size) { + DMERR("rejecting ioctl: data size modified while processing parameters"); + goto bad; + } + /* Wipe the user buffer so we do not return it to userspace */ if (secure_data && clear_user(user, tmp.data_size)) goto bad; -- cgit v0.10.2 From c1a94672a830e01d58c7c7e8de530c3f136d6ff2 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:30 +0000 Subject: dm: disable WRITE SAME WRITE SAME bios are not yet handled correctly by device-mapper, so disable their use on device-mapper devices by setting max_write_same_sectors to zero. As an example, a ciphertext device is incompatible because the data gets changed according to the location at which it is written, and so the dm crypt target cannot support it. 
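To make the incompatibility concrete: WRITE SAME writes one payload identically to every sector in a range, but a cipher whose output depends on the sector number yields different on-disk bytes for every sector, so no single repeated payload can describe the result. The sketch below is a toy userspace model; cipher_byte() is an invented stand-in for a per-sector IV scheme, not dm-crypt's actual cipher.

	#include <stdint.h>
	#include <stdio.h>

	/* Invented stand-in for a per-sector tweak: the same plaintext byte
	 * encrypts differently depending on the sector it lands on. */
	static uint8_t cipher_byte(uint64_t sector, uint8_t plain)
	{
		return plain ^ (uint8_t)(sector * 0x9d);
	}

	int main(void)
	{
		uint64_t s;

		/* One payload, four sectors, four different on-disk bytes:
		 * there is no single block a WRITE SAME could replicate. */
		for (s = 0; s < 4; s++)
			printf("sector %llu -> 0x%02x\n",
			       (unsigned long long)s, cipher_byte(s, 0x00));
		return 0;
	}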
Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org Cc: Milan Broz Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 100368e..fa29557 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1445,6 +1445,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); + q->limits.max_write_same_sectors = 0; + dm_table_set_integrity(t); /* -- cgit v0.10.2 From b7ca9c9273e5eebd63880dd8a6e4e5c18fc7901d Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 21 Dec 2012 20:23:31 +0000 Subject: dm thin: replace dm_cell_release_singleton with cell_defer_except Change existing users of the function dm_cell_release_singleton to share cell_defer_except instead, and then remove the now-unused function. Everywhere that calls dm_cell_release_singleton, the bio in question is the holder of the cell. If there are no non-holder entries in the cell then cell_defer_except behaves exactly like dm_cell_release_singleton. Conversely, if there *are* non-holder entries then dm_cell_release_singleton must not be used because those entries would need to be deferred. Consequently, it is safe to replace use of dm_cell_release_singleton with cell_defer_except. This patch is a pre-requisite for "dm thin: fix race between simultaneous io and discards to same block". Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index e4e8415..aefb78e 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -208,31 +208,6 @@ void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) EXPORT_SYMBOL_GPL(dm_cell_release); /* - * There are a couple of places where we put a bio into a cell briefly - * before taking it out again. In these situations we know that no other - * bio may be in the cell. This function releases the cell, and also does - * a sanity check. - */ -static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - BUG_ON(cell->holder != bio); - BUG_ON(!bio_list_empty(&cell->bios)); - - __cell_release(cell, NULL); -} - -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - unsigned long flags; - struct dm_bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_singleton(cell, bio); - spin_unlock_irqrestore(&prison->lock, flags); -} -EXPORT_SYMBOL_GPL(dm_cell_release_singleton); - -/* * Sometimes we don't want the holder, just the additional bios. 
*/ static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 4e0ac376..53d1a7a 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h @@ -44,7 +44,6 @@ int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, struct bio *inmate, struct dm_bio_prison_cell **ref); void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); -void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); void dm_cell_error(struct dm_bio_prison_cell *cell); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 058acf3..25dfd23 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -513,8 +513,7 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, } /* - * Same as cell_defer above, except it omits one particular detainee, - * a write bio that covers the block and has already been processed. + * Same as cell_defer except it omits the original holder of the cell. */ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell) { @@ -936,7 +935,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) */ build_data_key(tc->td, lookup_result.block, &key2); if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { - dm_cell_release_singleton(cell, bio); + cell_defer_except(tc, cell); break; } @@ -967,8 +966,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio) * a block boundary. So we submit the discard of a * partial block appropriately. */ - dm_cell_release_singleton(cell, bio); - dm_cell_release_singleton(cell2, bio); + cell_defer_except(tc, cell); + cell_defer_except(tc, cell2); if ((!lookup_result.shared) && pool->pf.discard_passdown) remap_and_issue(tc, bio, lookup_result.block); else @@ -980,13 +979,13 @@ static void process_discard(struct thin_c *tc, struct bio *bio) /* * It isn't provisioned, just forget it. */ - dm_cell_release_singleton(cell, bio); + cell_defer_except(tc, cell); bio_endio(bio, 0); break; default: DMERR("discard: find block unexpectedly returned %d", r); - dm_cell_release_singleton(cell, bio); + cell_defer_except(tc, cell); bio_io_error(bio); break; } @@ -1041,7 +1040,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); - dm_cell_release_singleton(cell, bio); + cell_defer_except(tc, cell); remap_and_issue(tc, bio, lookup_result->block); } } @@ -1056,7 +1055,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block * Remap empty bios (flushes) immediately, without provisioning. */ if (!bio->bi_size) { - dm_cell_release_singleton(cell, bio); + cell_defer_except(tc, cell); remap_and_issue(tc, bio, 0); return; } @@ -1066,7 +1065,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block */ if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); - dm_cell_release_singleton(cell, bio); + cell_defer_except(tc, cell); bio_endio(bio, 0); return; } @@ -1120,7 +1119,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) * TODO: this will probably have to change when discard goes * back in. 
*/ - dm_cell_release_singleton(cell, bio); + cell_defer_except(tc, cell); if (lookup_result.shared) process_shared_bio(tc, bio, block, &lookup_result); @@ -1130,7 +1129,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { - dm_cell_release_singleton(cell, bio); + cell_defer_except(tc, cell); remap_to_origin_and_issue(tc, bio); } else provision_block(tc, bio, block, cell); @@ -1138,7 +1137,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) default: DMERR("dm_thin_find_block() failed, error = %d", r); - dm_cell_release_singleton(cell, bio); + cell_defer_except(tc, cell); bio_io_error(bio); break; } -- cgit v0.10.2 From e8088073c9610af017fd47fddd104a2c3afb32e8 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 21 Dec 2012 20:23:31 +0000 Subject: dm thin: fix race between simultaneous io and discards to same block There is a race when discard bios and non-discard bios are issued simultaneously to the same block. Discard support is expensive for all thin devices precisely because you have to be careful to quiesce the area you're discarding. DM thin must handle this conflicting IO pattern (simultaneous non-discard vs discard) even though a sane application shouldn't be issuing such IO. The race manifests as follows: 1. A non-discard bio is mapped in thin_bio_map. This doesn't lock out parallel activity to the same block. 2. A discard bio is issued to the same block as the non-discard bio. 3. The discard bio is locked in a dm_bio_prison_cell in process_discard to lock out parallel activity against the same block. 4. The non-discard bio's mapping continues and its all_io_entry is incremented so the bio is accounted for in the thin pool's all_io_ds which is a dm_deferred_set used to track time locality of non-discard IO. 5. The non-discard bio is finally locked in a dm_bio_prison_cell in process_bio. The race can result in deadlock, leaving the block layer hanging waiting for completion of a discard bio that never completes, e.g.: INFO: task ruby:15354 blocked for more than 120 seconds. "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. ruby D ffffffff8160f0e0 0 15354 15314 0x00000000 ffff8802fb08bc58 0000000000000082 ffff8802fb08bfd8 0000000000012900 ffff8802fb08a010 0000000000012900 0000000000012900 0000000000012900 ffff8802fb08bfd8 0000000000012900 ffff8803324b9480 ffff88032c6f14c0 Call Trace: [] schedule+0x29/0x70 [] schedule_timeout+0x195/0x220 [] ? _dm_request+0x111/0x160 [dm_mod] [] wait_for_common+0x11e/0x190 [] ? try_to_wake_up+0x2b0/0x2b0 [] wait_for_completion+0x1d/0x20 [] blkdev_issue_discard+0x219/0x260 [] blkdev_ioctl+0x6e9/0x7b0 [] block_ioctl+0x3c/0x40 [] do_vfs_ioctl+0x8c/0x340 [] ? block_llseek+0x67/0xb0 [] sys_ioctl+0xa1/0xb0 [] ? sys_rt_sigprocmask+0x86/0xd0 [] system_call_fastpath+0x16/0x1b The thinp-test-suite's test_discard_random_sectors reliably hits this deadlock on fast SSD storage. The fix for this race is that the all_io_entry for a bio must be incremented whilst the dm_bio_prison_cell is held for the bio's associated virtual and physical blocks. That cell locking wasn't occurring early enough in thin_bio_map. This patch fixes this. Care is taken to always call the new function inc_all_io_entry() with the relevant cells locked, but they are generally unlocked before calling issue() to try to avoid holding the cells locked across generic_submit_request. 
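The ordering the fix enforces reduces to a small rule: account the bio while the block's cell is held, drop the cell, then submit. Below is a simplified userspace model of that rule; cell_lock, all_io_count and map_one_bio are assumed names used only for illustration, standing in for dm_bio_detain(), pool->all_io_ds and the real map path.

	#include <pthread.h>

	static pthread_mutex_t cell_lock = PTHREAD_MUTEX_INITIALIZER;
	static int all_io_count;		/* stand-in for pool->all_io_ds */

	static void map_one_bio(void (*submit)(void))
	{
		pthread_mutex_lock(&cell_lock);		/* dm_bio_detain(): exclude a
							 * concurrent discard on the block */
		all_io_count++;				/* inc_all_io_entry() while held */
		pthread_mutex_unlock(&cell_lock);	/* cell_defer_except() */
		submit();				/* issue after unlocking, so cells
							 * are not held across submission */
	}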
Also, now that thin_bio_map may lock bios in a cell, process_bio() is no longer the only thread that will do so. Because of this we must be sure to use cell_defer_except() to release all non-holder entries, that were added by the other thread, because they must be deferred. This patch depends on "dm thin: replace dm_cell_release_singleton with cell_defer_except". Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon Cc: stable@vger.kernel.org diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 25dfd23..41c9e81 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -368,6 +368,17 @@ static int bio_triggers_commit(struct thin_c *tc, struct bio *bio) dm_thin_changed_this_transaction(tc->td); } +static void inc_all_io_entry(struct pool *pool, struct bio *bio) +{ + struct dm_thin_endio_hook *h; + + if (bio->bi_rw & REQ_DISCARD) + return; + + h = dm_get_mapinfo(bio)->ptr; + h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds); +} + static void issue(struct thin_c *tc, struct bio *bio) { struct pool *pool = tc->pool; @@ -596,13 +607,15 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) { struct thin_c *tc = m->tc; + inc_all_io_entry(tc->pool, m->bio); + cell_defer_except(tc, m->cell); + cell_defer_except(tc, m->cell2); + if (m->pass_discard) remap_and_issue(tc, m->bio, m->data_block); else bio_endio(m->bio, 0); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -710,6 +723,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_dest); } else { struct dm_io_region from, to; @@ -779,6 +793,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, h->overwrite_mapping = m; m->bio = bio; save_and_set_endio(bio, &m->saved_bi_end_io, overwrite_endio); + inc_all_io_entry(pool, bio); remap_and_issue(tc, bio, data_block); } else { int r; @@ -961,13 +976,15 @@ static void process_discard(struct thin_c *tc, struct bio *bio) wake_worker(pool); } } else { + inc_all_io_entry(pool, bio); + cell_defer_except(tc, cell); + cell_defer_except(tc, cell2); + /* * The DM core makes sure that the discard doesn't span * a block boundary. So we submit the discard of a * partial block appropriately. */ - cell_defer_except(tc, cell); - cell_defer_except(tc, cell2); if ((!lookup_result.shared) && pool->pf.discard_passdown) remap_and_issue(tc, bio, lookup_result.block); else @@ -1039,8 +1056,9 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); - + inc_all_io_entry(pool, bio); cell_defer_except(tc, cell); + remap_and_issue(tc, bio, lookup_result->block); } } @@ -1055,7 +1073,9 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block * Remap empty bios (flushes) immediately, without provisioning. */ if (!bio->bi_size) { + inc_all_io_entry(tc->pool, bio); cell_defer_except(tc, cell); + remap_and_issue(tc, bio, 0); return; } @@ -1110,26 +1130,22 @@ static void process_bio(struct thin_c *tc, struct bio *bio) r = dm_thin_find_block(tc->td, block, 1, &lookup_result); switch (r) { case 0: - /* - * We can release this cell now. 
This thread is the only - * one that puts bios into a cell, and we know there were - * no preceding bios. - */ - /* - * TODO: this will probably have to change when discard goes - * back in. - */ - cell_defer_except(tc, cell); - - if (lookup_result.shared) + if (lookup_result.shared) { process_shared_bio(tc, bio, block, &lookup_result); - else + cell_defer_except(tc, cell); + } else { + inc_all_io_entry(tc->pool, bio); + cell_defer_except(tc, cell); + remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { + inc_all_io_entry(tc->pool, bio); cell_defer_except(tc, cell); + remap_to_origin_and_issue(tc, bio); } else provision_block(tc, bio, block, cell); @@ -1155,8 +1171,10 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) case 0: if (lookup_result.shared && (rw == WRITE) && bio->bi_size) bio_io_error(bio); - else + else { + inc_all_io_entry(tc->pool, bio); remap_and_issue(tc, bio, lookup_result.block); + } break; case -ENODATA: @@ -1166,6 +1184,7 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) } if (tc->origin_dev) { + inc_all_io_entry(tc->pool, bio); remap_to_origin_and_issue(tc, bio); break; } @@ -1346,7 +1365,7 @@ static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *b h->tc = tc; h->shared_read_entry = NULL; - h->all_io_entry = bio->bi_rw & REQ_DISCARD ? NULL : dm_deferred_entry_inc(pool->all_io_ds); + h->all_io_entry = NULL; h->overwrite_mapping = NULL; return h; @@ -1363,6 +1382,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, dm_block_t block = get_bio_block(tc, bio); struct dm_thin_device *td = tc->td; struct dm_thin_lookup_result result; + struct dm_bio_prison_cell *cell1, *cell2; + struct dm_cell_key key; map_context->ptr = thin_hook_bio(tc, bio); @@ -1399,12 +1420,25 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * shared flag will be set in their case. */ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - } else { - remap(tc, bio, result.block); - r = DM_MAPIO_REMAPPED; + return DM_MAPIO_SUBMITTED; } - break; + + build_virtual_key(tc->td, block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell1)) + return DM_MAPIO_SUBMITTED; + + build_data_key(tc->td, result.block, &key); + if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) { + cell_defer_except(tc, cell1); + return DM_MAPIO_SUBMITTED; + } + + inc_all_io_entry(tc->pool, bio); + cell_defer_except(tc, cell2); + cell_defer_except(tc, cell1); + + remap(tc, bio, result.block); + return DM_MAPIO_REMAPPED; case -ENODATA: if (get_pool_mode(tc->pool) == PM_READ_ONLY) { -- cgit v0.10.2 From 563af186df08002d2600c4e718ad8f3bde109f53 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 21 Dec 2012 20:23:31 +0000 Subject: dm thin: wake worker when discard is prepared When discards are prepared it is best to directly wake the worker that will process them. The worker will be woken anyway, via periodic commit, but there is no reason to not wake_worker here. 
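For reference, waking the worker is cheap; in dm-thin it amounts to queueing the pool's work item, roughly as below (a paraphrase with assumed field names rather than a verbatim copy). Relying solely on the periodic commit can leave prepared discards waiting for up to a full commit interval.

	static void wake_worker(struct pool *pool)
	{
		queue_work(pool->wq, &pool->worker);
	}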
Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 41c9e81..aeb9e20 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2676,10 +2676,13 @@ static int thin_endio(struct dm_target *ti, if (h->all_io_entry) { INIT_LIST_HEAD(&work); dm_deferred_entry_dec(h->all_io_entry, &work); - spin_lock_irqsave(&pool->lock, flags); - list_for_each_entry_safe(m, tmp, &work, list) - list_add(&m->list, &pool->prepared_discards); - spin_unlock_irqrestore(&pool->lock, flags); + if (!list_empty(&work)) { + spin_lock_irqsave(&pool->lock, flags); + list_for_each_entry_safe(m, tmp, &work, list) + list_add(&m->list, &pool->prepared_discards); + spin_unlock_irqrestore(&pool->lock, flags); + wake_worker(pool); + } } mempool_free(h, pool->endio_hook_pool); -- cgit v0.10.2 From e3cbf94513c21516fbb44561857b155d1c599625 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 21 Dec 2012 20:23:32 +0000 Subject: dm persistent data: fix nested btree deletion When deleting nested btrees, the code forgets to delete the innermost btree. The thin-metadata code serendipitously compensates for this by claiming there is one extra layer in the tree. This patch corrects both problems. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 693e149..4d6e853 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -408,7 +408,7 @@ static void __setup_btree_details(struct dm_pool_metadata *pmd) pmd->tl_info.tm = pmd->tm; pmd->tl_info.levels = 1; - pmd->tl_info.value_type.context = &pmd->info; + pmd->tl_info.value_type.context = &pmd->bl_info; pmd->tl_info.value_type.size = sizeof(__le64); pmd->tl_info.value_type.inc = subtree_inc; pmd->tl_info.value_type.dec = subtree_dec; diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 371f3d4..4caf669 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -230,6 +230,11 @@ static void pop_frame(struct del_stack *s) dm_tm_unlock(s->tm, f->b); } +static bool is_internal_level(struct dm_btree_info *info, struct frame *f) +{ + return f->level < (info->levels - 1); +} + int dm_btree_del(struct dm_btree_info *info, dm_block_t root) { int r; @@ -241,7 +246,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) s->tm = info->tm; s->top = -1; - r = push_frame(s, root, 1); + r = push_frame(s, root, 0); if (r) goto out; @@ -267,7 +272,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) if (r) goto out; - } else if (f->level != (info->levels - 1)) { + } else if (is_internal_level(info, f)) { b = value64(f->n, f->current_child); f->current_child++; r = push_frame(s, b, f->level + 1); -- cgit v0.10.2 From 018debea8de9b8f17a9637e07c98c61517eb2a6b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:32 +0000 Subject: dm thin: emit ignore_discard in status when discards disabled If "ignore_discard" is specified when creating the thin pool device then discard support is disabled for that device. The pool device's status should reflect this fact rather than stating "no_discard_passdown" (which implies discards are enabled but passdown is disabled). 
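The reporting rule distils to three mutually exclusive tokens. The helper below is a simplified restatement of the patch's logic, not the kernel function, which emits the token into a status buffer with DMEMIT:

	#include <stdbool.h>

	static const char *discard_status(bool discard_enabled, bool discard_passdown)
	{
		if (!discard_enabled)
			return "ignore_discard";	/* discards disabled outright */
		return discard_passdown ?
			"discard_passdown" :		/* enabled and passed down */
			"no_discard_passdown";		/* enabled, passdown disabled */
	}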
Reported-by: Zdenek Kabelac Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index aeb9e20..063fe04 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2391,7 +2391,9 @@ static int pool_status(struct dm_target *ti, status_type_t type, else DMEMIT("rw "); - if (pool->pf.discard_enabled && pool->pf.discard_passdown) + if (!pool->pf.discard_enabled) + DMEMIT("ignore_discard"); + else if (pool->pf.discard_passdown) DMEMIT("discard_passdown"); else DMEMIT("no_discard_passdown"); @@ -2487,7 +2489,7 @@ static struct target_type pool_target = { .name = "thin-pool", .features = DM_TARGET_SINGLETON | DM_TARGET_ALWAYS_WRITEABLE | DM_TARGET_IMMUTABLE, - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = pool_ctr, .dtr = pool_dtr, -- cgit v0.10.2 From 7c27213b20f060111d5b6fa14bcdbab6588ae351 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:32 +0000 Subject: dm raid1: remove impossible mempool_alloc error test mempool_alloc can't fail if __GFP_WAIT is specified, so the condition that tests if read_record is non-NULL is always true. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index fd61f98..b37467618 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1195,11 +1195,9 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, return -EIO; read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO); - if (likely(read_record)) { - dm_bio_record(&read_record->details, bio); - map_context->ptr = read_record; - read_record->m = m; - } + dm_bio_record(&read_record->details, bio); + map_context->ptr = read_record; + read_record->m = m; map_bio(m, bio); -- cgit v0.10.2 From 19cbbc60c680e2e404bf55458ad2c4ff60d16076 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:32 +0000 Subject: dm raid: use DM_ENDIO_INCOMPLETE Use a defined macro DM_ENDIO_INCOMPLETE instead of a numeric constant. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index b37467618..cec5f9b 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1257,7 +1257,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, mempool_free(read_record, ms->read_record_pool); map_context->ptr = NULL; queue_bio(ms, bio, rw); - return 1; + return DM_ENDIO_INCOMPLETE; } DMERR("All replicated volumes dead, failing I/O"); } -- cgit v0.10.2 From 9aa0c0e60ffc2594acaad23127dbea9f3b61821c Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:33 +0000 Subject: dm snapshot: optimize track_chunk track_chunk is always called with interrupts enabled. Consequently, we do not need to save and restore interrupt state in "flags" variable. This patch changes spin_lock_irqsave to spin_lock_irq and spin_unlock_irqrestore to spin_unlock_irq. 
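The two locking forms differ only in how they treat interrupt state: spin_lock_irqsave() records the current state in "flags" and spin_unlock_irqrestore() restores it, so the pair is safe from any context; spin_lock_irq()/spin_unlock_irq() unconditionally disable and re-enable interrupts, which is correct only when the caller always runs with interrupts enabled, as track_chunk() does. These are the real kernel primitives; the fragment below is only a side-by-side illustration.

	unsigned long flags;

	spin_lock_irqsave(&s->tracked_chunk_lock, flags);	/* save state, disable IRQs */
	/* critical section */
	spin_unlock_irqrestore(&s->tracked_chunk_lock, flags);	/* restore the saved state */

	spin_lock_irq(&s->tracked_chunk_lock);			/* disable IRQs */
	/* critical section */
	spin_unlock_irq(&s->tracked_chunk_lock);		/* unconditionally re-enable */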
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index a143921..223e7eb 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -198,14 +198,13 @@ static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, { struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, GFP_NOIO); - unsigned long flags; c->chunk = chunk; - spin_lock_irqsave(&s->tracked_chunk_lock, flags); + spin_lock_irq(&s->tracked_chunk_lock); hlist_add_head(&c->node, &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); - spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); + spin_unlock_irq(&s->tracked_chunk_lock); return c; } -- cgit v0.10.2 From f286ba0eede3d8f211e6029c2b52f2dbee7d7d35 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 21 Dec 2012 20:23:33 +0000 Subject: dm thin: rename cell_defer_except to cell_defer_no_holder Rename cell_defer_except() to cell_defer_no_holder() which describes its function more clearly. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 063fe04..2cb81bf 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -526,7 +526,7 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, /* * Same as cell_defer except it omits the original holder of the cell. */ -static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell) +static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) { struct bio_list bios; struct pool *pool = tc->pool; @@ -583,7 +583,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) * the bios in the cell. */ if (bio) { - cell_defer_except(tc, m->cell); + cell_defer_no_holder(tc, m->cell); bio_endio(bio, 0); } else cell_defer(tc, m->cell, m->data_block); @@ -598,8 +598,8 @@ static void process_prepared_discard_fail(struct dm_thin_new_mapping *m) struct thin_c *tc = m->tc; bio_io_error(m->bio); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); mempool_free(m, tc->pool->mapping_pool); } @@ -608,8 +608,8 @@ static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m) struct thin_c *tc = m->tc; inc_all_io_entry(tc->pool, m->bio); - cell_defer_except(tc, m->cell); - cell_defer_except(tc, m->cell2); + cell_defer_no_holder(tc, m->cell); + cell_defer_no_holder(tc, m->cell2); if (m->pass_discard) remap_and_issue(tc, m->bio, m->data_block); @@ -950,7 +950,7 @@ static void process_discard(struct thin_c *tc, struct bio *bio) */ build_data_key(tc->td, lookup_result.block, &key2); if (dm_bio_detain(tc->pool->prison, &key2, bio, &cell2)) { - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); break; } @@ -977,8 +977,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio) } } else { inc_all_io_entry(pool, bio); - cell_defer_except(tc, cell); - cell_defer_except(tc, cell2); + cell_defer_no_holder(tc, cell); + cell_defer_no_holder(tc, cell2); /* * The DM core makes sure that the discard doesn't span @@ -996,13 +996,13 @@ static void process_discard(struct thin_c *tc, struct bio *bio) /* * It isn't provisioned, just forget it. 
*/ - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); break; default: DMERR("discard: find block unexpectedly returned %d", r); - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1057,7 +1057,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); inc_all_io_entry(pool, bio); - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); remap_and_issue(tc, bio, lookup_result->block); } @@ -1074,7 +1074,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block */ if (!bio->bi_size) { inc_all_io_entry(tc->pool, bio); - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); remap_and_issue(tc, bio, 0); return; @@ -1085,7 +1085,7 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block */ if (bio_data_dir(bio) == READ) { zero_fill_bio(bio); - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); bio_endio(bio, 0); return; } @@ -1132,10 +1132,10 @@ static void process_bio(struct thin_c *tc, struct bio *bio) case 0: if (lookup_result.shared) { process_shared_bio(tc, bio, block, &lookup_result); - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); } else { inc_all_io_entry(tc->pool, bio); - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); remap_and_issue(tc, bio, lookup_result.block); } @@ -1144,7 +1144,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) case -ENODATA: if (bio_data_dir(bio) == READ && tc->origin_dev) { inc_all_io_entry(tc->pool, bio); - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); remap_to_origin_and_issue(tc, bio); } else @@ -1153,7 +1153,7 @@ static void process_bio(struct thin_c *tc, struct bio *bio) default: DMERR("dm_thin_find_block() failed, error = %d", r); - cell_defer_except(tc, cell); + cell_defer_no_holder(tc, cell); bio_io_error(bio); break; } @@ -1429,13 +1429,13 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, build_data_key(tc->td, result.block, &key); if (dm_bio_detain(tc->pool->prison, &key, bio, &cell2)) { - cell_defer_except(tc, cell1); + cell_defer_no_holder(tc, cell1); return DM_MAPIO_SUBMITTED; } inc_all_io_entry(tc->pool, bio); - cell_defer_except(tc, cell2); - cell_defer_except(tc, cell1); + cell_defer_no_holder(tc, cell2); + cell_defer_no_holder(tc, cell1); remap(tc, bio, result.block); return DM_MAPIO_REMAPPED; -- cgit v0.10.2 From 2aab38502d0e1bf6cf98183769e35a9ff999dcb1 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 21 Dec 2012 20:23:33 +0000 Subject: dm thin: cleanup dead code Remove unused @data_block parameter from cell_defer. Change thin_bio_map to use many returns rather than setting a variable. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 2cb81bf..6b9322e 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -510,8 +510,7 @@ static void overwrite_endio(struct bio *bio, int err) /* * This sends the bios in the cell back to the deferred_bios list. 
*/ -static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, - dm_block_t data_block) +static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell) { struct pool *pool = tc->pool; unsigned long flags; @@ -528,12 +527,9 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, */ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *cell) { - struct bio_list bios; struct pool *pool = tc->pool; unsigned long flags; - bio_list_init(&bios); - spin_lock_irqsave(&pool->lock, flags); dm_cell_release_no_holder(cell, &pool->deferred_bios); spin_unlock_irqrestore(&pool->lock, flags); @@ -586,7 +582,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) cell_defer_no_holder(tc, m->cell); bio_endio(bio, 0); } else - cell_defer(tc, m->cell, m->data_block); + cell_defer(tc, m->cell); out: list_del(&m->list); @@ -1447,8 +1443,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * of doing so. Just error it. */ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } /* fall through */ @@ -1458,8 +1453,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * provide the hint to load the metadata into cache. */ thin_defer_bio(tc, bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; default: /* @@ -1468,11 +1462,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, * pool is switched to fail-io mode. */ bio_io_error(bio); - r = DM_MAPIO_SUBMITTED; - break; + return DM_MAPIO_SUBMITTED; } - - return r; } static int pool_is_congested(struct dm_target_callbacks *cb, int bdi_bits) -- cgit v0.10.2 From 3a0f9aaee02857609d79b20c809c02a8b7c39d06 Mon Sep 17 00:00:00 2001 From: Jonathan Brassow Date: Fri, 21 Dec 2012 20:23:33 +0000 Subject: dm raid: round region_size to power of two If the user does not supply a bitmap region_size to the dm raid target, a reasonable size is computed automatically. If this is not a power of 2, the md code will report an error later. This patch catches the problem early and rounds the region_size to the next power of two. Signed-off-by: Jonathan Brassow Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 45d94a7..4a20bf8 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -295,9 +295,11 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) * Choose a reasonable default. All figures in sectors. */ if (min_region_size > (1 << 13)) { + /* If not a power of 2, make it the next power of 2 */ + if (min_region_size & (min_region_size - 1)) + region_size = 1 << fls(region_size); DMINFO("Choosing default region size of %lu sectors", region_size); - region_size = min_region_size; } else { DMINFO("Choosing default region size of 4MiB"); region_size = 1 << 13; /* sectors */ -- cgit v0.10.2 From a5bd968aeb87af4a3a96ecdd78912832997e8a9d Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:34 +0000 Subject: dm block manager: reinstate message when validator fails Reinstate a useful error message when the block manager buffer validator fails. This was mistakenly eliminated when the block manager was converted to use dm-bufio. 
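For context, a block validator is the small vtable the block manager runs when a block is read or is about to be written; its shape, paraphrased from dm-block-manager.h, is:

	struct dm_block_validator {
		const char *name;	/* quoted by the reinstated error message */

		void (*prepare_for_write)(struct dm_block_validator *v,
					  struct dm_block *b, size_t block_size);

		/* Return 0 if the block is valid, or a negative errno otherwise. */
		int (*check)(struct dm_block_validator *v,
			     struct dm_block *b, size_t block_size);
	};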
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 5ba2777..47b683e 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -428,8 +428,11 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, if (!v) return 0; r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); - if (unlikely(r)) + if (unlikely(r)) { + DMERR("%s validator check failed for block %llu", v->name, + (unsigned long long) dm_bufio_get_block_number(buf)); return r; + } aux->validator = v; } else { if (unlikely(aux->validator != v)) { -- cgit v0.10.2 From 89ddeb8cb1383bfdfaaeb458f1c4a91e890cc60b Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:34 +0000 Subject: dm persistent data: use DMERR_LIMIT for errors Nearly all of persistent-data is in the IO path so throttle error messages with DMERR_LIMIT to limit the amount logged when something has gone wrong. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 47b683e..ec4cb3c 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -429,17 +429,16 @@ static int dm_bm_validate_buffer(struct dm_block_manager *bm, return 0; r = v->check(v, (struct dm_block *) buf, dm_bufio_get_block_size(bm->bufio)); if (unlikely(r)) { - DMERR("%s validator check failed for block %llu", v->name, - (unsigned long long) dm_bufio_get_block_number(buf)); + DMERR_LIMIT("%s validator check failed for block %llu", v->name, + (unsigned long long) dm_bufio_get_block_number(buf)); return r; } aux->validator = v; } else { if (unlikely(aux->validator != v)) { - DMERR("validator mismatch (old=%s vs new=%s) for block %llu", - aux->validator->name, v ? v->name : "NULL", - (unsigned long long) - dm_bufio_get_block_number(buf)); + DMERR_LIMIT("validator mismatch (old=%s vs new=%s) for block %llu", + aux->validator->name, v ? 
v->name : "NULL", + (unsigned long long) dm_bufio_get_block_number(buf)); return -EINVAL; } } diff --git a/drivers/md/persistent-data/dm-btree-spine.c b/drivers/md/persistent-data/dm-btree-spine.c index 2f0805c..f199a0c 100644 --- a/drivers/md/persistent-data/dm-btree-spine.c +++ b/drivers/md/persistent-data/dm-btree-spine.c @@ -45,8 +45,8 @@ static int node_check(struct dm_block_validator *v, uint32_t flags; if (dm_block_location(b) != le64_to_cpu(h->blocknr)) { - DMERR("node_check failed blocknr %llu wanted %llu", - le64_to_cpu(h->blocknr), dm_block_location(b)); + DMERR_LIMIT("node_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(h->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -54,8 +54,8 @@ static int node_check(struct dm_block_validator *v, block_size - sizeof(__le32), BTREE_CSUM_XOR)); if (csum_disk != h->csum) { - DMERR("node_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); + DMERR_LIMIT("node_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(h->csum)); return -EILSEQ; } @@ -63,12 +63,12 @@ static int node_check(struct dm_block_validator *v, if (sizeof(struct node_header) + (sizeof(__le64) + value_size) * le32_to_cpu(h->max_entries) > block_size) { - DMERR("node_check failed: max_entries too large"); + DMERR_LIMIT("node_check failed: max_entries too large"); return -EILSEQ; } if (le32_to_cpu(h->nr_entries) > le32_to_cpu(h->max_entries)) { - DMERR("node_check failed, too many entries"); + DMERR_LIMIT("node_check failed: too many entries"); return -EILSEQ; } @@ -77,7 +77,7 @@ static int node_check(struct dm_block_validator *v, */ flags = le32_to_cpu(h->flags); if (!(flags & INTERNAL_NODE) && !(flags & LEAF_NODE)) { - DMERR("node_check failed, node is neither INTERNAL or LEAF"); + DMERR_LIMIT("node_check failed: node is neither INTERNAL or LEAF"); return -EILSEQ; } diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index f3a9af8..3e7a88d 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -39,8 +39,8 @@ static int index_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(mi_le->blocknr)) { - DMERR("index_check failed blocknr %llu wanted %llu", - le64_to_cpu(mi_le->blocknr), dm_block_location(b)); + DMERR_LIMIT("index_check failed: blocknr %llu != wanted %llu", + le64_to_cpu(mi_le->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -48,8 +48,8 @@ static int index_check(struct dm_block_validator *v, block_size - sizeof(__le32), INDEX_CSUM_XOR)); if (csum_disk != mi_le->csum) { - DMERR("index_check failed csum %u wanted %u", - le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); + DMERR_LIMIT("index_check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(mi_le->csum)); return -EILSEQ; } @@ -89,8 +89,8 @@ static int bitmap_check(struct dm_block_validator *v, __le32 csum_disk; if (dm_block_location(b) != le64_to_cpu(disk_header->blocknr)) { - DMERR("bitmap check failed blocknr %llu wanted %llu", - le64_to_cpu(disk_header->blocknr), dm_block_location(b)); + DMERR_LIMIT("bitmap check failed: blocknr %llu != wanted %llu", + le64_to_cpu(disk_header->blocknr), dm_block_location(b)); return -ENOTBLK; } @@ -98,8 +98,8 @@ static int bitmap_check(struct dm_block_validator *v, block_size - sizeof(__le32), BITMAP_CSUM_XOR)); if (csum_disk != disk_header->csum) { - DMERR("bitmap check failed csum %u wanted %u", - le32_to_cpu(csum_disk), 
le32_to_cpu(disk_header->csum)); + DMERR_LIMIT("bitmap check failed: csum %u != wanted %u", + le32_to_cpu(csum_disk), le32_to_cpu(disk_header->csum)); return -EILSEQ; } -- cgit v0.10.2 From c397741c7645de7f2ead1f076f1a40e169875fe3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:34 +0000 Subject: dm thin: use DMERR_LIMIT for errors Throttle all errors logged from the IO path by dm thin. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 6b9322e..fba378f 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -567,7 +567,7 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) */ r = dm_thin_insert_block(tc->td, m->virt_block, m->data_block); if (r) { - DMERR("dm_thin_insert_block() failed"); + DMERR_LIMIT("dm_thin_insert_block() failed"); dm_cell_error(m->cell); goto out; } @@ -622,7 +622,7 @@ static void process_prepared_discard(struct dm_thin_new_mapping *m) r = dm_thin_remove_block(tc->td, m->virt_block); if (r) - DMERR("dm_thin_remove_block() failed"); + DMERR_LIMIT("dm_thin_remove_block() failed"); process_prepared_discard_passdown(m); } @@ -736,7 +736,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_copy() failed"); + DMERR_LIMIT("dm_kcopyd_copy() failed"); dm_cell_error(cell); } } @@ -802,7 +802,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, r = dm_kcopyd_zero(pool->copier, 1, &to, 0, copy_complete, m); if (r < 0) { mempool_free(m, pool->mapping_pool); - DMERR("dm_kcopyd_zero() failed"); + DMERR_LIMIT("dm_kcopyd_zero() failed"); dm_cell_error(cell); } } @@ -814,7 +814,7 @@ static int commit(struct pool *pool) r = dm_pool_commit_metadata(pool->pmd); if (r) - DMERR("commit failed, error = %d", r); + DMERR_LIMIT("commit failed: error = %d", r); return r; } @@ -997,7 +997,8 @@ static void process_discard(struct thin_c *tc, struct bio *bio) break; default: - DMERR("discard: find block unexpectedly returned %d", r); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); cell_defer_no_holder(tc, cell); bio_io_error(bio); break; @@ -1024,7 +1025,8 @@ static void break_sharing(struct thin_c *tc, struct bio *bio, dm_block_t block, break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); dm_cell_error(cell); break; } @@ -1100,7 +1102,8 @@ static void provision_block(struct thin_c *tc, struct bio *bio, dm_block_t block break; default: - DMERR("%s: alloc_data_block() failed, error = %d", __func__, r); + DMERR_LIMIT("%s: alloc_data_block() failed: error = %d", + __func__, r); set_pool_mode(tc->pool, PM_READ_ONLY); dm_cell_error(cell); break; @@ -1148,7 +1151,8 @@ static void process_bio(struct thin_c *tc, struct bio *bio) break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); cell_defer_no_holder(tc, cell); bio_io_error(bio); break; @@ -1190,7 +1194,8 @@ static void process_bio_read_only(struct thin_c *tc, struct bio *bio) break; default: - DMERR("dm_thin_find_block() failed, error = %d", r); + DMERR_LIMIT("%s: dm_thin_find_block() failed: error = %d", + __func__, r); bio_io_error(bio); break; } -- cgit v0.10.2 From 7960123f2d335ded2ac4c510bcd2a236b293b1b1 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Fri, 21 Dec 2012 
20:23:36 +0000 Subject: dm persistent data: improve space map block alloc failure message Improve space map error message when unable to allocate a new metadata block. Signed-off-by: Joe Thornber Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index e89ae5e..906cf3d 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -337,7 +337,7 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) { int r = sm_metadata_new_block_(sm, b); if (r) - DMERR("out of metadata space"); + DMERR("unable to allocate new metadata block"); return r; } -- cgit v0.10.2 From 5023e5cf58e1dae904e2e8b5b9779c33512b75a1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:36 +0000 Subject: dm ioctl: remove PF_MEMALLOC When allocating memory for the userspace ioctl data, set some appropriate GFP flags directly instead of using PF_MEMALLOC. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a651d52..a37aeba 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1556,7 +1556,10 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) secure_data = tmp.flags & DM_SECURE_DATA_FLAG; - dmi = vmalloc(tmp.data_size); + /* + * Try to avoid low memory issues when a device is suspended. + */ + dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); if (!dmi) { if (secure_data && clear_user(user, tmp.data_size)) return -EFAULT; @@ -1657,18 +1660,10 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) } /* - * Trying to avoid low memory issues when a device is - * suspended. - */ - current->flags |= PF_MEMALLOC; - - /* * Copy the parameters into kernel space. */ r = copy_params(user, &param); - current->flags &= ~PF_MEMALLOC; - if (r) return r; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 91e3a36..539b179 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -268,8 +268,8 @@ enum { #define DM_VERSION_MAJOR 4 #define DM_VERSION_MINOR 23 -#define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2012-07-25)" +#define DM_VERSION_PATCHLEVEL 1 +#define DM_VERSION_EXTRA "-ioctl (2012-12-18)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v0.10.2 From 9c5091f2eeeffe5eca2ffe8a1bc28d312c8a5083 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:36 +0000 Subject: dm ioctl: use kmalloc if possible If the parameter buffer is small enough, try to allocate it with kmalloc() rather than vmalloc(). vmalloc is noticeably slower than kmalloc because it has to manipulate page tables. In my tests, on PA-RISC this patch speeds up activation 13 times. On Opteron this patch speeds up activation by 5%. This patch introduces a new function free_params() to free the parameters and this uses new flags that record whether or not vmalloc() was used and whether or not the input buffer must be wiped after use.
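The allocation strategy is easier to see in isolation. Below is a minimal sketch of the pattern this patch introduces, with hypothetical helper names; the real code uses the DM_PARAMS_VMALLOC and DM_WIPE_BUFFER flags shown in the diff that follows:

/*
 * Sketch only (assumes <linux/slab.h> and <linux/vmalloc.h>): try the
 * cheap allocator first, fall back to vmalloc for large buffers, and
 * record which allocator succeeded so the matching free can be used.
 */
static void *alloc_param_buffer(size_t size, int *used_vmalloc)
{
        void *p = NULL;

        *used_vmalloc = 0;
        if (size <= KMALLOC_MAX_SIZE)
                p = kmalloc(size, GFP_NOIO | __GFP_NORETRY | __GFP_NOWARN);
        if (!p) {
                p = __vmalloc(size, GFP_NOIO | __GFP_HIGH, PAGE_KERNEL);
                *used_vmalloc = 1;
        }
        return p;
}

static void free_param_buffer(void *p, int used_vmalloc)
{
        if (used_vmalloc)
                vfree(p);
        else
                kfree(p);
}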
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a37aeba..0666b5d 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1543,7 +1543,21 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) +#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ + +static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) +{ + if (param_flags & DM_WIPE_BUFFER) + memset(param, 0, param_size); + + if (param_flags & DM_PARAMS_VMALLOC) + vfree(param); + else + kfree(param); +} + +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) { struct dm_ioctl tmp, *dmi; int secure_data; @@ -1556,10 +1570,21 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) secure_data = tmp.flags & DM_SECURE_DATA_FLAG; + *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + /* * Try to avoid low memory issues when a device is suspended. + * Use kmalloc() rather than vmalloc() when we can. */ - dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + dmi = NULL; + if (tmp.data_size <= KMALLOC_MAX_SIZE) + dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + + if (!dmi) { + dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + *param_flags |= DM_PARAMS_VMALLOC; + } + if (!dmi) { if (secure_data && clear_user(user, tmp.data_size)) return -EFAULT; @@ -1585,9 +1610,8 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) return 0; bad: - if (secure_data) - memset(dmi, 0, tmp.data_size); - vfree(dmi); + free_params(dmi, tmp.data_size, *param_flags); + return -EFAULT; } @@ -1624,7 +1648,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) static int ctl_ioctl(uint command, struct dm_ioctl __user *user) { int r = 0; - int wipe_buffer; + int param_flags; unsigned int cmd; struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; @@ -1662,14 +1686,12 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) /* * Copy the parameters into kernel space. */ - r = copy_params(user, &param); + r = copy_params(user, &param, &param_flags); if (r) return r; input_param_size = param->data_size; - wipe_buffer = param->flags & DM_SECURE_DATA_FLAG; - r = validate_params(cmd, param); if (r) goto out; @@ -1684,10 +1706,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) r = -EFAULT; out: - if (wipe_buffer) - memset(param, 0, input_param_size); - - vfree(param); + free_params(param, input_param_size, param_flags); return r; } -- cgit v0.10.2 From d54eaa5a0fde0a202e4e91f200f818edcef15bee Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:36 +0000 Subject: dm: prepare to support WRITE SAME Allow targets to opt in to WRITE SAME support by setting 'num_write_same_requests' in the dm_target structure. A dm device will only advertise WRITE SAME support if all its targets and all its underlying devices support it.
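From a target's point of view the opt-in is a single assignment in its constructor. A minimal hypothetical example (the dm core side, dm_table_supports_write_same(), is in the diff that follows):

static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
        /*
         * Declare that this target can pass WRITE SAME through.  The dm
         * core still checks every underlying device before advertising
         * the capability on the dm device's queue.
         */
        ti->num_write_same_requests = 1;
        return 0;
}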
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index fa29557..6be58b6 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -1414,6 +1414,33 @@ static bool dm_table_all_devices_attribute(struct dm_table *t, return 1; } +static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + + return q && !q->limits.max_write_same_sectors; +} + +static bool dm_table_supports_write_same(struct dm_table *t) +{ + struct dm_target *ti; + unsigned i = 0; + + while (i < dm_table_get_num_targets(t)) { + ti = dm_table_get_target(t, i++); + + if (!ti->num_write_same_requests) + return false; + + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_not_write_same_capable, NULL)) + return false; + } + + return true; +} + void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1445,7 +1472,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, else queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, q); - q->limits.max_write_same_sectors = 0; + if (!dm_table_supports_write_same(t)) + q->limits.max_write_same_sectors = 0; dm_table_set_integrity(t); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 38d27a1..d1f6cd8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -205,6 +205,11 @@ struct dm_target { */ unsigned num_discard_requests; + /* + * The number of WRITE SAME requests that will be submitted to the target. + */ + unsigned num_write_same_requests; + /* target specific data */ void *private; -- cgit v0.10.2 From 23508a96cd2e857d57044a2ed7d305f2d9daf441 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:37 +0000 Subject: dm: add WRITE SAME support WRITE SAME bios have a payload that contains a single page. When cloning WRITE SAME bios DM has no need to modify the bi_io_vec attributes (and doing so would be detrimental). DM need only alter the start and end of the WRITE SAME bio accordingly. Rather than duplicate __clone_and_map_discard, factor out a common function that is also used by __clone_and_map_write_same.
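The reason cloning is cheap: a WRITE SAME bio carries one page of payload no matter how many sectors it covers, so only the extent fields move when it is split. Schematically (3.x bio field names; this is an illustrative sketch, not code from the patch):

/*
 * Sketch: retargeting a WRITE SAME clone touches bi_sector/bi_size
 * only; bi_io_vec, bi_vcnt and bi_idx keep describing the single
 * payload page.
 */
static void retarget_write_same(struct bio *clone, sector_t sector,
                                sector_t len)
{
        clone->bi_sector = sector;
        clone->bi_size = to_bytes(len); /* len << SECTOR_SHIFT */
}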
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 77e6eff..5401cdc 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1174,7 +1174,28 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) ci->sector_count = 0; } -static int __clone_and_map_discard(struct clone_info *ci) +typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); + +static unsigned get_num_discard_requests(struct dm_target *ti) +{ + return ti->num_discard_requests; +} + +static unsigned get_num_write_same_requests(struct dm_target *ti) +{ + return ti->num_write_same_requests; +} + +typedef bool (*is_split_required_fn)(struct dm_target *ti); + +static bool is_split_required_for_discard(struct dm_target *ti) +{ + return ti->split_discard_requests; +} + +static int __clone_and_map_changing_extent_only(struct clone_info *ci, + get_num_requests_fn get_num_requests, + is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; @@ -1185,15 +1206,15 @@ static int __clone_and_map_discard(struct clone_info *ci) return -EIO; /* - * Even though the device advertised discard support, - * that does not mean every target supports it, and + * Even though the device advertised support for this type of + * request, that does not mean every target supports it, and * reconfiguration might also have changed that since the * check was performed. */ - if (!ti->num_discard_requests) + if (!get_num_requests || !get_num_requests(ti)) return -EOPNOTSUPP; - if (!ti->split_discard_requests) + if (is_split_required && !is_split_required(ti)) len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); else len = min(ci->sector_count, max_io_len(ci->sector, ti)); @@ -1206,6 +1227,17 @@ static int __clone_and_map_discard(struct clone_info *ci) return 0; } +static int __clone_and_map_discard(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, + is_split_required_for_discard); +} + +static int __clone_and_map_write_same(struct clone_info *ci) +{ + return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); +} + static int __clone_and_map(struct clone_info *ci) { struct bio *bio = ci->bio; @@ -1215,6 +1247,8 @@ static int __clone_and_map(struct clone_info *ci) if (unlikely(bio->bi_rw & REQ_DISCARD)) return __clone_and_map_discard(ci); + else if (unlikely(bio->bi_rw & REQ_WRITE_SAME)) + return __clone_and_map_write_same(ci); ti = dm_table_find_target(ci->map, ci->sector); if (!dm_target_is_valid(ti)) -- cgit v0.10.2 From 4f0b70b0479101522b8645ddc1f5ee7137821db3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:37 +0000 Subject: dm linear: add WRITE SAME support The linear target can already support WRITE SAME requests so signal this by setting num_write_same_requests to 1. 
Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 1bf19a9..82222a8 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -55,6 +55,7 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->num_write_same_requests = 1; ti->private = lc; return 0; @@ -155,7 +156,7 @@ static int linear_iterate_devices(struct dm_target *ti, static struct target_type linear_target = { .name = "linear", - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = linear_ctr, .dtr = linear_dtr, -- cgit v0.10.2 From 70d6c400acc386ea910c77318688541fc32e7ce8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 21 Dec 2012 20:23:37 +0000 Subject: dm kcopyd: add WRITE SAME support to dm_kcopyd_zero Add WRITE SAME support to dm-io and make it accessible to dm_kcopyd_zero(). dm_kcopyd_zero() provides an asynchronous interface whereas the blkdev_issue_write_same() interface is synchronous. WRITE SAME is a SCSI command that can be leveraged for more efficient zeroing of a specified logical extent of a device which supports it. Only a single zeroed logical block is transferred to the target for each WRITE SAME and the target then writes that same block across the specified extent. The dm thin target uses this. Signed-off-by: Mike Snitzer Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 1c46f97..ea49834 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -287,7 +287,8 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, unsigned num_bvecs; sector_t remaining = where->count; struct request_queue *q = bdev_get_queue(where->bdev); - sector_t discard_sectors; + unsigned short logical_block_size = queue_logical_block_size(q); + sector_t num_sectors; /* * where->count may be zero if rw holds a flush and we need to */ /* * Allocate a suitably sized-bio. */ - if (rw & REQ_DISCARD) + if ((rw & REQ_DISCARD) || (rw & REQ_WRITE_SAME)) num_bvecs = 1; else num_bvecs = min_t(int, bio_get_nr_vecs(where->bdev), @@ -310,9 +311,21 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, store_io_and_region_in_bio(bio, io, region); if (rw & REQ_DISCARD) { - discard_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); - bio->bi_size = discard_sectors << SECTOR_SHIFT; - remaining -= discard_sectors; + num_sectors = min_t(sector_t, q->limits.max_discard_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + remaining -= num_sectors; + } else if (rw & REQ_WRITE_SAME) { + /* + * WRITE SAME only uses a single page. + */ + dp->get_page(dp, &page, &len, &offset); + bio_add_page(bio, page, logical_block_size, offset); + num_sectors = min_t(sector_t, q->limits.max_write_same_sectors, remaining); + bio->bi_size = num_sectors << SECTOR_SHIFT; + + offset = 0; + remaining -= num_sectors; + dp->next_page(dp); + } else while (remaining) { /* * Try and add as many pages as possible.
diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index bed444c..68c0267 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -349,7 +349,7 @@ static void complete_io(unsigned long error, void *context) struct dm_kcopyd_client *kc = job->kc; if (error) { - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err |= error; else job->read_err = 1; @@ -361,7 +361,7 @@ static void complete_io(unsigned long error, void *context) } } - if (job->rw == WRITE) + if (job->rw & WRITE) push(&kc->complete_jobs, job); else { @@ -432,7 +432,7 @@ static int process_jobs(struct list_head *jobs, struct dm_kcopyd_client *kc, if (r < 0) { /* error this rogue job */ - if (job->rw == WRITE) + if (job->rw & WRITE) job->write_err = (unsigned long) -1L; else job->read_err = 1; @@ -585,6 +585,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, unsigned int flags, dm_kcopyd_notify_fn fn, void *context) { struct kcopyd_job *job; + int i; /* * Allocate an array of jobs consisting of one master job @@ -611,7 +612,16 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, memset(&job->source, 0, sizeof job->source); job->source.count = job->dests[0].count; job->pages = &zero_page_list; - job->rw = WRITE; + + /* + * Use WRITE SAME to optimize zeroing if all dests support it. + */ + job->rw = WRITE | REQ_WRITE_SAME; + for (i = 0; i < job->num_dests; i++) + if (!bdev_write_same(job->dests[i].bdev)) { + job->rw = WRITE; + break; + } } job->fn = fn; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index fba378f..4b94074 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -2779,7 +2779,7 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits) static struct target_type thin_target = { .name = "thin", - .version = {1, 5, 0}, + .version = {1, 6, 0}, .module = THIS_MODULE, .ctr = thin_ctr, .dtr = thin_dtr, -- cgit v0.10.2 From c0820cf5ad09522bdd9ff68e84841a09c9f339d8 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:38 +0000 Subject: dm: introduce per_bio_data Introduce a field per_bio_data_size in struct dm_target. Targets can set this field in the constructor. If a target sets this field to a non-zero value, "per_bio_data_size" bytes of auxiliary data are allocated for each bio submitted to the target. These data can be used for any purpose by the target and help us improve performance by removing some per-target mempools. Per-bio data is accessed with dm_per_bio_data. The argument data_size must be the same as the value per_bio_data_size in dm_target. If the target has a pointer to per_bio_data, it can get a pointer to the bio with dm_bio_from_per_bio_data() function (data_size must be the same as the value passed to dm_per_bio_data). 
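A minimal hypothetical target shows both directions of the conversion (map here still takes the map_info argument, which later patches in this series remove):

struct example_pb {
        sector_t orig_sector;
};

static int example_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
        ti->per_bio_data_size = sizeof(struct example_pb); /* declare once */
        return 0;
}

static int example_map(struct dm_target *ti, struct bio *bio,
                       union map_info *map_context)
{
        /* No mempool_alloc(): the storage already sits in the bio's front pad. */
        struct example_pb *pb = dm_per_bio_data(bio, sizeof(struct example_pb));

        pb->orig_sector = bio->bi_sector;
        /* A real target would also remap bio->bi_bdev here. */
        return DM_MAPIO_REMAPPED;
}

And back again, for example from a completion path that only holds pb:

        struct bio *bio = dm_bio_from_per_bio_data(pb, sizeof(struct example_pb));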
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 6be58b6..daf25d0 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -967,13 +967,22 @@ bool dm_table_request_based(struct dm_table *t) int dm_table_alloc_md_mempools(struct dm_table *t) { unsigned type = dm_table_get_type(t); + unsigned per_bio_data_size = 0; + struct dm_target *tgt; + unsigned i; if (unlikely(type == DM_TYPE_NONE)) { DMWARN("no table type is set, can't allocate mempools"); return -EINVAL; } - t->mempools = dm_alloc_md_mempools(type, t->integrity_supported); + if (type == DM_TYPE_BIO_BASED) + for (i = 0; i < t->num_targets; i++) { + tgt = t->targets + i; + per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size); + } + + t->mempools = dm_alloc_md_mempools(type, t->integrity_supported, per_bio_data_size); if (!t->mempools) return -ENOMEM; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 5401cdc..2765cf2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -63,18 +63,6 @@ struct dm_io { }; /* - * For bio-based dm. - * One of these is allocated per target within a bio. Hopefully - * this will be simplified out one day. - */ -struct dm_target_io { - struct dm_io *io; - struct dm_target *ti; - union map_info info; - struct bio clone; -}; - -/* * For request-based dm. * One of these is allocated per request. */ @@ -1980,13 +1968,20 @@ static void free_dev(struct mapped_device *md) static void __bind_mempools(struct mapped_device *md, struct dm_table *t) { - struct dm_md_mempools *p; + struct dm_md_mempools *p = dm_table_get_md_mempools(t); - if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) - /* the md already has necessary mempools */ + if (md->io_pool && (md->tio_pool || dm_table_get_type(t) == DM_TYPE_BIO_BASED) && md->bs) { + /* + * The md already has necessary mempools. Reload just the + * bioset because front_pad may have changed because + * a different table was loaded. + */ + bioset_free(md->bs); + md->bs = p->bs; + p->bs = NULL; goto out; + } - p = dm_table_get_md_mempools(t); BUG_ON(!p || md->io_pool || md->tio_pool || md->bs); md->io_pool = p->io_pool; @@ -2745,7 +2740,7 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) { struct dm_md_mempools *pools = kmalloc(sizeof(*pools), GFP_KERNEL); unsigned int pool_size = (type == DM_TYPE_BIO_BASED) ? 16 : MIN_IOS; @@ -2753,6 +2748,8 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) if (!pools) return NULL; + per_bio_data_size = roundup(per_bio_data_size, __alignof__(struct dm_target_io)); + pools->io_pool = (type == DM_TYPE_BIO_BASED) ? mempool_create_slab_pool(MIN_IOS, _io_cache) : mempool_create_slab_pool(MIN_IOS, _rq_bio_info_cache); @@ -2768,7 +2765,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) pools->bs = (type == DM_TYPE_BIO_BASED) ? 
bioset_create(pool_size, - offsetof(struct dm_target_io, clone)) : + per_bio_data_size + offsetof(struct dm_target_io, clone)) : bioset_create(pool_size, offsetof(struct dm_rq_clone_bio_info, clone)); if (!pools->bs) diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 6a99fef..45b97da 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -159,7 +159,7 @@ void dm_kcopyd_exit(void); /* * Mempool operations */ -struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity); +struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size); void dm_free_md_mempools(struct dm_md_mempools *pools); #endif diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index d1f6cd8..6f0e73b 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -210,6 +210,12 @@ struct dm_target { */ unsigned num_write_same_requests; + /* + * The minimum number of extra bytes allocated in each bio for the + * target to use. dm_per_bio_data returns the data location. + */ + unsigned per_bio_data_size; + /* target specific data */ void *private; @@ -246,6 +252,30 @@ struct dm_target_callbacks { int (*congested_fn) (struct dm_target_callbacks *, int); }; +/* + * For bio-based dm. + * One of these is allocated for each bio. + * This structure shouldn't be touched directly by target drivers. + * It is here so that we can inline dm_per_bio_data and + * dm_bio_from_per_bio_data + */ +struct dm_target_io { + struct dm_io *io; + struct dm_target *ti; + union map_info info; + struct bio clone; +}; + +static inline void *dm_per_bio_data(struct bio *bio, size_t data_size) +{ + return (char *)bio - offsetof(struct dm_target_io, clone) - data_size; +} + +static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size) +{ + return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone)); +} + int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); -- cgit v0.10.2 From 39cf0ed27ec70626e416c2f4780ea0449d405941 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:38 +0000 Subject: dm raid1: use per_bio_data Replace read_record_pool with per_bio_data in dm-raid1. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index cec5f9b..b4bc287 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -61,7 +61,6 @@ struct mirror_set { struct dm_region_hash *rh; struct dm_kcopyd_client *kcopyd_client; struct dm_io_client *io_client; - mempool_t *read_record_pool; /* recovery */ region_t nr_regions; @@ -139,14 +138,11 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) queue_bio(ms, bio, WRITE); } -#define MIN_READ_RECORDS 20 struct dm_raid1_read_record { struct mirror *m; struct dm_bio_details details; }; -static struct kmem_cache *_dm_raid1_read_record_cache; - /* * Every mirror should look like this one. 
*/ @@ -876,19 +872,9 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, atomic_set(&ms->suspend, 0); atomic_set(&ms->default_mirror, DEFAULT_MIRROR); - ms->read_record_pool = mempool_create_slab_pool(MIN_READ_RECORDS, - _dm_raid1_read_record_cache); - - if (!ms->read_record_pool) { - ti->error = "Error creating mirror read_record_pool"; - kfree(ms); - return NULL; - } - ms->io_client = dm_io_client_create(); if (IS_ERR(ms->io_client)) { ti->error = "Error creating dm_io client"; - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -900,7 +886,6 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors, if (IS_ERR(ms->rh)) { ti->error = "Error creating dirty region hash"; dm_io_client_destroy(ms->io_client); - mempool_destroy(ms->read_record_pool); kfree(ms); return NULL; } @@ -916,7 +901,6 @@ static void free_context(struct mirror_set *ms, struct dm_target *ti, dm_io_client_destroy(ms->io_client); dm_region_hash_destroy(ms->rh); - mempool_destroy(ms->read_record_pool); kfree(ms); } @@ -1088,6 +1072,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct dm_raid1_read_record); ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", @@ -1161,7 +1146,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, int r, rw = bio_rw(bio); struct mirror *m; struct mirror_set *ms = ti->private; - struct dm_raid1_read_record *read_record = NULL; + struct dm_raid1_read_record *read_record; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); if (rw == WRITE) { @@ -1194,7 +1179,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, if (unlikely(!m)) return -EIO; - read_record = mempool_alloc(ms->read_record_pool, GFP_NOIO); + read_record = dm_per_bio_data(bio, sizeof(struct dm_raid1_read_record)); dm_bio_record(&read_record->details, bio); map_context->ptr = read_record; read_record->m = m; @@ -1254,7 +1239,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, bd = &read_record->details; dm_bio_restore(bd, bio); - mempool_free(read_record, ms->read_record_pool); map_context->ptr = NULL; queue_bio(ms, bio, rw); return DM_ENDIO_INCOMPLETE; @@ -1263,10 +1247,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, } out: - if (read_record) { - mempool_free(read_record, ms->read_record_pool); - map_context->ptr = NULL; - } + map_context->ptr = NULL; return error; } @@ -1420,7 +1401,7 @@ static int mirror_iterate_devices(struct dm_target *ti, static struct target_type mirror_target = { .name = "mirror", - .version = {1, 12, 1}, + .version = {1, 13, 1}, .module = THIS_MODULE, .ctr = mirror_ctr, .dtr = mirror_dtr, @@ -1437,13 +1418,6 @@ static int __init dm_mirror_init(void) { int r; - _dm_raid1_read_record_cache = KMEM_CACHE(dm_raid1_read_record, 0); - if (!_dm_raid1_read_record_cache) { - DMERR("Can't allocate dm_raid1_read_record cache"); - r = -ENOMEM; - goto bad_cache; - } - r = dm_register_target(&mirror_target); if (r < 0) { DMERR("Failed to register mirror target"); @@ -1453,15 +1427,12 @@ static int __init dm_mirror_init(void) return 0; bad_target: - kmem_cache_destroy(_dm_raid1_read_record_cache); -bad_cache: return r; } static void __exit dm_mirror_exit(void) { dm_unregister_target(&mirror_target); - kmem_cache_destroy(_dm_raid1_read_record_cache); } /* Module hooks */ -- cgit v0.10.2 From e42c3f914da79102c54a7002329a086790c15327 Mon Sep 17 00:00:00 2001 From: 
Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:38 +0000 Subject: dm verity: use per_bio_data Replace io_mempool with per_bio_data in dm-verity. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 9e7328b..78f3498 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -55,7 +55,6 @@ struct dm_verity { unsigned shash_descsize;/* the size of temporary space for crypto */ int hash_failed; /* set to 1 if hash of any block failed */ - mempool_t *io_mempool; /* mempool of struct dm_verity_io */ mempool_t *vec_mempool; /* mempool of bio vector */ struct workqueue_struct *verify_wq; @@ -66,7 +65,6 @@ struct dm_verity { struct dm_verity_io { struct dm_verity *v; - struct bio *bio; /* original values of bio->bi_end_io and bio->bi_private */ bio_end_io_t *orig_bi_end_io; @@ -389,8 +387,8 @@ test_block_hash: */ static void verity_finish_io(struct dm_verity_io *io, int error) { - struct bio *bio = io->bio; struct dm_verity *v = io->v; + struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_bio_data_size); bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; @@ -398,8 +396,6 @@ static void verity_finish_io(struct dm_verity_io *io, int error) if (io->io_vec != io->io_vec_inline) mempool_free(io->io_vec, v->vec_mempool); - mempool_free(io, v->io_mempool); - bio_endio(bio, error); } @@ -486,9 +482,8 @@ static int verity_map(struct dm_target *ti, struct bio *bio, if (bio_data_dir(bio) == WRITE) return -EIO; - io = mempool_alloc(v->io_mempool, GFP_NOIO); + io = dm_per_bio_data(bio, ti->per_bio_data_size); io->v = v; - io->bio = bio; io->orig_bi_end_io = bio->bi_end_io; io->orig_bi_private = bio->bi_private; io->block = bio->bi_sector >> (v->data_dev_block_bits - SECTOR_SHIFT); @@ -610,9 +605,6 @@ static void verity_dtr(struct dm_target *ti) if (v->vec_mempool) mempool_destroy(v->vec_mempool); - if (v->io_mempool) - mempool_destroy(v->io_mempool); - if (v->bufio) dm_bufio_client_destroy(v->bufio); @@ -841,13 +833,7 @@ static int verity_ctr(struct dm_target *ti, unsigned argc, char **argv) goto bad; } - v->io_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, - sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2); - if (!v->io_mempool) { - ti->error = "Cannot allocate io mempool"; - r = -ENOMEM; - goto bad; - } + ti->per_bio_data_size = roundup(sizeof(struct dm_verity_io) + v->shash_descsize + v->digest_size * 2, __alignof__(struct dm_verity_io)); v->vec_mempool = mempool_create_kmalloc_pool(DM_VERITY_MEMPOOL_SIZE, BIO_MAX_PAGES * sizeof(struct bio_vec)); @@ -875,7 +861,7 @@ bad: static struct target_type verity_target = { .name = "verity", - .version = {1, 0, 0}, + .version = {1, 1, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, -- cgit v0.10.2 From 42bc954f2a4525c9034667dedc9bd1c342208013 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:38 +0000 Subject: dm snapshot: use per_bio_data Replace tracked_chunk_pool with per_bio_data in dm-snap. 
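The conversion follows the same shape as dm-raid1 above; the interesting part is what disappears from the I/O path. Schematically (fragments paraphrasing the diff that follows):

        /* Before: an allocation in the I/O path that can stall under memory pressure. */
        c = mempool_alloc(s->tracked_chunk_pool, GFP_NOIO);
        c->chunk = chunk;

        /* After: the tracker lives in the bio's per-bio data; no allocation,
         * no failure path, no mempool or kmem_cache to create and destroy. */
        c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk));
        c->chunk = chunk;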
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 223e7eb..5e88bc4 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -79,7 +79,6 @@ struct dm_snapshot { /* Chunks with outstanding reads */ spinlock_t tracked_chunk_lock; - mempool_t *tracked_chunk_pool; struct hlist_head tracked_chunk_hash[DM_TRACKED_CHUNK_HASH_SIZE]; /* The on disk metadata handler */ @@ -191,13 +190,11 @@ struct dm_snap_tracked_chunk { chunk_t chunk; }; -static struct kmem_cache *tracked_chunk_cache; - static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, + struct bio *bio, chunk_t chunk) { - struct dm_snap_tracked_chunk *c = mempool_alloc(s->tracked_chunk_pool, - GFP_NOIO); + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); c->chunk = chunk; @@ -217,8 +214,6 @@ static void stop_tracking_chunk(struct dm_snapshot *s, spin_lock_irqsave(&s->tracked_chunk_lock, flags); hlist_del(&c->node); spin_unlock_irqrestore(&s->tracked_chunk_lock, flags); - - mempool_free(c, s->tracked_chunk_pool); } static int __chunk_is_tracked(struct dm_snapshot *s, chunk_t chunk) @@ -1119,14 +1114,6 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_pending_pool; } - s->tracked_chunk_pool = mempool_create_slab_pool(MIN_IOS, - tracked_chunk_cache); - if (!s->tracked_chunk_pool) { - ti->error = "Could not allocate tracked_chunk mempool for " - "tracking reads"; - goto bad_tracked_chunk_pool; - } - for (i = 0; i < DM_TRACKED_CHUNK_HASH_SIZE; i++) INIT_HLIST_HEAD(&s->tracked_chunk_hash[i]); @@ -1134,6 +1121,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->private = s; ti->num_flush_requests = num_flush_requests; + ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ /* Exceptions aren't triggered till snapshot_resume() is called */ @@ -1182,9 +1170,6 @@ bad_read_metadata: unregister_snapshot(s); bad_load_and_register: - mempool_destroy(s->tracked_chunk_pool); - -bad_tracked_chunk_pool: mempool_destroy(s->pending_pool); bad_pending_pool: @@ -1289,8 +1274,6 @@ static void snapshot_dtr(struct dm_target *ti) BUG_ON(!hlist_empty(&s->tracked_chunk_hash[i])); #endif - mempool_destroy(s->tracked_chunk_pool); - __free_exceptions(s); mempool_destroy(s->pending_pool); @@ -1669,7 +1652,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } } else { bio->bi_bdev = s->origin->bdev; - map_context->ptr = track_chunk(s, chunk); + map_context->ptr = track_chunk(s, bio, chunk); } out_unlock: @@ -1732,7 +1715,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, remap_exception(s, e, bio, chunk); if (bio_rw(bio) == WRITE) - map_context->ptr = track_chunk(s, chunk); + map_context->ptr = track_chunk(s, bio, chunk); goto out_unlock; } @@ -2192,7 +2175,7 @@ static int origin_iterate_devices(struct dm_target *ti, static struct target_type origin_target = { .name = "snapshot-origin", - .version = {1, 7, 1}, + .version = {1, 8, 0}, .module = THIS_MODULE, .ctr = origin_ctr, .dtr = origin_dtr, @@ -2205,7 +2188,7 @@ static struct target_type origin_target = { static struct target_type snapshot_target = { .name = "snapshot", - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2219,7 +2202,7 @@ static struct target_type snapshot_target = { static struct target_type merge_target = { .name = 
dm_snapshot_merge_target_name, - .version = {1, 1, 0}, + .version = {1, 2, 0}, .module = THIS_MODULE, .ctr = snapshot_ctr, .dtr = snapshot_dtr, @@ -2280,17 +2263,8 @@ static int __init dm_snapshot_init(void) goto bad_pending_cache; } - tracked_chunk_cache = KMEM_CACHE(dm_snap_tracked_chunk, 0); - if (!tracked_chunk_cache) { - DMERR("Couldn't create cache to track chunks in use."); - r = -ENOMEM; - goto bad_tracked_chunk_cache; - } - return 0; -bad_tracked_chunk_cache: - kmem_cache_destroy(pending_cache); bad_pending_cache: kmem_cache_destroy(exception_cache); bad_exception_cache: @@ -2316,7 +2290,6 @@ static void __exit dm_snapshot_exit(void) exit_origin_hash(); kmem_cache_destroy(pending_cache); kmem_cache_destroy(exception_cache); - kmem_cache_destroy(tracked_chunk_cache); dm_exception_store_exit(); } -- cgit v0.10.2 From ddbd658f6446a35e4d6ba84812fd71023320cae2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:39 +0000 Subject: dm: move target request nr to dm_target_io This patch moves target_request_nr from map_info to dm_target_io and makes it accessible with dm_bio_get_target_request_nr. This patch is a preparation for the next patch that removes map_info. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 5e88bc4..b7e179c 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1682,7 +1682,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, chunk_t chunk; if (bio->bi_rw & REQ_FLUSH) { - if (!map_context->target_request_nr) + if (!dm_bio_get_target_request_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e2f87653..4e7ba82 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -279,13 +279,13 @@ static int stripe_map(struct dm_target *ti, struct bio *bio, unsigned target_request_nr; if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = map_context->target_request_nr; + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; return DM_MAPIO_REMAPPED; } if (unlikely(bio->bi_rw & REQ_DISCARD)) { - target_request_nr = map_context->target_request_nr; + target_request_nr = dm_bio_get_target_request_nr(bio); BUG_ON(target_request_nr >= sc->stripes); return stripe_map_discard(sc, bio, target_request_nr); } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 2765cf2..5ee580b 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1099,6 +1099,7 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); + tio->target_request_nr = 0; return tio; } @@ -1109,7 +1110,7 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs); struct bio *clone = &tio->clone; - tio->info.target_request_nr = request_nr; + tio->target_request_nr = request_nr; /* * Discard requests require the bio's inline iovecs be initialized. 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 6f0e73b..eb96ef6 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -23,7 +23,6 @@ typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; union map_info { void *ptr; unsigned long long ll; - unsigned target_request_nr; }; /* @@ -193,20 +192,21 @@ struct dm_target { * A number of zero-length barrier requests that will be submitted * to the target for the purpose of flushing cache. * - * The request number will be placed in union map_info->target_request_nr. + * The request number can be accessed with dm_bio_get_target_request_nr. * It is a responsibility of the target driver to remap these requests * to the real underlying devices. */ unsigned num_flush_requests; /* - * The number of discard requests that will be submitted to the - * target. map_info->request_nr is used just like num_flush_requests. + * The number of discard requests that will be submitted to the target. + * The request number can be accessed with dm_bio_get_target_request_nr. */ unsigned num_discard_requests; /* * The number of WRITE SAME requests that will be submitted to the target. + * The request number can be accessed with dm_bio_get_target_request_nr. */ unsigned num_write_same_requests; @@ -263,6 +263,7 @@ struct dm_target_io { struct dm_io *io; struct dm_target *ti; union map_info info; + unsigned target_request_nr; struct bio clone; }; @@ -276,6 +277,11 @@ static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size) return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone)); } +static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio) +{ + return container_of(bio, struct dm_target_io, clone)->target_request_nr; +} + int dm_register_target(struct target_type *t); void dm_unregister_target(struct target_type *t); -- cgit v0.10.2 From 89c7cd8974035f1fbfac5c01e668b2de0733b443 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:39 +0000 Subject: dm raid1: rename read_record to bio_record Rename struct read_record to bio_record in dm-raid1. In the following patch, the structure will be used for both read and write bios, so rename it. 
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index b4bc287..a7d0d8a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -138,7 +138,7 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) queue_bio(ms, bio, WRITE); } -struct dm_raid1_read_record { +struct dm_raid1_bio_record { struct mirror *m; struct dm_bio_details details; }; @@ -1072,7 +1072,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; - ti->per_bio_data_size = sizeof(struct dm_raid1_read_record); + ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; ms->kmirrord_wq = alloc_workqueue("kmirrord", @@ -1146,7 +1146,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, int r, rw = bio_rw(bio); struct mirror *m; struct mirror_set *ms = ti->private; - struct dm_raid1_read_record *read_record; + struct dm_raid1_bio_record *bio_record; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); if (rw == WRITE) { @@ -1179,10 +1179,10 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, if (unlikely(!m)) return -EIO; - read_record = dm_per_bio_data(bio, sizeof(struct dm_raid1_read_record)); - dm_bio_record(&read_record->details, bio); - map_context->ptr = read_record; - read_record->m = m; + bio_record = dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); + dm_bio_record(&bio_record->details, bio); + map_context->ptr = bio_record; + bio_record->m = m; map_bio(m, bio); @@ -1196,7 +1196,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, struct mirror_set *ms = (struct mirror_set *) ti->private; struct mirror *m = NULL; struct dm_bio_details *bd = NULL; - struct dm_raid1_read_record *read_record = map_context->ptr; + struct dm_raid1_bio_record *bio_record = map_context->ptr; /* * We need to dec pending if this was a write. @@ -1214,7 +1214,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, goto out; if (unlikely(error)) { - if (!read_record) { + if (!bio_record) { /* * There wasn't enough memory to record necessary * information for a retry or there was no other @@ -1224,7 +1224,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, return -EIO; } - m = read_record->m; + m = bio_record->m; DMERR("Mirror read failed from %s. Trying alternative device.", m->dev->name); @@ -1236,7 +1236,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, * mirror. */ if (default_ok(m) || mirror_available(ms, bio)) { - bd = &read_record->details; + bd = &bio_record->details; dm_bio_restore(bd, bio); map_context->ptr = NULL; -- cgit v0.10.2 From c7cfdf5973f644a21ef4a0a0f1aa1f081efc42c1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:39 +0000 Subject: dm flakey: dont use map_context Replace map_info with a per-bio structure "struct per_bio_data" in dm-flakey. 
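Before and after, reduced to the essentials (both fragments paraphrase the diff that follows):

        /* Before: a flag smuggled through the opaque map_info cookie. */
        map_context->ll = 1;                                    /* in flakey_map() */
        unsigned bio_submitted_while_down = map_context->ll;   /* in flakey_end_io() */

        /* After: a named field with an obvious meaning. */
        struct per_bio_data { bool bio_submitted; };
        struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
        pb->bio_submitted = true;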
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index cc15543..660f981 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -39,6 +39,10 @@ enum feature_flag_bits { DROP_WRITES }; +struct per_bio_data { + bool bio_submitted; +}; + static int parse_features(struct dm_arg_set *as, struct flakey_c *fc, struct dm_target *ti) { @@ -214,6 +218,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->num_flush_requests = 1; ti->num_discard_requests = 1; + ti->per_bio_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; @@ -270,6 +275,8 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, { struct flakey_c *fc = ti->private; unsigned elapsed; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); + pb->bio_submitted = false; /* Are we alive ? */ elapsed = (jiffies - fc->start_time) / HZ; @@ -277,7 +284,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio, /* * Flag this bio as submitted while down. */ - map_context->ll = 1; + pb->bio_submitted = true; /* * Map reads as normal. @@ -318,13 +325,13 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error, union map_info *map_context) { struct flakey_c *fc = ti->private; - unsigned bio_submitted_while_down = map_context->ll; + struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); /* * Corrupt successful READs while in down state. * If flags were specified, only corrupt those that match. */ - if (fc->corrupt_bio_byte && !error && bio_submitted_while_down && + if (fc->corrupt_bio_byte && !error && pb->bio_submitted && (bio_data_dir(bio) == READ) && (fc->corrupt_bio_rw == READ) && all_corrupt_bio_flags_match(bio, fc)) corrupt_bio_data(bio, fc); @@ -406,7 +413,7 @@ static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_ static struct target_type flakey_target = { .name = "flakey", - .version = {1, 2, 0}, + .version = {1, 3, 0}, .module = THIS_MODULE, .ctr = flakey_ctr, .dtr = flakey_dtr, -- cgit v0.10.2 From 0045d61b5b7470f7228b35e1ab7139119e249503 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:40 +0000 Subject: dm raid1: dont use map_context Don't use map_info any more in dm-raid1. map_info was used for writes to hold the region number. For this purpose we add a new field, write_region, to dm_raid1_bio_record. map_info was used for reads to hold a pointer to dm_raid1_bio_record (if the pointer was non-NULL, bio details were saved; if the pointer was NULL, bio details were not saved). We use dm_raid1_bio_record.details->bi_bdev for this purpose. If bi_bdev is NULL, details were not saved; if bi_bdev is non-NULL, details were saved.
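The sentinel idiom, reduced to its three moments (taken in spirit from the diff below):

        bio_record->details.bi_bdev = NULL;       /* map(): nothing recorded yet */
        dm_bio_record(&bio_record->details, bio); /* read path: now bi_bdev != NULL */

        if (!bio_record->details.bi_bdev)         /* end_io(): no saved state to retry from */
                return -EIO;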
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index a7d0d8a..57685cf 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -140,7 +140,9 @@ static void dispatch_bios(void *context, struct bio_list *bio_list) struct dm_raid1_bio_record { struct mirror *m; + /* if details->bi_bdev == NULL, details were not saved */ struct dm_bio_details details; + region_t write_region; }; /* @@ -1146,12 +1148,15 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, int r, rw = bio_rw(bio); struct mirror *m; struct mirror_set *ms = ti->private; - struct dm_raid1_bio_record *bio_record; struct dm_dirty_log *log = dm_rh_dirty_log(ms->rh); + struct dm_raid1_bio_record *bio_record = + dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); + + bio_record->details.bi_bdev = NULL; if (rw == WRITE) { /* Save region for mirror_end_io() handler */ - map_context->ll = dm_rh_bio_to_region(ms->rh, bio); + bio_record->write_region = dm_rh_bio_to_region(ms->rh, bio); queue_bio(ms, bio, rw); return DM_MAPIO_SUBMITTED; } @@ -1179,9 +1184,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio, if (unlikely(!m)) return -EIO; - bio_record = dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); dm_bio_record(&bio_record->details, bio); - map_context->ptr = bio_record; bio_record->m = m; map_bio(m, bio); @@ -1196,14 +1199,15 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, struct mirror_set *ms = (struct mirror_set *) ti->private; struct mirror *m = NULL; struct dm_bio_details *bd = NULL; - struct dm_raid1_bio_record *bio_record = map_context->ptr; + struct dm_raid1_bio_record *bio_record = + dm_per_bio_data(bio, sizeof(struct dm_raid1_bio_record)); /* * We need to dec pending if this was a write. */ if (rw == WRITE) { if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))) - dm_rh_dec(ms->rh, map_context->ll); + dm_rh_dec(ms->rh, bio_record->write_region); return error; } @@ -1214,7 +1218,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, goto out; if (unlikely(error)) { - if (!bio_record) { + if (!bio_record->details.bi_bdev) { /* * There wasn't enough memory to record necessary * information for a retry or there was no other @@ -1239,7 +1243,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, bd = &bio_record->details; dm_bio_restore(bd, bio); - map_context->ptr = NULL; + bio_record->details.bi_bdev = NULL; queue_bio(ms, bio, rw); return DM_ENDIO_INCOMPLETE; } @@ -1247,7 +1251,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, } out: - map_context->ptr = NULL; + bio_record->details.bi_bdev = NULL; return error; } -- cgit v0.10.2 From 59c3d2c6a12ff580b2c19c3925af4f4552639f8a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:40 +0000 Subject: dm thin: dont use map_context This patch removes endio_hook_pool from dm-thin and uses per-bio data instead. This patch removes any use of map_info in preparation for the next patch that removes map_info from bio-based device mapper. 
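dm_per_bio_data() must be called with the same data_size that the target set in per_bio_data_size; dm-thin spells out sizeof(struct dm_thin_endio_hook) at every call site. A hypothetical wrapper would keep those call sites honest:

/* Sketch only; the patch below writes the sizeof out at each call. */
static inline struct dm_thin_endio_hook *thin_pb(struct bio *bio)
{
        return dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
}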
Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 4b94074..e7743c6 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -186,7 +186,6 @@ struct pool { struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; - mempool_t *endio_hook_pool; process_bio_fn process_bio; process_bio_fn process_discard; @@ -304,7 +303,7 @@ static void __requeue_bio_list(struct thin_c *tc, struct bio_list *master) bio_list_init(master); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); if (h->tc == tc) bio_endio(bio, DM_ENDIO_REQUEUE); @@ -375,7 +374,7 @@ static void inc_all_io_entry(struct pool *pool, struct bio *bio) if (bio->bi_rw & REQ_DISCARD) return; - h = dm_get_mapinfo(bio)->ptr; + h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->all_io_entry = dm_deferred_entry_inc(pool->all_io_ds); } @@ -485,7 +484,7 @@ static void copy_complete(int read_err, unsigned long write_err, void *context) static void overwrite_endio(struct bio *bio, int err) { unsigned long flags; - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct dm_thin_new_mapping *m = h->overwrite_mapping; struct pool *pool = m->tc->pool; @@ -714,7 +713,7 @@ static void schedule_copy(struct thin_c *tc, dm_block_t virt_block, * bio immediately. Otherwise we use kcopyd to clone the data first. */ if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; @@ -784,7 +783,7 @@ static void schedule_zero(struct thin_c *tc, dm_block_t virt_block, process_prepared_mapping(m); else if (io_overwrites_block(pool, bio)) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->overwrite_mapping = m; m->bio = bio; @@ -899,7 +898,7 @@ static int alloc_data_block(struct thin_c *tc, dm_block_t *result) */ static void retry_on_resume(struct bio *bio) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; struct pool *pool = tc->pool; unsigned long flags; @@ -1051,7 +1050,7 @@ static void process_shared_bio(struct thin_c *tc, struct bio *bio, if (bio_data_dir(bio) == WRITE && bio->bi_size) break_sharing(tc, bio, block, &key, lookup_result, cell); else { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->shared_read_entry = dm_deferred_entry_inc(pool->shared_read_ds); inc_all_io_entry(pool, bio); @@ -1226,7 +1225,7 @@ static void process_deferred_bios(struct pool *pool) spin_unlock_irqrestore(&pool->lock, flags); while ((bio = bio_list_pop(&bios))) { - struct dm_thin_endio_hook *h = dm_get_mapinfo(bio)->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct thin_c *tc = h->tc; /* @@ -1359,17 +1358,14 @@ static void thin_defer_bio(struct thin_c *tc, struct bio *bio) wake_worker(pool); } -static struct dm_thin_endio_hook *thin_hook_bio(struct thin_c *tc, struct bio *bio) +static void thin_hook_bio(struct 
thin_c *tc, struct bio *bio) { - struct pool *pool = tc->pool; - struct dm_thin_endio_hook *h = mempool_alloc(pool->endio_hook_pool, GFP_NOIO); + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); h->tc = tc; h->shared_read_entry = NULL; h->all_io_entry = NULL; h->overwrite_mapping = NULL; - - return h; } /* @@ -1386,7 +1382,7 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio, struct dm_bio_prison_cell *cell1, *cell2; struct dm_cell_key key; - map_context->ptr = thin_hook_bio(tc, bio); + thin_hook_bio(tc, bio); if (get_pool_mode(tc->pool) == PM_FAIL) { bio_io_error(bio); @@ -1595,14 +1591,12 @@ static void __pool_destroy(struct pool *pool) if (pool->next_mapping) mempool_free(pool->next_mapping, pool->mapping_pool); mempool_destroy(pool->mapping_pool); - mempool_destroy(pool->endio_hook_pool); dm_deferred_set_destroy(pool->shared_read_ds); dm_deferred_set_destroy(pool->all_io_ds); kfree(pool); } static struct kmem_cache *_new_mapping_cache; -static struct kmem_cache *_endio_hook_cache; static struct pool *pool_create(struct mapped_device *pool_md, struct block_device *metadata_dev, @@ -1696,13 +1690,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_mapping_pool; } - pool->endio_hook_pool = mempool_create_slab_pool(ENDIO_HOOK_POOL_SIZE, - _endio_hook_cache); - if (!pool->endio_hook_pool) { - *error = "Error creating pool's endio_hook mempool"; - err_p = ERR_PTR(-ENOMEM); - goto bad_endio_hook_pool; - } pool->ref_count = 1; pool->last_commit_jiffies = jiffies; pool->pool_md = pool_md; @@ -1711,8 +1698,6 @@ static struct pool *pool_create(struct mapped_device *pool_md, return pool; -bad_endio_hook_pool: - mempool_destroy(pool->mapping_pool); bad_mapping_pool: dm_deferred_set_destroy(pool->all_io_ds); bad_all_io_ds: @@ -2607,6 +2592,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) ti->num_flush_requests = 1; ti->flush_supported = true; + ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { @@ -2653,7 +2639,7 @@ static int thin_endio(struct dm_target *ti, union map_info *map_context) { unsigned long flags; - struct dm_thin_endio_hook *h = map_context->ptr; + struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook)); struct list_head work; struct dm_thin_new_mapping *m, *tmp; struct pool *pool = h->tc->pool; @@ -2683,8 +2669,6 @@ static int thin_endio(struct dm_target *ti, } } - mempool_free(h, pool->endio_hook_pool); - return 0; } @@ -2813,14 +2797,8 @@ static int __init dm_thin_init(void) if (!_new_mapping_cache) goto bad_new_mapping_cache; - _endio_hook_cache = KMEM_CACHE(dm_thin_endio_hook, 0); - if (!_endio_hook_cache) - goto bad_endio_hook_cache; - return 0; -bad_endio_hook_cache: - kmem_cache_destroy(_new_mapping_cache); bad_new_mapping_cache: dm_unregister_target(&pool_target); bad_pool_target: @@ -2835,7 +2813,6 @@ static void dm_thin_exit(void) dm_unregister_target(&pool_target); kmem_cache_destroy(_new_mapping_cache); - kmem_cache_destroy(_endio_hook_cache); } module_init(dm_thin_init); -- cgit v0.10.2 From ee18026ac69efba804144541171299efd41747d2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:41 +0000 Subject: dm snapshot: do not use map_context Eliminate struct map_info from dm-snap. map_info->ptr was used in dm-snap to indicate if the bio was tracked. If map_info->ptr was non-NULL, the bio was linked in tracked_chunk_hash. 
This patch removes the use of map_info->ptr. We determine if the bio was tracked based on hlist_unhashed(&c->node). If hlist_unhashed is true, the bio is not tracked, if it is false, the bio is tracked. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index b7e179c..e1ecacf 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -190,9 +190,19 @@ struct dm_snap_tracked_chunk { chunk_t chunk; }; -static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, - struct bio *bio, - chunk_t chunk) +static void init_tracked_chunk(struct bio *bio) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + INIT_HLIST_NODE(&c->node); +} + +static bool is_bio_tracked(struct bio *bio) +{ + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); + return !hlist_unhashed(&c->node); +} + +static void track_chunk(struct dm_snapshot *s, struct bio *bio, chunk_t chunk) { struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); @@ -202,13 +212,11 @@ static struct dm_snap_tracked_chunk *track_chunk(struct dm_snapshot *s, hlist_add_head(&c->node, &s->tracked_chunk_hash[DM_TRACKED_CHUNK_HASH(chunk)]); spin_unlock_irq(&s->tracked_chunk_lock); - - return c; } -static void stop_tracking_chunk(struct dm_snapshot *s, - struct dm_snap_tracked_chunk *c) +static void stop_tracking_chunk(struct dm_snapshot *s, struct bio *bio) { + struct dm_snap_tracked_chunk *c = dm_per_bio_data(bio, sizeof(struct dm_snap_tracked_chunk)); unsigned long flags; spin_lock_irqsave(&s->tracked_chunk_lock, flags); @@ -1568,6 +1576,8 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, chunk_t chunk; struct dm_snap_pending_exception *pe = NULL; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { bio->bi_bdev = s->cow->bdev; return DM_MAPIO_REMAPPED; @@ -1652,7 +1662,7 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, } } else { bio->bi_bdev = s->origin->bdev; - map_context->ptr = track_chunk(s, bio, chunk); + track_chunk(s, bio, chunk); } out_unlock: @@ -1681,12 +1691,13 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, int r = DM_MAPIO_REMAPPED; chunk_t chunk; + init_tracked_chunk(bio); + if (bio->bi_rw & REQ_FLUSH) { if (!dm_bio_get_target_request_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; - map_context->ptr = NULL; return DM_MAPIO_REMAPPED; } @@ -1715,7 +1726,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio, remap_exception(s, e, bio, chunk); if (bio_rw(bio) == WRITE) - map_context->ptr = track_chunk(s, bio, chunk); + track_chunk(s, bio, chunk); goto out_unlock; } @@ -1737,10 +1748,9 @@ static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error, union map_info *map_context) { struct dm_snapshot *s = ti->private; - struct dm_snap_tracked_chunk *c = map_context->ptr; - if (c) - stop_tracking_chunk(s, c); + if (is_bio_tracked(bio)) + stop_tracking_chunk(s, bio); return 0; } -- cgit v0.10.2 From 7de3ee57da4b717050e79c9313a9bf66ccc72519 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:41 +0000 Subject: dm: remove map_info This patch removes map_info from bio-based device mapper targets. map_info is still used for request-based targets. 
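For a bio-based target the conversion is mechanical. A hypothetical out-of-tree target would change its hooks like this, moving any state it kept in map_context into per-bio data:

/* Before (through 3.7): */
static int foo_map(struct dm_target *ti, struct bio *bio,
                   union map_info *map_context);
static int foo_end_io(struct dm_target *ti, struct bio *bio,
                      int error, union map_info *map_context);

/* After (3.8): */
static int foo_map(struct dm_target *ti, struct bio *bio);
static int foo_end_io(struct dm_target *ti, struct bio *bio, int error);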
From 7de3ee57da4b717050e79c9313a9bf66ccc72519 Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 21 Dec 2012 20:23:41 +0000
Subject: dm: remove map_info

This patch removes map_info from bio-based device mapper targets.
map_info is still used for request-based targets.

Signed-off-by: Mikulas Patocka
Signed-off-by: Alasdair G Kergon

diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index bbf459b..f7369f9 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1689,8 +1689,7 @@ bad:
 	return ret;
 }
 
-static int crypt_map(struct dm_target *ti, struct bio *bio,
-		     union map_info *map_context)
+static int crypt_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_crypt_io *io;
 	struct crypt_config *cc = ti->private;
@@ -1846,7 +1845,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
 
 static struct target_type crypt_target = {
 	.name   = "crypt",
-	.version = {1, 11, 0},
+	.version = {1, 12, 0},
 	.module = THIS_MODULE,
 	.ctr    = crypt_ctr,
 	.dtr    = crypt_dtr,
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index f53846f..cc1bd04 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -274,8 +274,7 @@ static void delay_resume(struct dm_target *ti)
 	atomic_set(&dc->may_delay, 1);
 }
 
-static int delay_map(struct dm_target *ti, struct bio *bio,
-		     union map_info *map_context)
+static int delay_map(struct dm_target *ti, struct bio *bio)
 {
 	struct delay_c *dc = ti->private;
 
@@ -338,7 +337,7 @@ out:
 
 static struct target_type delay_target = {
 	.name	     = "delay",
-	.version     = {1, 1, 0},
+	.version     = {1, 2, 0},
 	.module      = THIS_MODULE,
 	.ctr	     = delay_ctr,
 	.dtr	     = delay_dtr,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 660f981..9721f2f 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -270,8 +270,7 @@ static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
 	}
 }
 
-static int flakey_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int flakey_map(struct dm_target *ti, struct bio *bio)
 {
 	struct flakey_c *fc = ti->private;
 	unsigned elapsed;
@@ -321,8 +320,7 @@ map_bio:
 	return DM_MAPIO_REMAPPED;
 }
 
-static int flakey_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	struct flakey_c *fc = ti->private;
 	struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 82222a8..328cad5 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -88,8 +88,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio)
 		bio->bi_sector = linear_map_sector(ti, bio->bi_sector);
 }
 
-static int linear_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int linear_map(struct dm_target *ti, struct bio *bio)
 {
 	linear_map_bio(ti, bio);
 
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 4a20bf8..3d8984e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1218,7 +1218,7 @@ static void raid_dtr(struct dm_target *ti)
 	context_free(rs);
 }
 
-static int raid_map(struct dm_target *ti, struct bio *bio, union map_info *map_context)
+static int raid_map(struct dm_target *ti, struct bio *bio)
 {
 	struct raid_set *rs = ti->private;
 	struct mddev *mddev = &rs->md;
@@ -1432,7 +1432,7 @@ static void raid_resume(struct dm_target *ti)
 
 static struct target_type raid_target = {
 	.name = "raid",
-	.version = {1, 3, 1},
+	.version = {1, 4, 0},
 	.module = THIS_MODULE,
 	.ctr = raid_ctr,
 	.dtr = raid_dtr,
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 57685cf..fa51918 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1142,8 +1142,7 @@ static void mirror_dtr(struct dm_target *ti)
 /*
  * Mirror mapping function
  */
-static int mirror_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int mirror_map(struct dm_target *ti, struct bio *bio)
 {
 	int r, rw = bio_rw(bio);
 	struct mirror *m;
@@ -1192,8 +1191,7 @@ static int mirror_map(struct dm_target *ti, struct bio *bio,
 	return DM_MAPIO_REMAPPED;
 }
 
-static int mirror_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	int rw = bio_rw(bio);
 	struct mirror_set *ms = (struct mirror_set *) ti->private;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index e1ecacf..59fc18a 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1567,8 +1567,7 @@ static void remap_exception(struct dm_snapshot *s, struct dm_exception *e,
 		s->store->chunk_mask);
 }
 
-static int snapshot_map(struct dm_target *ti, struct bio *bio,
-			union map_info *map_context)
+static int snapshot_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_exception *e;
 	struct dm_snapshot *s = ti->private;
@@ -1683,8 +1682,7 @@ out:
 * If merging is currently taking place on the chunk in question, the
 * I/O is deferred by adding it to s->bios_queued_during_merge.
 */
-static int snapshot_merge_map(struct dm_target *ti, struct bio *bio,
-			      union map_info *map_context)
+static int snapshot_merge_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_exception *e;
 	struct dm_snapshot *s = ti->private;
@@ -1744,8 +1742,7 @@ out_unlock:
 	return r;
 }
 
-static int snapshot_end_io(struct dm_target *ti, struct bio *bio,
-			   int error, union map_info *map_context)
+static int snapshot_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	struct dm_snapshot *s = ti->private;
 
@@ -2119,8 +2116,7 @@ static void origin_dtr(struct dm_target *ti)
 	dm_put_device(ti, dev);
 }
 
-static int origin_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int origin_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_dev *dev = ti->private;
 	bio->bi_bdev = dev->bdev;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4e7ba82..6b0e5ea 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -271,8 +271,7 @@ static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
 	}
 }
 
-static int stripe_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int stripe_map(struct dm_target *ti, struct bio *bio)
 {
 	struct stripe_c *sc = ti->private;
 	uint32_t stripe;
@@ -342,8 +341,7 @@ static int stripe_status(struct dm_target *ti, status_type_t type,
 	return 0;
 }
 
-static int stripe_end_io(struct dm_target *ti, struct bio *bio,
-			 int error, union map_info *map_context)
+static int stripe_end_io(struct dm_target *ti, struct bio *bio, int error)
 {
 	unsigned i;
 	char major_minor[16];
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index 8da366c..617d21a 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -126,15 +126,14 @@ static void io_err_dtr(struct dm_target *tt)
 	/* empty */
 }
 
-static int io_err_map(struct dm_target *tt, struct bio *bio,
-		      union map_info *map_context)
+static int io_err_map(struct dm_target *tt, struct bio *bio)
 {
 	return -EIO;
 }
 
 static struct target_type error_target = {
 	.name = "error",
-	.version = {1, 0, 1},
+	.version = {1, 1, 0},
 	.ctr  = io_err_ctr,
 	.dtr  = io_err_dtr,
 	.map  = io_err_map,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e7743c6..675ae52 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1371,8 +1371,7 @@ static void thin_hook_bio(struct thin_c *tc, struct bio *bio)
 /*
  * Non-blocking function called from the thin target's map function.
  */
-static int thin_bio_map(struct dm_target *ti, struct bio *bio,
-			union map_info *map_context)
+static int thin_bio_map(struct dm_target *ti, struct bio *bio)
 {
 	int r;
 	struct thin_c *tc = ti->private;
@@ -1980,8 +1979,7 @@ out_unlock:
 	return r;
 }
 
-static int pool_map(struct dm_target *ti, struct bio *bio,
-		    union map_info *map_context)
+static int pool_map(struct dm_target *ti, struct bio *bio)
 {
 	int r;
 	struct pool_c *pt = ti->private;
@@ -2626,17 +2624,14 @@ out_unlock:
 	return r;
 }
 
-static int thin_map(struct dm_target *ti, struct bio *bio,
-		    union map_info *map_context)
+static int thin_map(struct dm_target *ti, struct bio *bio)
 {
 	bio->bi_sector = dm_target_offset(ti, bio->bi_sector);
 
-	return thin_bio_map(ti, bio, map_context);
+	return thin_bio_map(ti, bio);
 }
 
-static int thin_endio(struct dm_target *ti,
-		      struct bio *bio, int err,
-		      union map_info *map_context)
+static int thin_endio(struct dm_target *ti, struct bio *bio, int err)
 {
 	unsigned long flags;
 	struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c
index 78f3498..52cde982 100644
--- a/drivers/md/dm-verity.c
+++ b/drivers/md/dm-verity.c
@@ -458,8 +458,7 @@ no_prefetch_cluster:
 * Bio map function. It allocates dm_verity_io structure and bio vector and
 * fills them. Then it issues prefetches and the I/O.
 */
-static int verity_map(struct dm_target *ti, struct bio *bio,
-		      union map_info *map_context)
+static int verity_map(struct dm_target *ti, struct bio *bio)
 {
 	struct dm_verity *v = ti->private;
 	struct dm_verity_io *io;
diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c
index cc2b3cb..69a5c3b 100644
--- a/drivers/md/dm-zero.c
+++ b/drivers/md/dm-zero.c
@@ -33,8 +33,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 /*
 * Return zeros only on reads
 */
-static int zero_map(struct dm_target *ti, struct bio *bio,
-		    union map_info *map_context)
+static int zero_map(struct dm_target *ti, struct bio *bio)
 {
 	switch(bio_rw(bio)) {
 	case READ:
@@ -56,7 +55,7 @@ static int zero_map(struct dm_target *ti, struct bio *bio,
 
 static struct target_type zero_target = {
 	.name = "zero",
-	.version = {1, 0, 0},
+	.version = {1, 1, 0},
 	.module = THIS_MODULE,
 	.ctr = zero_ctr,
 	.map = zero_map,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5ee580b..c72e4d5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -645,7 +645,7 @@ static void clone_endio(struct bio *bio, int error)
 		error = -EIO;
 
 	if (endio) {
-		r = endio(tio->ti, bio, error, &tio->info);
+		r = endio(tio->ti, bio, error);
 		if (r < 0 || r == DM_ENDIO_REQUEUE)
 			/*
 			 * error and requeue request are handled
@@ -1004,7 +1004,7 @@ static void __map_bio(struct dm_target *ti, struct dm_target_io *tio)
 	 */
 	atomic_inc(&tio->io->io_count);
 	sector = clone->bi_sector;
-	r = ti->type->map(ti, clone, &tio->info);
+	r = ti->type->map(ti, clone);
 	if (r == DM_MAPIO_REMAPPED) {
 		/* the bio has been remapped so dispatch it */
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index eb96ef6..bf6afa2 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -45,8 +45,7 @@ typedef void (*dm_dtr_fn) (struct dm_target *ti);
 * = 1: simple remap complete
 * = 2: The target wants to push back the io
 */
-typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio,
-			  union map_info *map_context);
+typedef int (*dm_map_fn) (struct dm_target *ti, struct bio *bio);
 typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
 				  union map_info *map_context);
@@ -59,8 +58,7 @@ typedef int (*dm_map_request_fn) (struct dm_target *ti, struct request *clone,
 * 2   : The target wants to push back the io
 */
 typedef int (*dm_endio_fn) (struct dm_target *ti,
-			    struct bio *bio, int error,
-			    union map_info *map_context);
+			    struct bio *bio, int error);
 typedef int (*dm_request_endio_fn) (struct dm_target *ti,
 				    struct request *clone, int error,
 				    union map_info *map_context);
--
cgit v0.10.2
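To make the interface change concrete: after this patch a bio-based
target implements the narrower signatures below. This is an
illustrative minimal pass-through target, not code from the patch (the
passthru_* names are invented here); it shows that per-bio state now
travels via dm_per_bio_data() rather than map_context->ptr:

#include <linux/device-mapper.h>

static int passthru_map(struct dm_target *ti, struct bio *bio)
{
	struct dm_dev *dev = ti->private;	/* assumed set up by .ctr */

	bio->bi_bdev = dev->bdev;
	return DM_MAPIO_REMAPPED;	/* nothing to stash in map_info */
}

static int passthru_end_io(struct dm_target *ti, struct bio *bio, int error)
{
	/*
	 * Any per-bio context would come from dm_per_bio_data() here,
	 * not from the removed map_context->ptr.
	 */
	return 0;	/* 0: I/O ended; the existing error is kept */
}

Request-based targets are unaffected: dm_map_request_fn and
dm_request_endio_fn keep their union map_info argument, as the header
diff above shows.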
From 45e621d45e24ffc4cb2b2935e8438987b860063a Mon Sep 17 00:00:00 2001
From: Mike Snitzer
Date: Fri, 21 Dec 2012 20:23:41 +0000
Subject: dm stripe: add WRITE SAME support

Rename stripe_map_discard to stripe_map_range and reuse it for
WRITE SAME bio processing.

Signed-off-by: Mike Snitzer
Signed-off-by: Alasdair G Kergon

diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 6b0e5ea..c89cde8 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -162,6 +162,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 
 	ti->num_flush_requests = stripes;
 	ti->num_discard_requests = stripes;
+	ti->num_write_same_requests = stripes;
 
 	sc->chunk_size = chunk_size;
 	if (chunk_size & (chunk_size - 1))
@@ -251,8 +252,8 @@ static void stripe_map_range_sector(struct stripe_c *sc, sector_t sector,
 		*result += sc->chunk_size;		/* next chunk */
 }
 
-static int stripe_map_discard(struct stripe_c *sc, struct bio *bio,
-			      uint32_t target_stripe)
+static int stripe_map_range(struct stripe_c *sc, struct bio *bio,
+			    uint32_t target_stripe)
 {
 	sector_t begin, end;
 
@@ -283,10 +284,11 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 		bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev;
 		return DM_MAPIO_REMAPPED;
 	}
-	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
+	if (unlikely(bio->bi_rw & REQ_DISCARD) ||
+	    unlikely(bio->bi_rw & REQ_WRITE_SAME)) {
 		target_request_nr = dm_bio_get_target_request_nr(bio);
 		BUG_ON(target_request_nr >= sc->stripes);
-		return stripe_map_discard(sc, bio, target_request_nr);
+		return stripe_map_range(sc, bio, target_request_nr);
 	}
 
 	stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
--
cgit v0.10.2
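Discard and WRITE SAME can share one path because both describe a
sector range rather than carrying a payload for every sector, so each
per-stripe sub-bio only needs its range clamped to that stripe's
chunks. A condensed restatement of the resulting stripe_map() dispatch
(stripe_map_sketch is an illustrative name; folding the two unlikely()
tests into a single mask is equivalent to the diff above, and the other
identifiers are dm-stripe.c internals):

/* Assumes the struct stripe_c and helpers defined in dm-stripe.c. */
static int stripe_map_sketch(struct dm_target *ti, struct bio *bio)
{
	struct stripe_c *sc = ti->private;
	uint32_t stripe;

	if (unlikely(bio->bi_rw & (REQ_DISCARD | REQ_WRITE_SAME))) {
		/* One sub-bio was issued per stripe; map this one's range. */
		uint32_t target_request_nr = dm_bio_get_target_request_nr(bio);

		BUG_ON(target_request_nr >= sc->stripes);
		return stripe_map_range(sc, bio, target_request_nr);
	}

	/* Ordinary I/O: remap a single sector to its stripe device. */
	stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector);
	bio->bi_bdev = sc->stripe[stripe].dev->bdev;
	return DM_MAPIO_REMAPPED;
}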