From 512875bd9661368da6f993205a61213b79ba1df0 Mon Sep 17 00:00:00 2001 From: Jun'ichi Nomura Date: Thu, 13 Dec 2007 14:15:25 +0000 Subject: dm: table detect io beyond device This patch fixes a panic on shrinking a DM device if there is outstanding I/O to the part of the device that is being removed. (Normally this doesn't happen - a filesystem would be resized first, for example.) The bug is that __clone_and_map() assumes dm_table_find_target() always returns a valid pointer. It may fail if a bio arrives from the block layer but its target sector is no longer included in the DM btree. This patch appends an empty entry to table->targets[] which will be returned by a lookup beyond the end of the device. After calling dm_table_find_target(), __clone_and_map() and target_message() check for this condition using dm_target_is_valid(). Sample test script to trigger oops: diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 138200b..be730fd 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1250,21 +1250,17 @@ static int target_message(struct dm_ioctl *param, size_t param_size) if (!table) goto out_argv; - if (tmsg->sector >= dm_table_get_size(table)) { + ti = dm_table_find_target(table, tmsg->sector); + if (!dm_target_is_valid(ti)) { DMWARN("Target message sector outside device."); r = -EINVAL; - goto out_table; - } - - ti = dm_table_find_target(table, tmsg->sector); - if (ti->type->message) + } else if (ti->type->message) r = ti->type->message(ti, argc, argv); else { DMWARN("Target type does not support messages"); r = -EINVAL; } - out_table: dm_table_put(table); out_argv: kfree(argv); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index e298d8d..f3f952e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -189,8 +189,10 @@ static int alloc_targets(struct dm_table *t, unsigned int num) /* * Allocate both the target array and offset array at once. + * Append an empty entry to catch sectors beyond the end of + * the device. */ - n_highs = (sector_t *) dm_vcalloc(num, sizeof(struct dm_target) + + n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) + sizeof(sector_t)); if (!n_highs) return -ENOMEM; @@ -867,6 +869,9 @@ struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) /* * Search the btree for the correct target. + * + * Caller should check returned pointer with dm_target_is_valid() + * to trap I/O beyond end of device. */ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) { diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 07cbbb8..cff2a714 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -672,13 +672,19 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, return clone; } -static void __clone_and_map(struct clone_info *ci) +static int __clone_and_map(struct clone_info *ci) { struct bio *clone, *bio = ci->bio; - struct dm_target *ti = dm_table_find_target(ci->map, ci->sector); - sector_t len = 0, max = max_io_len(ci->md, ci->sector, ti); + struct dm_target *ti; + sector_t len = 0, max; struct dm_target_io *tio; + ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; + + max = max_io_len(ci->md, ci->sector, ti); + /* * Allocate a target io object. */ @@ -736,6 +742,9 @@ static void __clone_and_map(struct clone_info *ci) do { if (offset) { ti = dm_table_find_target(ci->map, ci->sector); + if (!dm_target_is_valid(ti)) + return -EIO; + max = max_io_len(ci->md, ci->sector, ti); tio = alloc_tio(ci->md); @@ -759,6 +768,8 @@ static void __clone_and_map(struct clone_info *ci) ci->idx++; } + + return 0; } /* @@ -767,6 +778,7 @@ static void __clone_and_map(struct clone_info *ci) static int __split_bio(struct mapped_device *md, struct bio *bio) { struct clone_info ci; + int error = 0; ci.map = dm_get_table(md); if (unlikely(!ci.map)) @@ -784,11 +796,11 @@ static int __split_bio(struct mapped_device *md, struct bio *bio) ci.idx = bio->bi_idx; start_io_acct(ci.io); - while (ci.sector_count) - __clone_and_map(&ci); + while (ci.sector_count && !error) + error = __clone_and_map(&ci); /* drop the extra reference count */ - dec_pending(ci.io, 0); + dec_pending(ci.io, error); dm_table_put(ci.map); return 0; diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 4b3faa4..177297a 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -112,6 +112,11 @@ int dm_table_resume_targets(struct dm_table *t); int dm_table_any_congested(struct dm_table *t, int bdi_bits); void dm_table_unplug_all(struct dm_table *t); +/* + * To check the return value from dm_table_find_target(). + */ +#define dm_target_is_valid(t) ((t)->table) + /*----------------------------------------------------------------- * A registry of target types. *---------------------------------------------------------------*/ -- cgit v0.10.2 From d1622e89099b7cdda20d95a68940067bdddda03c Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Thu, 13 Dec 2007 14:15:43 +0000 Subject: dm mpath: hp requires scsi With CONFIG_SCSI=n __scsi_print_sense() is never linked in. drivers/built-in.o: In function `hp_sw_end_io': dm-mpath-hp-sw.c:(.text+0x914f8): undefined reference to `__scsi_print_sense' Caught with a randconfig on current git. Signed-off-by: Paul Mundt Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 9b6fbf0..3fa7c77 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -269,7 +269,7 @@ config DM_MULTIPATH_RDAC config DM_MULTIPATH_HP tristate "HP MSA multipath support (EXPERIMENTAL)" - depends on DM_MULTIPATH && BLK_DEV_DM && EXPERIMENTAL + depends on DM_MULTIPATH && BLK_DEV_DM && SCSI && EXPERIMENTAL ---help--- Multipath support for HP MSA (Active/Passive) series hardware. -- cgit v0.10.2 From adfe47702c4726b3e045f9f83178def02833be4c Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 13 Dec 2007 14:15:51 +0000 Subject: dm crypt: fix write endio Fix BIO_UPTODATE test for write io. Cc: stable@kernel.org Cc: dm-crypt@saout.de Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 28c6ae0..30d51a0 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -511,6 +511,9 @@ static void crypt_endio(struct bio *clone, int error) struct crypt_config *cc = io->target->private; unsigned read_io = bio_data_dir(clone) == READ; + if (unlikely(!bio_flagged(clone, BIO_UPTODATE) && !error)) + error = -EIO; + /* * free the processed pages */ @@ -519,10 +522,8 @@ static void crypt_endio(struct bio *clone, int error) goto out; } - if (unlikely(!bio_flagged(clone, BIO_UPTODATE))) { - error = -EIO; + if (unlikely(error)) goto out; - } bio_put(clone); kcryptd_queue_crypt(io); -- cgit v0.10.2 From 69267a30bed1fabec658058c63845528a8b813d4 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 13 Dec 2007 14:15:57 +0000 Subject: dm: trigger change uevent on rename Insert a missing KOBJ_CHANGE notification when a device is renamed. Cc: Scott James Remnant Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index be730fd..9627fa0 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -332,6 +332,8 @@ static int dm_hash_rename(const char *old, const char *new) dm_table_put(table); } + dm_kobject_uevent(hc->md); + dm_put(hc->md); up_write(&_hash_lock); kfree(old_name); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index cff2a714..88c0fd6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1514,7 +1514,7 @@ int dm_resume(struct mapped_device *md) dm_table_unplug_all(map); - kobject_uevent(&md->disk->kobj, KOBJ_CHANGE); + dm_kobject_uevent(md); r = 0; @@ -1528,6 +1528,11 @@ out: /*----------------------------------------------------------------- * Event notification. *---------------------------------------------------------------*/ +void dm_kobject_uevent(struct mapped_device *md) +{ + kobject_uevent(&md->disk->kobj, KOBJ_CHANGE); +} + uint32_t dm_next_uevent_seq(struct mapped_device *md) { return atomic_add_return(1, &md->uevent_seq); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 177297a..b4584a3 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -187,4 +187,6 @@ union map_info *dm_get_mapinfo(struct bio *bio); int dm_open_count(struct mapped_device *md); int dm_lock_for_deletion(struct mapped_device *md); +void dm_kobject_uevent(struct mapped_device *md); + #endif -- cgit v0.10.2 From 91212507f93778c09d4c1335207b6f4b995f5ad1 Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Thu, 13 Dec 2007 14:16:04 +0000 Subject: dm: merge max_hw_sector Make sure dm honours max_hw_sectors of underlying devices We still have no firm testing evidence in support of this patch but believe it may help to resolve some bug reports. - agk Signed-off-by: Neil Brown Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index f3f952e..47818d8 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -99,6 +99,9 @@ static void combine_restrictions_low(struct io_restrictions *lhs, lhs->max_segment_size = min_not_zero(lhs->max_segment_size, rhs->max_segment_size); + lhs->max_hw_sectors = + min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors); + lhs->seg_boundary_mask = min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask); @@ -566,6 +569,9 @@ void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) rs->max_segment_size = min_not_zero(rs->max_segment_size, q->max_segment_size); + rs->max_hw_sectors = + min_not_zero(rs->max_hw_sectors, q->max_hw_sectors); + rs->seg_boundary_mask = min_not_zero(rs->seg_boundary_mask, q->seg_boundary_mask); @@ -703,6 +709,8 @@ static void check_for_valid_limits(struct io_restrictions *rs) { if (!rs->max_sectors) rs->max_sectors = SAFE_MAX_SECTORS; + if (!rs->max_hw_sectors) + rs->max_hw_sectors = SAFE_MAX_SECTORS; if (!rs->max_phys_segments) rs->max_phys_segments = MAX_PHYS_SEGMENTS; if (!rs->max_hw_segments) @@ -901,6 +909,7 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) q->max_hw_segments = t->limits.max_hw_segments; q->hardsect_size = t->limits.hardsect_size; q->max_segment_size = t->limits.max_segment_size; + q->max_hw_sectors = t->limits.max_hw_sectors; q->seg_boundary_mask = t->limits.seg_boundary_mask; q->bounce_pfn = t->limits.bounce_pfn; if (t->limits.no_cluster) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index b8b7c51..e765e19 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -115,6 +115,7 @@ struct io_restrictions { unsigned short max_hw_segments; unsigned short hardsect_size; unsigned int max_segment_size; + unsigned int max_hw_sectors; unsigned long seg_boundary_mask; unsigned long bounce_pfn; unsigned char no_cluster; /* inverted so that 0 is default */ -- cgit v0.10.2 From 91e106259214b40e992a58fb9417da46868e19b2 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Thu, 13 Dec 2007 14:16:10 +0000 Subject: dm crypt: use bio_add_page Fix possible max_phys_segments violation in cloned dm-crypt bio. In write operation dm-crypt needs to allocate new bio request and run crypto operation on this clone. Cloned request has always the same size, but number of physical segments can be increased and violate max_phys_segments restriction. This can lead to data corruption and serious hardware malfunction. This was observed when using XFS over dm-crypt and at least two HBA controller drivers (arcmsr, cciss) recently. Fix it by using bio_add_page() call (which tests for other restrictions too) instead of constructing own biovec. All versions of dm-crypt are affected by this bug. Cc: stable@kernel.org Cc: dm-crypt@saout.de Signed-off-by: Milan Broz Signed-off-by: Alasdair G Kergon diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 30d51a0..6b66ee4 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -398,7 +398,8 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) struct bio *clone; unsigned int nr_iovecs = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; gfp_t gfp_mask = GFP_NOIO | __GFP_HIGHMEM; - unsigned int i; + unsigned i, len; + struct page *page; clone = bio_alloc_bioset(GFP_NOIO, nr_iovecs, cc->bs); if (!clone) @@ -407,10 +408,8 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) clone_init(io, clone); for (i = 0; i < nr_iovecs; i++) { - struct bio_vec *bv = bio_iovec_idx(clone, i); - - bv->bv_page = mempool_alloc(cc->page_pool, gfp_mask); - if (!bv->bv_page) + page = mempool_alloc(cc->page_pool, gfp_mask); + if (!page) break; /* @@ -421,15 +420,14 @@ static struct bio *crypt_alloc_buffer(struct dm_crypt_io *io, unsigned size) if (i == (MIN_BIO_PAGES - 1)) gfp_mask = (gfp_mask | __GFP_NOWARN) & ~__GFP_WAIT; - bv->bv_offset = 0; - if (size > PAGE_SIZE) - bv->bv_len = PAGE_SIZE; - else - bv->bv_len = size; + len = (size > PAGE_SIZE) ? PAGE_SIZE : size; + + if (!bio_add_page(clone, page, len, 0)) { + mempool_free(page, cc->page_pool); + break; + } - clone->bi_size += bv->bv_len; - clone->bi_vcnt++; - size -= bv->bv_len; + size -= len; } if (!clone->bi_size) { -- cgit v0.10.2