From 7bb23c4934059c64cbee2e41d5d24ce122285176 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Tue, 16 Jul 2013 16:50:47 +1000
Subject: md/raid10: fix two problems with RAID10 resync.

1/ When a difference between blocks is found, data is copied from
   one bio to the other.  However bv_len is used as the length to
   copy and this could be zero.  So use r10_bio->sectors to calculate
   the length instead.
   Using bv_len was probably always a bit dubious, but the introduction
   of bio_advance made it much more likely to be a problem.

2/ When preparing some blocks for sync, we don't set BIO_UPTODATE
   except on bios that we schedule for a read.  This ensures that
   missing/failed devices don't confuse the loop at the top of
   sync_request_write.
   Commit 8be185f2c9d54d6 "raid10: Use bio_reset()" removed a loop
   which set BIO_UPTODATE on all appropriate bios.  So we need to
   re-add that flag.

These bugs were introduced in 3.10, so this patch is suitable for
3.10-stable, and removes a potential cause of data corruption.

Cc: stable@vger.kernel.org (3.10)
Reported-by: Brassow Jonathan
Signed-off-by: NeilBrown

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index cd066b6..957a719 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2097,11 +2097,17 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 			 * both 'first' and 'i', so we just compare them.
 			 * All vec entries are PAGE_SIZE;
 			 */
-			for (j = 0; j < vcnt; j++)
+			int sectors = r10_bio->sectors;
+			for (j = 0; j < vcnt; j++) {
+				int len = PAGE_SIZE;
+				if (sectors < (len / 512))
+					len = sectors * 512;
 				if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
 					   page_address(tbio->bi_io_vec[j].bv_page),
-					   fbio->bi_io_vec[j].bv_len))
+					   len))
 					break;
+				sectors -= len/512;
+			}
 			if (j == vcnt)
 				continue;
 			atomic64_add(r10_bio->sectors, &mddev->resync_mismatches);
@@ -3407,6 +3413,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
 
 		if (bio->bi_end_io == end_sync_read) {
 			md_sync_acct(bio->bi_bdev, nr_sectors);
+			set_bit(BIO_UPTODATE, &bio->bi_flags);
 			generic_make_request(bio);
 		}
 	}
--
cgit v0.10.2
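The heart of the first fix is deriving the per-page compare length from the remaining sector count rather than trusting bv_len, which bio_advance may have left at zero. A minimal user-space sketch of that arithmetic (first_mismatch(), fpages and tpages are hypothetical names, not the kernel's):

/* Illustrative only: compare pages covering 'sectors' 512-byte sectors,
 * clamping the final page's length instead of trusting bv_len. */
#include <string.h>

#define PAGE_SIZE 4096

static int first_mismatch(void **fpages, void **tpages, int npages, int sectors)
{
	int j;
	for (j = 0; j < npages; j++) {
		int len = PAGE_SIZE;
		if (sectors < len / 512)
			len = sectors * 512;	/* partial final page */
		if (memcmp(fpages[j], tpages[j], len))
			break;			/* difference found */
		sectors -= len / 512;
	}
	return j;	/* == npages when everything matched */
}

Once fewer than a full page of sectors remains, len shrinks to sectors * 512, so a zero or stale bv_len can no longer truncate or inflate the comparison.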
From 5024c298311f3b97c85cb034f9edaa333fdb9338 Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Wed, 17 Jul 2013 14:55:31 +1000
Subject: md: Remove recent change which allows devices to skip recovery.

commit 7ceb17e87bde79d285a8b988cfed9eaeebe60b86
    md: Allow devices to be re-added to a read-only array.

allowed a bit more than just that.  It also allows devices to be added
to a read-write array and to end up skipping recovery.

This patch removes the offending piece of code pending a rewrite for a
subsequent release.

More specifically:
If the array has a bitmap, then the device will still need a
bitmap-based resync ('saved_raid_disk' is set under different
conditions if a bitmap is present).
If the array doesn't have a bitmap, then this is correct as long as
nothing has been written to the array since the metadata was checked
by ->validate_super.  However there is no locking to ensure that there
was no write.

The bug was introduced in 3.10 and causes data corruption, so this
patch is suitable for 3.10-stable.

Cc: stable@vger.kernel.org (3.10)
Reported-by: Joe Lawrence
Signed-off-by: NeilBrown

diff --git a/drivers/md/md.c b/drivers/md/md.c
index dddc87b..9f13e13 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -7716,20 +7716,6 @@ static int remove_and_add_spares(struct mddev *mddev,
 			continue;
 
 		rdev->recovery_offset = 0;
-		if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
-			spin_lock_irq(&mddev->write_lock);
-			if (mddev->in_sync)
-				/* OK, this device, which is in_sync,
-				 * will definitely be noticed before
-				 * the next write, so recovery isn't
-				 * needed.
-				 */
-				rdev->recovery_offset = mddev->recovery_cp;
-			spin_unlock_irq(&mddev->write_lock);
-		}
-		if (mddev->ro && rdev->recovery_offset != MaxSector)
-			/* not safe to add this disk now */
-			continue;
 		if (mddev->pers->
 		    hot_add_disk(mddev, rdev) == 0) {
 			if (sysfs_link_rdev(mddev, rdev))
--
cgit v0.10.2
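The locking gap described above is a check-then-act race: the decision to skip recovery rests on state that may change before the device is actually added. A toy user-space model of that pattern (nothing below is kernel API; in_sync and skipped_recovery merely stand in for mddev->in_sync and the recovery_offset shortcut):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int in_sync = 1;		/* models mddev->in_sync */
static int skipped_recovery;	/* models recovery_offset = recovery_cp */

static void *spare_adder(void *arg)
{
	int ok;
	(void)arg;
	pthread_mutex_lock(&lock);
	ok = in_sync;		/* check... */
	pthread_mutex_unlock(&lock);
	/* ...window: a write can slip in here... */
	if (ok)
		skipped_recovery = 1;	/* ...act on a possibly stale check */
	return NULL;
}

static void *writer(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	in_sync = 0;		/* the array is being written to */
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;
	pthread_create(&a, NULL, spare_adder, NULL);
	pthread_create(&b, NULL, writer, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	if (skipped_recovery && !in_sync)
		printf("stale decision: recovery skipped on a written array\n");
	return 0;
}

Holding a lock across both the check and the add, or re-validating afterwards, would close the window; this patch instead removes the shortcut entirely pending a proper rewrite.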
From 30bc9b53878a9921b02e3b5bc4283ac1c6de102a Mon Sep 17 00:00:00 2001
From: NeilBrown
Date: Wed, 17 Jul 2013 15:19:29 +1000
Subject: md/raid1: fix bio handling problems in process_checks()

The recent change to use bio_copy_data() in raid1 when repairing an
array is faulty.  The underlying device may have changed the bios in
various ways using bio_advance, and these changes need to be undone
not just for the 'sbio' which is being copied to, but also for the
'pbio' (primary) which is being copied from.

So perform the reset on all bios that were read from, and do it early.

This also ensures that the sbio->bi_io_vec[j].bv_len passed to memcmp
is correct.

This fixes a crash during a 'check' of a RAID1 array.  The crash was
introduced in 3.10 so this is suitable for 3.10-stable.

Cc: stable@vger.kernel.org (3.10)
Reported-by: Joe Lawrence
Signed-off-by: NeilBrown

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ec73458..d60412c 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1849,6 +1849,36 @@ static int process_checks(struct r1bio *r1_bio)
 	int i;
 	int vcnt;
 
+	/* Fix variable parts of all bios */
+	vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9);
+	for (i = 0; i < conf->raid_disks * 2; i++) {
+		int j;
+		int size;
+		struct bio *b = r1_bio->bios[i];
+		if (b->bi_end_io != end_sync_read)
+			continue;
+		/* fixup the bio for reuse */
+		bio_reset(b);
+		b->bi_vcnt = vcnt;
+		b->bi_size = r1_bio->sectors << 9;
+		b->bi_sector = r1_bio->sector +
+			conf->mirrors[i].rdev->data_offset;
+		b->bi_bdev = conf->mirrors[i].rdev->bdev;
+		b->bi_end_io = end_sync_read;
+		b->bi_private = r1_bio;
+
+		size = b->bi_size;
+		for (j = 0; j < vcnt ; j++) {
+			struct bio_vec *bi;
+			bi = &b->bi_io_vec[j];
+			bi->bv_offset = 0;
+			if (size > PAGE_SIZE)
+				bi->bv_len = PAGE_SIZE;
+			else
+				bi->bv_len = size;
+			size -= PAGE_SIZE;
+		}
+	}
 	for (primary = 0; primary < conf->raid_disks * 2; primary++)
 		if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
 		    test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
@@ -1857,12 +1887,10 @@ static int process_checks(struct r1bio *r1_bio)
 			break;
 		}
 	r1_bio->read_disk = primary;
-	vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9);
 	for (i = 0; i < conf->raid_disks * 2; i++) {
 		int j;
 		struct bio *pbio = r1_bio->bios[primary];
 		struct bio *sbio = r1_bio->bios[i];
-		int size;
 
 		if (sbio->bi_end_io != end_sync_read)
 			continue;
@@ -1888,27 +1916,6 @@ static int process_checks(struct r1bio *r1_bio)
 			rdev_dec_pending(conf->mirrors[i].rdev, mddev);
 			continue;
 		}
-		/* fixup the bio for reuse */
-		bio_reset(sbio);
-		sbio->bi_vcnt = vcnt;
-		sbio->bi_size = r1_bio->sectors << 9;
-		sbio->bi_sector = r1_bio->sector +
-			conf->mirrors[i].rdev->data_offset;
-		sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
-		sbio->bi_end_io = end_sync_read;
-		sbio->bi_private = r1_bio;
-
-		size = sbio->bi_size;
-		for (j = 0; j < vcnt ; j++) {
-			struct bio_vec *bi;
-			bi = &sbio->bi_io_vec[j];
-			bi->bv_offset = 0;
-			if (size > PAGE_SIZE)
-				bi->bv_len = PAGE_SIZE;
-			else
-				bi->bv_len = size;
-			size -= PAGE_SIZE;
-		}
 		bio_copy_data(sbio, pbio);
 	}
--
cgit v0.10.2
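The fixup that moved to the top of process_checks() does two things: it sizes the vec array for the I/O and rebuilds each entry's offset and length. A standalone sketch of both calculations, using hypothetical stand-ins (struct vec, vcnt_for(), refill()) rather than the kernel's types:

#include <stddef.h>

#define PAGE_SIZE  4096UL
#define PAGE_SHIFT 12

struct vec { size_t off, len; };

/* Pages needed to hold 'sectors' 512-byte sectors, rounding up. */
static int vcnt_for(unsigned long sectors)
{
	return (sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9);
}

/* Reset offsets and spread 'total' bytes across the vecs,
 * PAGE_SIZE at a time, with a short final entry if needed. */
static void refill(struct vec *v, int nvecs, size_t total)
{
	int j;
	for (j = 0; j < nvecs; j++) {
		v[j].off = 0;
		v[j].len = total > PAGE_SIZE ? PAGE_SIZE : total;
		total -= v[j].len;	/* subtract what was assigned */
	}
}

Doing this before the memcmp loop is the point of the patch: bio_advance may have shifted offsets and shrunk lengths on every bio that completed a read, including the primary being copied from, so all of them must be reset before their vecs are compared or copied.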