summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2012-04-01 13:48:38 (GMT)
committerNeilBrown <neilb@suse.de>2012-04-03 05:36:17 (GMT)
commit18b9837ea0dc3cf844c6c4196871ce91d047bddb (patch)
treede8be80b767d144405aeeaf7238c723f575b4b40
parent5220ea1e640869e70f894837678315c878c651fd (diff)
downloadlinux-18b9837ea0dc3cf844c6c4196871ce91d047bddb.tar.xz
md/raid5: fix handling of bad blocks during recovery.
1/ We can only treat a known-bad-block like a read-error if we have the data that belongs in that block. So fix that test. 2/ If we cannot recovery a stripe due to insufficient data, don't tell "md_done_sync" that the sync failed unless we really did fail something. If we successfully record bad blocks, that is success. Reported-by: "majianpeng" <majianpeng@gmail.com> Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid5.c55
1 files changed, 29 insertions, 26 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 23ac880..9799be8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2471,39 +2471,41 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
int abort = 0;
int i;
- md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
clear_bit(STRIPE_SYNCING, &sh->state);
s->syncing = 0;
s->replacing = 0;
/* There is nothing more to do for sync/check/repair.
+ * Don't even need to abort as that is handled elsewhere
+ * if needed, and not always wanted e.g. if there is a known
+ * bad block here.
* For recover/replace we need to record a bad block on all
* non-sync devices, or abort the recovery
*/
- if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
- return;
- /* During recovery devices cannot be removed, so locking and
- * refcounting of rdevs is not needed
- */
- for (i = 0; i < conf->raid_disks; i++) {
- struct md_rdev *rdev = conf->disks[i].rdev;
- if (rdev
- && !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)
- && !rdev_set_badblocks(rdev, sh->sector,
- STRIPE_SECTORS, 0))
- abort = 1;
- rdev = conf->disks[i].replacement;
- if (rdev
- && !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)
- && !rdev_set_badblocks(rdev, sh->sector,
- STRIPE_SECTORS, 0))
- abort = 1;
- }
- if (abort) {
- conf->recovery_disabled = conf->mddev->recovery_disabled;
- set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+ if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
+ /* During recovery devices cannot be removed, so
+ * locking and refcounting of rdevs is not needed
+ */
+ for (i = 0; i < conf->raid_disks; i++) {
+ struct md_rdev *rdev = conf->disks[i].rdev;
+ if (rdev
+ && !test_bit(Faulty, &rdev->flags)
+ && !test_bit(In_sync, &rdev->flags)
+ && !rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ rdev = conf->disks[i].replacement;
+ if (rdev
+ && !test_bit(Faulty, &rdev->flags)
+ && !test_bit(In_sync, &rdev->flags)
+ && !rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ }
+ if (abort)
+ conf->recovery_disabled =
+ conf->mddev->recovery_disabled;
}
+ md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
}
static int want_replace(struct stripe_head *sh, int disk_idx)
@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* Not in-sync */;
else if (is_bad) {
/* also not in-sync */
- if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+ if (!test_bit(WriteErrorSeen, &rdev->flags) &&
+ test_bit(R5_UPTODATE, &dev->flags)) {
/* treat as in-sync, but with a read error
* which we can now try to correct
*/