diff options
author | NeilBrown <neilb@cse.unsw.edu.au> | 2005-06-22 00:17:25 (GMT) |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-22 02:07:46 (GMT) |
commit | 41158c7eb22312cfaa256744e1553bb4042ff085 (patch) | |
tree | 21c28e0630d66fc32d758993299a78088a846562 | |
parent | 289e99e8ed8f36e386bf7de49947311c17ae1482 (diff) | |
download | linux-41158c7eb22312cfaa256744e1553bb4042ff085.tar.xz |
[PATCH] md: optimise reconstruction when re-adding a recently failed drive.
When an array is degraded, bit in the intent-bitmap are never cleared. So if
a recently failed drive is re-added, we only need to reconstruct the block
that are still reflected in the bitmap.
This patch adds support for this re-adding.
Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | drivers/md/md.c | 71 | ||||
-rw-r--r-- | drivers/md/raid1.c | 7 | ||||
-rw-r--r-- | include/linux/raid/md_k.h | 4 |
3 files changed, 65 insertions, 17 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index b02f8d1..789b114 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -577,6 +577,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) mdp_disk_t *desc; mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); + rdev->raid_disk = -1; + rdev->in_sync = 0; if (mddev->raid_disks == 0) { mddev->major_version = 0; mddev->minor_version = sb->minor_version; @@ -607,16 +609,24 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) memcpy(mddev->uuid+12,&sb->set_uuid3, 4); mddev->max_disks = MD_SB_DISKS; - } else { - __u64 ev1; - ev1 = md_event(sb); + } else if (mddev->pers == NULL) { + /* Insist on good event counter while assembling */ + __u64 ev1 = md_event(sb); ++ev1; if (ev1 < mddev->events) return -EINVAL; - } + } else if (mddev->bitmap) { + /* if adding to array with a bitmap, then we can accept an + * older device ... but not too old. + */ + __u64 ev1 = md_event(sb); + if (ev1 < mddev->bitmap->events_cleared) + return 0; + } else /* just a hot-add of a new device, leave raid_disk at -1 */ + return 0; + if (mddev->level != LEVEL_MULTIPATH) { - rdev->raid_disk = -1; - rdev->in_sync = rdev->faulty = 0; + rdev->faulty = 0; desc = sb->disks + rdev->desc_nr; if (desc->state & (1<<MD_DISK_FAULTY)) @@ -626,7 +636,8 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) rdev->in_sync = 1; rdev->raid_disk = desc->raid_disk; } - } + } else /* MULTIPATH are always insync */ + rdev->in_sync = 1; return 0; } @@ -868,6 +879,8 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) { struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); + rdev->raid_disk = -1; + rdev->in_sync = 0; if (mddev->raid_disks == 0) { mddev->major_version = 1; mddev->patch_version = 0; @@ -885,13 +898,21 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) memcpy(mddev->uuid, sb->set_uuid, 16); mddev->max_disks = (4096-256)/2; - } else { - __u64 ev1; - ev1 = le64_to_cpu(sb->events); + } else if (mddev->pers == NULL) { + /* Insist of good event counter while assembling */ + __u64 ev1 = le64_to_cpu(sb->events); ++ev1; if (ev1 < mddev->events) return -EINVAL; - } + } else if (mddev->bitmap) { + /* If adding to array with a bitmap, then we can accept an + * older device, but not too old. + */ + __u64 ev1 = le64_to_cpu(sb->events); + if (ev1 < mddev->bitmap->events_cleared) + return 0; + } else /* just a hot-add of a new device, leave raid_disk at -1 */ + return 0; if (mddev->level != LEVEL_MULTIPATH) { int role; @@ -899,14 +920,10 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); switch(role) { case 0xffff: /* spare */ - rdev->in_sync = 0; rdev->faulty = 0; - rdev->raid_disk = -1; break; case 0xfffe: /* faulty */ - rdev->in_sync = 0; rdev->faulty = 1; - rdev->raid_disk = -1; break; default: rdev->in_sync = 1; @@ -914,7 +931,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) rdev->raid_disk = role; break; } - } + } else /* MULTIPATH are always insync */ + rdev->in_sync = 1; + return 0; } @@ -2155,6 +2174,18 @@ static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info) PTR_ERR(rdev)); return PTR_ERR(rdev); } + /* set save_raid_disk if appropriate */ + if (!mddev->persistent) { + if (info->state & (1<<MD_DISK_SYNC) && + info->raid_disk < mddev->raid_disks) + rdev->raid_disk = info->raid_disk; + else + rdev->raid_disk = -1; + } else + super_types[mddev->major_version]. + validate_super(mddev, rdev); + rdev->saved_raid_disk = rdev->raid_disk; + rdev->in_sync = 0; /* just to be sure */ rdev->raid_disk = -1; err = bind_rdev_to_array(rdev, mddev); @@ -3706,6 +3737,14 @@ void md_check_recovery(mddev_t *mddev) mddev->pers->spare_active(mddev); } md_update_sb(mddev); + + /* if array is no-longer degraded, then any saved_raid_disk + * information must be scrapped + */ + if (!mddev->degraded) + ITERATE_RDEV(mddev,rdev,rtmp) + rdev->saved_raid_disk = -1; + mddev->recovery = 0; /* flag recovery needed just to double check */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index c3b4772..3f5234f 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -811,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) { conf_t *conf = mddev->private; int found = 0; - int mirror; + int mirror = 0; mirror_info_t *p; + if (rdev->saved_raid_disk >= 0 && + conf->mirrors[rdev->saved_raid_disk].rdev == NULL) + mirror = rdev->saved_raid_disk; for (mirror=0; mirror < mddev->raid_disks; mirror++) if ( !(p=conf->mirrors+mirror)->rdev) { @@ -830,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) p->head_position = 0; rdev->raid_disk = mirror; found = 1; + if (rdev->saved_raid_disk != mirror) + conf->fullsync = 1; p->rdev = rdev; break; } diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 16e94a9..6cdcb44 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -183,6 +183,10 @@ struct mdk_rdev_s int desc_nr; /* descriptor index in the superblock */ int raid_disk; /* role of device in array */ + int saved_raid_disk; /* role that device used to have in the + * array and could again if we did a partial + * resync from the bitmap + */ atomic_t nr_pending; /* number of pending requests. * only maintained for arrays that |