mirror of
https://github.com/torvalds/linux.git
synced 2024-11-11 06:31:49 +00:00
md: don't retry recovery of raid1 that fails due to error on source drive.
If a raid1 has only one working drive and it has a sector which gives an error on read, then an attempt to recover onto a spare will fail, but as the single remaining drive is not removed from the array, the recovery will be immediately re-attempted, resulting in an infinite recovery loop. So detect this situation and don't retry recovery once an error on the lone remaining drive is detected. Allow recovery to be retried once every time a spare is added in case the problem wasn't actually a media error. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
efeb53c0e5
commit
4044ba58dd
@ -1500,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
|
||||
|
||||
list_add_rcu(&rdev->same_set, &mddev->disks);
|
||||
bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
|
||||
|
||||
/* May as well allow recovery to be retried once */
|
||||
mddev->recovery_disabled = 0;
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
@ -6175,7 +6178,7 @@ static int remove_and_add_spares(mddev_t *mddev)
|
||||
}
|
||||
}
|
||||
|
||||
if (mddev->degraded && ! mddev->ro) {
|
||||
if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
|
||||
list_for_each_entry(rdev, &mddev->disks, same_set) {
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
!test_bit(In_sync, &rdev->flags) &&
|
||||
|
@ -1016,12 +1016,16 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
|
||||
* else mark the drive as failed
|
||||
*/
|
||||
if (test_bit(In_sync, &rdev->flags)
|
||||
&& (conf->raid_disks - mddev->degraded) == 1)
|
||||
&& (conf->raid_disks - mddev->degraded) == 1) {
|
||||
/*
|
||||
* Don't fail the drive, act as though we were just a
|
||||
* normal single drive
|
||||
* normal single drive.
|
||||
* However don't try a recovery from this drive as
|
||||
* it is very likely to fail.
|
||||
*/
|
||||
mddev->recovery_disabled = 1;
|
||||
return;
|
||||
}
|
||||
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
||||
unsigned long flags;
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
|
@ -218,6 +218,9 @@ struct mddev_s
|
||||
#define MD_RECOVERY_FROZEN 9
|
||||
|
||||
unsigned long recovery;
|
||||
int recovery_disabled; /* if we detect that recovery
|
||||
* will always fail, set this
|
||||
* so we don't loop trying */
|
||||
|
||||
int in_sync; /* know to not need resync */
|
||||
struct mutex reconfig_mutex;
|
||||
|
Loading…
Reference in New Issue
Block a user