md/raid10: locking changes for 'enough()'.
As 'enough' accesses conf->prev and conf->geo, which can change spontanously, it should guard against changes. This can be done with device_lock as start_reshape holds device_lock while updating 'geo' and end_reshape holds it while updating 'prev'. So 'error' needs to hold 'device_lock'. On the other hand, raid10_end_read_request knows which of the two it really wants to access, and as it is an active request on that one, the value cannot change underneath it. So change _enough to take flag rather than a pointer, pass the appropriate flag from raid10_end_read_request(), and remove the locking. All other calls to 'enough' are made with reconfig_mutex held, so neither 'prev' nor 'geo' can change. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
@@ -97,7 +97,7 @@ static int max_queued_requests = 1024;
|
|||||||
|
|
||||||
static void allow_barrier(struct r10conf *conf);
|
static void allow_barrier(struct r10conf *conf);
|
||||||
static void lower_barrier(struct r10conf *conf);
|
static void lower_barrier(struct r10conf *conf);
|
||||||
static int enough(struct r10conf *conf, int ignore);
|
static int _enough(struct r10conf *conf, int previous, int ignore);
|
||||||
static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
|
static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
|
||||||
int *skipped);
|
int *skipped);
|
||||||
static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
|
static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
|
||||||
@@ -392,11 +392,9 @@ static void raid10_end_read_request(struct bio *bio, int error)
|
|||||||
* than fail the last device. Here we redefine
|
* than fail the last device. Here we redefine
|
||||||
* "uptodate" to mean "Don't want to retry"
|
* "uptodate" to mean "Don't want to retry"
|
||||||
*/
|
*/
|
||||||
unsigned long flags;
|
if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
|
||||||
spin_lock_irqsave(&conf->device_lock, flags);
|
rdev->raid_disk))
|
||||||
if (!enough(conf, rdev->raid_disk))
|
|
||||||
uptodate = 1;
|
uptodate = 1;
|
||||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
|
||||||
}
|
}
|
||||||
if (uptodate) {
|
if (uptodate) {
|
||||||
raid_end_bio_io(r10_bio);
|
raid_end_bio_io(r10_bio);
|
||||||
@@ -1632,9 +1630,17 @@ static void status(struct seq_file *seq, struct mddev *mddev)
|
|||||||
* Don't consider the device numbered 'ignore'
|
* Don't consider the device numbered 'ignore'
|
||||||
* as we might be about to remove it.
|
* as we might be about to remove it.
|
||||||
*/
|
*/
|
||||||
static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
|
static int _enough(struct r10conf *conf, int previous, int ignore)
|
||||||
{
|
{
|
||||||
int first = 0;
|
int first = 0;
|
||||||
|
int disks, ncopies;
|
||||||
|
if (previous) {
|
||||||
|
disks = conf->prev.raid_disks;
|
||||||
|
ncopies = conf->prev.near_copies;
|
||||||
|
} else {
|
||||||
|
disks = conf->geo.raid_disks;
|
||||||
|
ncopies = conf->geo.near_copies;
|
||||||
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
int n = conf->copies;
|
int n = conf->copies;
|
||||||
@@ -1644,25 +1650,31 @@ static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
|
|||||||
if (conf->mirrors[this].rdev &&
|
if (conf->mirrors[this].rdev &&
|
||||||
this != ignore)
|
this != ignore)
|
||||||
cnt++;
|
cnt++;
|
||||||
this = (this+1) % geo->raid_disks;
|
this = (this+1) % disks;
|
||||||
}
|
}
|
||||||
if (cnt == 0)
|
if (cnt == 0)
|
||||||
return 0;
|
return 0;
|
||||||
first = (first + geo->near_copies) % geo->raid_disks;
|
first = (first + ncopies) % disks;
|
||||||
} while (first != 0);
|
} while (first != 0);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int enough(struct r10conf *conf, int ignore)
|
static int enough(struct r10conf *conf, int ignore)
|
||||||
{
|
{
|
||||||
return _enough(conf, &conf->geo, ignore) &&
|
/* when calling 'enough', both 'prev' and 'geo' must
|
||||||
_enough(conf, &conf->prev, ignore);
|
* be stable.
|
||||||
|
* This is ensured if ->reconfig_mutex or ->device_lock
|
||||||
|
* is held.
|
||||||
|
*/
|
||||||
|
return _enough(conf, 0, ignore) &&
|
||||||
|
_enough(conf, 1, ignore);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void error(struct mddev *mddev, struct md_rdev *rdev)
|
static void error(struct mddev *mddev, struct md_rdev *rdev)
|
||||||
{
|
{
|
||||||
char b[BDEVNAME_SIZE];
|
char b[BDEVNAME_SIZE];
|
||||||
struct r10conf *conf = mddev->private;
|
struct r10conf *conf = mddev->private;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If it is not operational, then we have already marked it as dead
|
* If it is not operational, then we have already marked it as dead
|
||||||
@@ -1670,18 +1682,18 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
|
|||||||
* next level up know.
|
* next level up know.
|
||||||
* else mark the drive as failed
|
* else mark the drive as failed
|
||||||
*/
|
*/
|
||||||
|
spin_lock_irqsave(&conf->device_lock, flags);
|
||||||
if (test_bit(In_sync, &rdev->flags)
|
if (test_bit(In_sync, &rdev->flags)
|
||||||
&& !enough(conf, rdev->raid_disk))
|
&& !enough(conf, rdev->raid_disk)) {
|
||||||
/*
|
/*
|
||||||
* Don't fail the drive, just return an IO error.
|
* Don't fail the drive, just return an IO error.
|
||||||
*/
|
*/
|
||||||
return;
|
|
||||||
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
|
||||||
unsigned long flags;
|
|
||||||
spin_lock_irqsave(&conf->device_lock, flags);
|
|
||||||
mddev->degraded++;
|
|
||||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
/*
|
return;
|
||||||
|
}
|
||||||
|
if (test_and_clear_bit(In_sync, &rdev->flags)) {
|
||||||
|
mddev->degraded++;
|
||||||
|
/*
|
||||||
* if recovery is running, make sure it aborts.
|
* if recovery is running, make sure it aborts.
|
||||||
*/
|
*/
|
||||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||||
@@ -1689,6 +1701,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
|
|||||||
set_bit(Blocked, &rdev->flags);
|
set_bit(Blocked, &rdev->flags);
|
||||||
set_bit(Faulty, &rdev->flags);
|
set_bit(Faulty, &rdev->flags);
|
||||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||||
|
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||||
printk(KERN_ALERT
|
printk(KERN_ALERT
|
||||||
"md/raid10:%s: Disk failure on %s, disabling device.\n"
|
"md/raid10:%s: Disk failure on %s, disabling device.\n"
|
||||||
"md/raid10:%s: Operation continuing on %d devices.\n",
|
"md/raid10:%s: Operation continuing on %d devices.\n",
|
||||||
@@ -1791,7 +1804,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
|
|||||||
* very different from resync
|
* very different from resync
|
||||||
*/
|
*/
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1))
|
if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
if (rdev->raid_disk >= 0)
|
if (rdev->raid_disk >= 0)
|
||||||
|
|||||||
Reference in New Issue
Block a user