[PATCH] md: replace magic numbers in sb_dirty with well defined bit flags

Instead of magic numbers (0,1,2,3) in sb_dirty, we now have
some well-defined bit flags:
MD_CHANGE_DEVS
   Some device state has changed requiring superblock update
   on all devices.
MD_CHANGE_CLEAN
   The array has transitioned from 'clean' to 'dirty' or back,
   requiring a superblock update on active devices, but possibly
   not on spares
MD_CHANGE_PENDING
   A superblock update is underway.

We wait for an update to complete by waiting for all flags to be clear.  A
flag can be set at any time, even during an update, without risk that the
change will be lost.

Stop exporting md_update_sb - the export isn't needed.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
NeilBrown 2006-10-03 01:15:46 -07:00 committed by Linus Torvalds
parent 6814d5368d
commit 850b2b420c
7 changed files with 52 additions and 46 deletions

View File

@ -1587,7 +1587,7 @@ static void sync_sbs(mddev_t * mddev, int nospares)
} }
} }
void md_update_sb(mddev_t * mddev) static void md_update_sb(mddev_t * mddev, int force_change)
{ {
int err; int err;
struct list_head *tmp; struct list_head *tmp;
@ -1598,7 +1598,18 @@ void md_update_sb(mddev_t * mddev)
repeat: repeat:
spin_lock_irq(&mddev->write_lock); spin_lock_irq(&mddev->write_lock);
if (mddev->degraded && mddev->sb_dirty == 3) set_bit(MD_CHANGE_PENDING, &mddev->flags);
if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
force_change = 1;
if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
/* just a clean<-> dirty transition, possibly leave spares alone,
* though if events isn't the right even/odd, we will have to do
* spares after all
*/
nospares = 1;
if (force_change)
nospares = 0;
if (mddev->degraded)
/* If the array is degraded, then skipping spares is both /* If the array is degraded, then skipping spares is both
* dangerous and fairly pointless. * dangerous and fairly pointless.
* Dangerous because a device that was removed from the array * Dangerous because a device that was removed from the array
@ -1608,20 +1619,14 @@ repeat:
* then a recovery will happen and soon that array won't * then a recovery will happen and soon that array won't
* be degraded any more and the spare can go back to sleep then. * be degraded any more and the spare can go back to sleep then.
*/ */
mddev->sb_dirty = 1; nospares = 0;
sync_req = mddev->in_sync; sync_req = mddev->in_sync;
mddev->utime = get_seconds(); mddev->utime = get_seconds();
if (mddev->sb_dirty == 3)
/* just a clean<-> dirty transition, possibly leave spares alone,
* though if events isn't the right even/odd, we will have to do
* spares after all
*/
nospares = 1;
/* If this is just a dirty<->clean transition, and the array is clean /* If this is just a dirty<->clean transition, and the array is clean
* and 'events' is odd, we can roll back to the previous clean state */ * and 'events' is odd, we can roll back to the previous clean state */
if (mddev->sb_dirty == 3 if (nospares
&& (mddev->in_sync && mddev->recovery_cp == MaxSector) && (mddev->in_sync && mddev->recovery_cp == MaxSector)
&& (mddev->events & 1)) && (mddev->events & 1))
mddev->events--; mddev->events--;
@ -1652,7 +1657,6 @@ repeat:
MD_BUG(); MD_BUG();
mddev->events --; mddev->events --;
} }
mddev->sb_dirty = 2;
sync_sbs(mddev, nospares); sync_sbs(mddev, nospares);
/* /*
@ -1660,7 +1664,7 @@ repeat:
* nonpersistent superblocks * nonpersistent superblocks
*/ */
if (!mddev->persistent) { if (!mddev->persistent) {
mddev->sb_dirty = 0; clear_bit(MD_CHANGE_PENDING, &mddev->flags);
spin_unlock_irq(&mddev->write_lock); spin_unlock_irq(&mddev->write_lock);
wake_up(&mddev->sb_wait); wake_up(&mddev->sb_wait);
return; return;
@ -1697,20 +1701,20 @@ repeat:
break; break;
} }
md_super_wait(mddev); md_super_wait(mddev);
/* if there was a failure, sb_dirty was set to 1, and we re-write super */ /* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
spin_lock_irq(&mddev->write_lock); spin_lock_irq(&mddev->write_lock);
if (mddev->in_sync != sync_req|| mddev->sb_dirty == 1) { if (mddev->in_sync != sync_req ||
test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
/* have to write it out again */ /* have to write it out again */
spin_unlock_irq(&mddev->write_lock); spin_unlock_irq(&mddev->write_lock);
goto repeat; goto repeat;
} }
mddev->sb_dirty = 0; clear_bit(MD_CHANGE_PENDING, &mddev->flags);
spin_unlock_irq(&mddev->write_lock); spin_unlock_irq(&mddev->write_lock);
wake_up(&mddev->sb_wait); wake_up(&mddev->sb_wait);
} }
EXPORT_SYMBOL_GPL(md_update_sb);
/* words written to sysfs files may, or may not, be \n terminated. /* words written to sysfs files may, or may not, be \n terminated.
* We want to accept either case. For this we use cmd_match. * We want to accept either case. For this we use cmd_match.
@ -1783,7 +1787,7 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
else { else {
mddev_t *mddev = rdev->mddev; mddev_t *mddev = rdev->mddev;
kick_rdev_from_array(rdev); kick_rdev_from_array(rdev);
md_update_sb(mddev); md_update_sb(mddev, 1);
md_new_event(mddev); md_new_event(mddev);
err = 0; err = 0;
} }
@ -2426,7 +2430,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
spin_lock_irq(&mddev->write_lock); spin_lock_irq(&mddev->write_lock);
if (atomic_read(&mddev->writes_pending) == 0) { if (atomic_read(&mddev->writes_pending) == 0) {
mddev->in_sync = 1; mddev->in_sync = 1;
mddev->sb_dirty = 1; set_bit(MD_CHANGE_CLEAN, &mddev->flags);
} }
spin_unlock_irq(&mddev->write_lock); spin_unlock_irq(&mddev->write_lock);
} else { } else {
@ -2438,7 +2442,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
case active: case active:
if (mddev->pers) { if (mddev->pers) {
restart_array(mddev); restart_array(mddev);
mddev->sb_dirty = 0; clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
wake_up(&mddev->sb_wait); wake_up(&mddev->sb_wait);
err = 0; err = 0;
} else { } else {
@ -2543,7 +2547,7 @@ size_store(mddev_t *mddev, const char *buf, size_t len)
if (mddev->pers) { if (mddev->pers) {
err = update_size(mddev, size); err = update_size(mddev, size);
md_update_sb(mddev); md_update_sb(mddev, 1);
} else { } else {
if (mddev->size == 0 || if (mddev->size == 0 ||
mddev->size > size) mddev->size > size)
@ -3111,8 +3115,8 @@ static int do_md_run(mddev_t * mddev)
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
if (mddev->sb_dirty) if (mddev->flags)
md_update_sb(mddev); md_update_sb(mddev, 0);
set_capacity(disk, mddev->array_size<<1); set_capacity(disk, mddev->array_size<<1);
@ -3275,10 +3279,10 @@ static int do_md_stop(mddev_t * mddev, int mode)
if (mddev->ro) if (mddev->ro)
mddev->ro = 0; mddev->ro = 0;
} }
if (!mddev->in_sync || mddev->sb_dirty) { if (!mddev->in_sync || mddev->flags) {
/* mark array as shutdown cleanly */ /* mark array as shutdown cleanly */
mddev->in_sync = 1; mddev->in_sync = 1;
md_update_sb(mddev); md_update_sb(mddev, 1);
} }
if (mode == 1) if (mode == 1)
set_disk_ro(disk, 1); set_disk_ro(disk, 1);
@ -3747,7 +3751,7 @@ static int hot_remove_disk(mddev_t * mddev, dev_t dev)
goto busy; goto busy;
kick_rdev_from_array(rdev); kick_rdev_from_array(rdev);
md_update_sb(mddev); md_update_sb(mddev, 1);
md_new_event(mddev); md_new_event(mddev);
return 0; return 0;
@ -3824,7 +3828,7 @@ static int hot_add_disk(mddev_t * mddev, dev_t dev)
rdev->raid_disk = -1; rdev->raid_disk = -1;
md_update_sb(mddev); md_update_sb(mddev, 1);
/* /*
* Kick recovery, maybe this spare has to be added to the * Kick recovery, maybe this spare has to be added to the
@ -3955,7 +3959,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
mddev->max_disks = MD_SB_DISKS; mddev->max_disks = MD_SB_DISKS;
mddev->sb_dirty = 1; mddev->flags = 0;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
mddev->default_bitmap_offset = MD_SB_BYTES >> 9; mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
mddev->bitmap_offset = 0; mddev->bitmap_offset = 0;
@ -4124,7 +4129,7 @@ static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
mddev->bitmap_offset = 0; mddev->bitmap_offset = 0;
} }
} }
md_update_sb(mddev); md_update_sb(mddev, 1);
return rv; return rv;
} }
@ -4960,12 +4965,12 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
spin_lock_irq(&mddev->write_lock); spin_lock_irq(&mddev->write_lock);
if (mddev->in_sync) { if (mddev->in_sync) {
mddev->in_sync = 0; mddev->in_sync = 0;
mddev->sb_dirty = 3; set_bit(MD_CHANGE_CLEAN, &mddev->flags);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
} }
spin_unlock_irq(&mddev->write_lock); spin_unlock_irq(&mddev->write_lock);
} }
wait_event(mddev->sb_wait, mddev->sb_dirty==0); wait_event(mddev->sb_wait, mddev->flags==0);
} }
void md_write_end(mddev_t *mddev) void md_write_end(mddev_t *mddev)
@ -5235,7 +5240,6 @@ void md_do_sync(mddev_t *mddev)
!test_bit(In_sync, &rdev->flags) && !test_bit(In_sync, &rdev->flags) &&
rdev->recovery_offset < mddev->curr_resync) rdev->recovery_offset < mddev->curr_resync)
rdev->recovery_offset = mddev->curr_resync; rdev->recovery_offset = mddev->curr_resync;
mddev->sb_dirty = 1;
} }
} }
@ -5292,7 +5296,7 @@ void md_check_recovery(mddev_t *mddev)
} }
if ( ! ( if ( ! (
mddev->sb_dirty || mddev->flags ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
(mddev->safemode == 1) || (mddev->safemode == 1) ||
@ -5308,14 +5312,14 @@ void md_check_recovery(mddev_t *mddev)
if (mddev->safemode && !atomic_read(&mddev->writes_pending) && if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
!mddev->in_sync && mddev->recovery_cp == MaxSector) { !mddev->in_sync && mddev->recovery_cp == MaxSector) {
mddev->in_sync = 1; mddev->in_sync = 1;
mddev->sb_dirty = 3; set_bit(MD_CHANGE_CLEAN, &mddev->flags);
} }
if (mddev->safemode == 1) if (mddev->safemode == 1)
mddev->safemode = 0; mddev->safemode = 0;
spin_unlock_irq(&mddev->write_lock); spin_unlock_irq(&mddev->write_lock);
if (mddev->sb_dirty) if (mddev->flags)
md_update_sb(mddev); md_update_sb(mddev, 0);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
@ -5334,7 +5338,7 @@ void md_check_recovery(mddev_t *mddev)
/* activate any spares */ /* activate any spares */
mddev->pers->spare_active(mddev); mddev->pers->spare_active(mddev);
} }
md_update_sb(mddev); md_update_sb(mddev, 1);
/* if array is no-longer degraded, then any saved_raid_disk /* if array is no-longer degraded, then any saved_raid_disk
* information must be scrapped * information must be scrapped

View File

@ -253,7 +253,7 @@ static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
char b[BDEVNAME_SIZE]; char b[BDEVNAME_SIZE];
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
mddev->sb_dirty = 1; set_bit(MD_CHANGE_DEVS, &mddev->flags);
conf->working_disks--; conf->working_disks--;
printk(KERN_ALERT "multipath: IO failure on %s," printk(KERN_ALERT "multipath: IO failure on %s,"
" disabling IO path. \n Operation continuing" " disabling IO path. \n Operation continuing"
@ -470,7 +470,6 @@ static int multipath_run (mddev_t *mddev)
} }
conf->raid_disks = mddev->raid_disks; conf->raid_disks = mddev->raid_disks;
mddev->sb_dirty = 1;
conf->mddev = mddev; conf->mddev = mddev;
spin_lock_init(&conf->device_lock); spin_lock_init(&conf->device_lock);
INIT_LIST_HEAD(&conf->retry_list); INIT_LIST_HEAD(&conf->retry_list);

View File

@ -969,7 +969,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
} }
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
mddev->sb_dirty = 1; set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n" printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
" Operation continuing on %d devices\n", " Operation continuing on %d devices\n",
bdevname(rdev->bdev,b), conf->working_disks); bdevname(rdev->bdev,b), conf->working_disks);

View File

@ -960,7 +960,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
} }
clear_bit(In_sync, &rdev->flags); clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags); set_bit(Faulty, &rdev->flags);
mddev->sb_dirty = 1; set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n" printk(KERN_ALERT "raid10: Disk failure on %s, disabling device. \n"
" Operation continuing on %d devices\n", " Operation continuing on %d devices\n",
bdevname(rdev->bdev,b), conf->working_disks); bdevname(rdev->bdev,b), conf->working_disks);

View File

@ -696,7 +696,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
PRINTK("raid5: error called\n"); PRINTK("raid5: error called\n");
if (!test_bit(Faulty, &rdev->flags)) { if (!test_bit(Faulty, &rdev->flags)) {
mddev->sb_dirty = 1; set_bit(MD_CHANGE_DEVS, &mddev->flags);
if (test_bit(In_sync, &rdev->flags)) { if (test_bit(In_sync, &rdev->flags)) {
conf->working_disks--; conf->working_disks--;
mddev->degraded++; mddev->degraded++;
@ -2781,9 +2781,9 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
wait_event(conf->wait_for_overlap, wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes)==0); atomic_read(&conf->reshape_stripes)==0);
mddev->reshape_position = conf->expand_progress; mddev->reshape_position = conf->expand_progress;
mddev->sb_dirty = 1; set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait, mddev->sb_dirty == 0 || wait_event(mddev->sb_wait, mddev->flags == 0 ||
kthread_should_stop()); kthread_should_stop());
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
conf->expand_lo = mddev->reshape_position; conf->expand_lo = mddev->reshape_position;
@ -3605,7 +3605,7 @@ static int raid5_start_reshape(mddev_t *mddev)
mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices; mddev->degraded = (conf->raid_disks - conf->previous_raid_disks) - added_devices;
mddev->raid_disks = conf->raid_disks; mddev->raid_disks = conf->raid_disks;
mddev->reshape_position = 0; mddev->reshape_position = 0;
mddev->sb_dirty = 1; set_bit(MD_CHANGE_DEVS, &mddev->flags);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery); clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);

View File

@ -95,7 +95,6 @@ extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
extern void md_do_sync(mddev_t *mddev); extern void md_do_sync(mddev_t *mddev);
extern void md_new_event(mddev_t *mddev); extern void md_new_event(mddev_t *mddev);
extern void md_update_sb(mddev_t * mddev);
#endif /* CONFIG_MD */ #endif /* CONFIG_MD */
#endif #endif

View File

@ -116,7 +116,11 @@ struct mddev_s
dev_t unit; dev_t unit;
int md_minor; int md_minor;
struct list_head disks; struct list_head disks;
int sb_dirty; unsigned long flags;
#define MD_CHANGE_DEVS 0 /* Some device status has changed */
#define MD_CHANGE_CLEAN 1 /* transition to or from 'clean' */
#define MD_CHANGE_PENDING 2 /* superblock update in progress */
int ro; int ro;
struct gendisk *gendisk; struct gendisk *gendisk;