Skip to content

Commit

Permalink
md/raid5: be more careful about write ordering when reshaping.
Browse files Browse the repository at this point in the history
When we are reshaping an array, it is very important that we read
the data from a particular sector offset before writing new data
at that offset.

In most cases when growing or shrinking an array we read long before
we even consider writing.  But when restriping an array without
changing it size, there is a small possibility that we might have
some data to available write before the read has happened at the same
location.  This would require some stripes to be in cache already.

To guard against this small possibility, we check, before writing,
that the 'old' stripe at the same location is not in the process of
being read.  And we ensure that we mark all 'source' stripes as such
before allowing new 'destination' stripes to proceed.

Signed-off-by: NeilBrown <neilb@suse.de>
  • Loading branch information
NeilBrown committed Mar 31, 2009
1 parent d1a7c50 commit ab69ae1
Showing 1 changed file with 47 additions and 2 deletions.
49 changes: 47 additions & 2 deletions drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,8 @@ get_active_stripe(raid5_conf_t *conf, sector_t sector,
init_stripe(sh, sector, previous);
} else {
if (atomic_read(&sh->count)) {
BUG_ON(!list_empty(&sh->lru));
BUG_ON(!list_empty(&sh->lru)
&& !test_bit(STRIPE_EXPANDING, &sh->state));
} else {
if (!test_bit(STRIPE_HANDLE, &sh->state))
atomic_inc(&conf->active_stripes);
Expand Down Expand Up @@ -2944,6 +2945,23 @@ static bool handle_stripe5(struct stripe_head *sh)

/* Finish reconstruct operations initiated by the expansion process */
if (sh->reconstruct_state == reconstruct_state_result) {
struct stripe_head *sh2
= get_active_stripe(conf, sh->sector, 1, 1);
if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
/* sh cannot be written until sh2 has been read.
* so arrange for sh to be delayed a little
*/
set_bit(STRIPE_DELAYED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
&sh2->state))
atomic_inc(&conf->preread_active_stripes);
release_stripe(sh2);
goto unlock;
}
if (sh2)
release_stripe(sh2);

sh->reconstruct_state = reconstruct_state_idle;
clear_bit(STRIPE_EXPANDING, &sh->state);
for (i = conf->raid_disks; i--; ) {
Expand Down Expand Up @@ -3172,6 +3190,23 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
}

if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) {
struct stripe_head *sh2
= get_active_stripe(conf, sh->sector, 1, 1);
if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) {
/* sh cannot be written until sh2 has been read.
* so arrange for sh to be delayed a little
*/
set_bit(STRIPE_DELAYED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE,
&sh2->state))
atomic_inc(&conf->preread_active_stripes);
release_stripe(sh2);
goto unlock;
}
if (sh2)
release_stripe(sh2);

/* Need to write out all blocks after computing P&Q */
sh->disks = conf->raid_disks;
stripe_set_idx(sh->sector, conf, 0, sh);
Expand Down Expand Up @@ -3739,6 +3774,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
sector_t writepos, safepos, gap;
sector_t stripe_addr;
int reshape_sectors;
struct list_head stripes;

if (sector_nr == 0) {
/* If restarting in the middle, skip the initial sectors */
Expand Down Expand Up @@ -3816,6 +3852,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
BUG_ON(writepos != sector_nr + reshape_sectors);
stripe_addr = sector_nr;
}
INIT_LIST_HEAD(&stripes);
for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) {
int j;
int skipped = 0;
Expand Down Expand Up @@ -3845,7 +3882,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
set_bit(STRIPE_EXPAND_READY, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
}
release_stripe(sh);
list_add(&sh->lru, &stripes);
}
spin_lock_irq(&conf->device_lock);
if (mddev->delta_disks < 0)
Expand Down Expand Up @@ -3874,6 +3911,14 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
release_stripe(sh);
first_sector += STRIPE_SECTORS;
}
/* Now that the sources are clearly marked, we can release
* the destination stripes
*/
while (!list_empty(&stripes)) {
sh = list_entry(stripes.next, struct stripe_head, lru);
list_del_init(&sh->lru);
release_stripe(sh);
}
/* If this takes us to the resync_max point where we have to pause,
* then we need to write out the superblock.
*/
Expand Down

0 comments on commit ab69ae1

Please sign in to comment.