Skip to content

Commit

Permalink
[PATCH] md: allow raid1 to check consistency
Browse files Browse the repository at this point in the history
Where performing a user-requested 'check' or 'repair', we read all readable
devices, and compare the contents.  We only write to blocks which had read
errors, or blocks with content that differs from the first good device found.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
NeilBrown authored and Linus Torvalds committed Jan 6, 2006
1 parent 18f0881 commit d11c171
Showing 1 changed file with 128 additions and 28 deletions.
156 changes: 128 additions & 28 deletions drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,24 +106,41 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
}
/*
* Allocate RESYNC_PAGES data pages and attach them to
* the first bio;
* the first bio.
* If this is a user-requested check/repair, allocate
* RESYNC_PAGES for each bio.
*/
bio = r1_bio->bios[0];
for (i = 0; i < RESYNC_PAGES; i++) {
page = alloc_page(gfp_flags);
if (unlikely(!page))
goto out_free_pages;

bio->bi_io_vec[i].bv_page = page;
if (test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery))
j = pi->raid_disks;
else
j = 1;
while(j--) {
bio = r1_bio->bios[j];
for (i = 0; i < RESYNC_PAGES; i++) {
page = alloc_page(gfp_flags);
if (unlikely(!page))
goto out_free_pages;

bio->bi_io_vec[i].bv_page = page;
}
}
/* If not user-requests, copy the page pointers to all bios */
if (!test_bit(MD_RECOVERY_REQUESTED, &pi->mddev->recovery)) {
for (i=0; i<RESYNC_PAGES ; i++)
for (j=1; j<pi->raid_disks; j++)
r1_bio->bios[j]->bi_io_vec[i].bv_page =
r1_bio->bios[0]->bi_io_vec[i].bv_page;
}

r1_bio->master_bio = NULL;

return r1_bio;

out_free_pages:
for ( ; i > 0 ; i--)
__free_page(bio->bi_io_vec[i-1].bv_page);
for (i=0; i < RESYNC_PAGES ; i++)
for (j=0 ; j < pi->raid_disks; j++)
__free_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
j = -1;
out_free_bio:
while ( ++j < pi->raid_disks )
bio_put(r1_bio->bios[j]);
Expand All @@ -134,14 +151,16 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
static void r1buf_pool_free(void *__r1_bio, void *data)
{
struct pool_info *pi = data;
int i;
int i,j;
r1bio_t *r1bio = __r1_bio;
struct bio *bio = r1bio->bios[0];

for (i = 0; i < RESYNC_PAGES; i++) {
__free_page(bio->bi_io_vec[i].bv_page);
bio->bi_io_vec[i].bv_page = NULL;
}
for (i = 0; i < RESYNC_PAGES; i++)
for (j = pi->raid_disks; j-- ;) {
if (j == 0 ||
r1bio->bios[j]->bi_io_vec[i].bv_page !=
r1bio->bios[0]->bi_io_vec[i].bv_page)
__free_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
}
for (i=0 ; i < pi->raid_disks; i++)
bio_put(r1bio->bios[i]);

Expand Down Expand Up @@ -1077,21 +1096,26 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
{
r1bio_t * r1_bio = (r1bio_t *)(bio->bi_private);
int i;

if (bio->bi_size)
return 1;

if (r1_bio->bios[r1_bio->read_disk] != bio)
BUG();
update_head_pos(r1_bio->read_disk, r1_bio);
for (i=r1_bio->mddev->raid_disks; i--; )
if (r1_bio->bios[i] == bio)
break;
BUG_ON(i < 0);
update_head_pos(i, r1_bio);
/*
* we have read a block, now it needs to be re-written,
* or re-read if the read failed.
* We don't do much here, just schedule handling by raid1d
*/
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R1BIO_Uptodate, &r1_bio->state);
reschedule_retry(r1_bio);

if (atomic_dec_and_test(&r1_bio->remaining))
reschedule_retry(r1_bio);
return 0;
}

Expand Down Expand Up @@ -1134,9 +1158,65 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
bio = r1_bio->bios[r1_bio->read_disk];


/*
* schedule writes
*/
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* We have read all readable devices. If we haven't
* got the block, then there is no hope left.
* If we have, then we want to do a comparison
* and skip the write if everything is the same.
* If any blocks failed to read, then we need to
* attempt an over-write
*/
int primary;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
for (i=0; i<mddev->raid_disks; i++)
if (r1_bio->bios[i]->bi_end_io == end_sync_read)
md_error(mddev, conf->mirrors[i].rdev);

md_done_sync(mddev, r1_bio->sectors, 1);
put_buf(r1_bio);
return;
}
for (primary=0; primary<mddev->raid_disks; primary++)
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
r1_bio->bios[primary]->bi_end_io = NULL;
break;
}
r1_bio->read_disk = primary;
for (i=0; i<mddev->raid_disks; i++)
if (r1_bio->bios[i]->bi_end_io == end_sync_read &&
test_bit(BIO_UPTODATE, &r1_bio->bios[i]->bi_flags)) {
int j;
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
struct bio *pbio = r1_bio->bios[primary];
struct bio *sbio = r1_bio->bios[i];
for (j = vcnt; j-- ; )
if (memcmp(page_address(pbio->bi_io_vec[j].bv_page),
page_address(sbio->bi_io_vec[j].bv_page),
PAGE_SIZE))
break;
if (j >= 0)
mddev->resync_mismatches += r1_bio->sectors;
if (j < 0 || test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
sbio->bi_end_io = NULL;
else {
/* fixup the bio for reuse */
sbio->bi_vcnt = vcnt;
sbio->bi_size = r1_bio->sectors << 9;
sbio->bi_idx = 0;
sbio->bi_phys_segments = 0;
sbio->bi_hw_segments = 0;
sbio->bi_hw_front_size = 0;
sbio->bi_hw_back_size = 0;
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
sbio->bi_flags |= 1 << BIO_UPTODATE;
sbio->bi_next = NULL;
sbio->bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
}
}
}
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
/* ouch - failed to read all of that.
* Try some synchronous reads of other devices to get
Expand Down Expand Up @@ -1216,6 +1296,10 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
idx ++;
}
}

/*
* schedule writes
*/
atomic_set(&r1_bio->remaining, 1);
for (i = 0; i < disks ; i++) {
wbio = r1_bio->bios[i];
Expand Down Expand Up @@ -1618,10 +1702,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
for (i=0 ; i < conf->raid_disks; i++) {
bio = r1_bio->bios[i];
if (bio->bi_end_io) {
page = r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page;
page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
if (bio_add_page(bio, page, len, 0) == 0) {
/* stop here */
r1_bio->bios[0]->bi_io_vec[bio->bi_vcnt].bv_page = page;
bio->bi_io_vec[bio->bi_vcnt].bv_page = page;
while (i > 0) {
i--;
bio = r1_bio->bios[i];
Expand All @@ -1641,12 +1725,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
sync_blocks -= (len>>9);
} while (r1_bio->bios[disk]->bi_vcnt < RESYNC_PAGES);
bio_full:
bio = r1_bio->bios[r1_bio->read_disk];
r1_bio->sectors = nr_sectors;

md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev, nr_sectors);
/* For a user-requested sync, we read all readable devices and do a
* compare
*/
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
atomic_set(&r1_bio->remaining, read_targets);
for (i=0; i<conf->raid_disks; i++) {
bio = r1_bio->bios[i];
if (bio->bi_end_io == end_sync_read) {
md_sync_acct(conf->mirrors[i].rdev->bdev, nr_sectors);
generic_make_request(bio);
}
}
} else {
atomic_set(&r1_bio->remaining, 1);
bio = r1_bio->bios[r1_bio->read_disk];
md_sync_acct(conf->mirrors[r1_bio->read_disk].rdev->bdev,
nr_sectors);
generic_make_request(bio);

generic_make_request(bio);
}

return nr_sectors;
}
Expand Down

0 comments on commit d11c171

Please sign in to comment.