Skip to content

Commit

Permalink
[PATCH] md: raid10 read-error handling - resync and read-only
Browse files Browse the repository at this point in the history
Add in correct read-error handling for resync and read-only situations.

When read-only, we don't over-write, so we need to mark the failed drive in
the r10_bio so we don't re-try it.  During resync, we always read all blocks,
so if there is a read error, we simply over-write it with the good block that
we found (assuming we found one).

Note that the recovery case still isn't handled in an interesting way.  There
is nothing useful to do for the 2-copies case.  If there are 3 or more copies,
then we could try reading from one of the non-missing copies, but this is a
bit complicated and very rarely would be used, so I'm leaving it for now.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
NeilBrown authored and Linus Torvalds committed Jan 6, 2006
1 parent 4443ae1 commit 0eb3ff1
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 21 deletions.
56 changes: 35 additions & 21 deletions drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)

for (i = 0; i < conf->copies; i++) {
struct bio **bio = & r10_bio->devs[i].bio;
if (*bio)
if (*bio && *bio != IO_BLOCKED)
bio_put(*bio);
*bio = NULL;
}
Expand Down Expand Up @@ -500,6 +500,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
disk = r10_bio->devs[slot].devnum;

while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
r10_bio->devs[slot].bio == IO_BLOCKED ||
!test_bit(In_sync, &rdev->flags)) {
slot++;
if (slot == conf->copies) {
Expand All @@ -517,6 +518,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
slot = 0;
disk = r10_bio->devs[slot].devnum;
while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
r10_bio->devs[slot].bio == IO_BLOCKED ||
!test_bit(In_sync, &rdev->flags)) {
slot ++;
if (slot == conf->copies) {
Expand All @@ -537,6 +539,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)


if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
r10_bio->devs[nslot].bio == IO_BLOCKED ||
!test_bit(In_sync, &rdev->flags))
continue;

Expand Down Expand Up @@ -1104,7 +1107,6 @@ static int raid10_remove_disk(mddev_t *mddev, int number)

static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
{
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
conf_t *conf = mddev_to_conf(r10_bio->mddev);
int i,d;
Expand All @@ -1119,7 +1121,10 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
BUG();
update_head_pos(i, r10_bio);
d = r10_bio->devs[i].devnum;
if (!uptodate)

if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R10BIO_Uptodate, &r10_bio->state);
else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
md_error(r10_bio->mddev,
conf->mirrors[d].rdev);

Expand Down Expand Up @@ -1209,25 +1214,30 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
fbio = r10_bio->devs[i].bio;

/* now find blocks with errors */
for (i=first+1 ; i < conf->copies ; i++) {
int vcnt, j, d;
for (i=0 ; i < conf->copies ; i++) {
int j, d;
int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);

if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
continue;
/* We know that the bi_io_vec layout is the same for
* both 'first' and 'i', so we just compare them.
* All vec entries are PAGE_SIZE;
*/
tbio = r10_bio->devs[i].bio;
vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
for (j = 0; j < vcnt; j++)
if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
page_address(tbio->bi_io_vec[j].bv_page),
PAGE_SIZE))
break;
if (j == vcnt)

if (tbio->bi_end_io != end_sync_read)
continue;
if (i == first)
continue;
mddev->resync_mismatches += r10_bio->sectors;
if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
/* We know that the bi_io_vec layout is the same for
* both 'first' and 'i', so we just compare them.
* All vec entries are PAGE_SIZE;
*/
for (j = 0; j < vcnt; j++)
if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
page_address(tbio->bi_io_vec[j].bv_page),
PAGE_SIZE))
break;
if (j == vcnt)
continue;
mddev->resync_mismatches += r10_bio->sectors;
}
if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
/* Don't fix anything. */
continue;
Expand Down Expand Up @@ -1308,7 +1318,10 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)

atomic_inc(&conf->mirrors[d].rdev->nr_pending);
md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
generic_make_request(wbio);
if (test_bit(R10BIO_Uptodate, &r10_bio->state))
generic_make_request(wbio);
else
bio_endio(wbio, wbio->bi_size, -EIO);
}


Expand Down Expand Up @@ -1445,7 +1458,8 @@ static void raid10d(mddev_t *mddev)
unfreeze_array(conf);

bio = r10_bio->devs[r10_bio->read_slot].bio;
r10_bio->devs[r10_bio->read_slot].bio = NULL;
r10_bio->devs[r10_bio->read_slot].bio =
mddev->ro ? IO_BLOCKED : NULL;
bio_put(bio);
mirror = read_balance(conf, r10_bio);
if (mirror == -1) {
Expand Down
7 changes: 7 additions & 0 deletions include/linux/raid/raid10.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,13 @@ struct r10bio_s {
} devs[0];
};

/* when we get a read error on a read-only array, we redirect to another
* device without failing the first device, or trying to over-write to
* correct the read error. To keep track of bad blocks on a per-bio
* level, we store IO_BLOCKED in the appropriate 'bios' pointer
*/
#define IO_BLOCKED ((struct bio*)1)

/* bits for r10bio.state */
#define R10BIO_Uptodate 0
#define R10BIO_IsSync 1
Expand Down

0 comments on commit 0eb3ff1

Please sign in to comment.