Skip to content

Commit

Permalink
md/raid10: handle further errors during fix_read_error better.
Browse files Browse the repository at this point in the history
If we find more read/write errors we should record a bad block before
failing the device.

Signed-off-by: NeilBrown <neilb@suse.de>
  • Loading branch information
NeilBrown committed Jul 28, 2011
1 parent 5e57028 commit 58c54fc
Showing 1 changed file with 44 additions and 15 deletions.
59 changes: 44 additions & 15 deletions drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -1749,6 +1749,26 @@ static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
}

static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
int sectors, struct page *page, int rw)
{
sector_t first_bad;
int bad_sectors;

if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
&& (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
return -1;
if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
/* success */
return 1;
if (rw == WRITE)
set_bit(WriteErrorSeen, &rdev->flags);
/* need to record an error - either for the block or the device */
if (!rdev_set_badblocks(rdev, sector, sectors, 0))
md_error(rdev->mddev, rdev);
return 0;
}

/*
* This is a kernel thread which:
*
Expand Down Expand Up @@ -1832,9 +1852,19 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
rcu_read_unlock();

if (!success) {
/* Cannot read from anywhere -- bye bye array */
/* Cannot read from anywhere, just mark the block
* as bad on the first device to discourage future
* reads.
*/
int dn = r10_bio->devs[r10_bio->read_slot].devnum;
md_error(mddev, conf->mirrors[dn].rdev);
rdev = conf->mirrors[dn].rdev;

if (!rdev_set_badblocks(
rdev,
r10_bio->devs[r10_bio->read_slot].addr
+ sect,
s, 0))
md_error(mddev, rdev);
break;
}

Expand All @@ -1855,10 +1885,10 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)

atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
if (sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
s<<9, conf->tmppage, WRITE, false)
if (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
s<<9, conf->tmppage, WRITE)
== 0) {
/* Well, this device is dead */
printk(KERN_NOTICE
Expand All @@ -1873,7 +1903,6 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"drive\n",
mdname(mddev),
bdevname(rdev->bdev, b));
md_error(mddev, rdev);
}
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
Expand All @@ -1893,11 +1922,12 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)

atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
if (sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
s<<9, conf->tmppage,
READ, false) == 0) {
switch (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
s<<9, conf->tmppage,
READ)) {
case 0:
/* Well, this device is dead */
printk(KERN_NOTICE
"md/raid10:%s: unable to read back "
Expand All @@ -1911,9 +1941,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"drive\n",
mdname(mddev),
bdevname(rdev->bdev, b));

md_error(mddev, rdev);
} else {
break;
case 1:
printk(KERN_INFO
"md/raid10:%s: read error corrected"
" (%d sectors at %llu on %s)\n",
Expand Down

0 comments on commit 58c54fc

Please sign in to comment.