Skip to content

Commit

Permalink
md/raid1: Handle write errors by updating badblock log.
Browse files Browse the repository at this point in the history
When we get a write error (in the data area, not in metadata),
update the badblock log rather than failing the whole device.

As the write may well be many blocks, we trying writing each
block individually and only log the ones which fail.

Signed-off-by: NeilBrown <neilb@suse.de>
Reviewed-by: Namhyung Kim <namhyung@gmail.com>
  • Loading branch information
NeilBrown committed Jul 28, 2011
1 parent 2ca68f5 commit cd5ff9a
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 24 deletions.
168 changes: 145 additions & 23 deletions drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,25 +318,34 @@ static void raid1_end_read_request(struct bio *bio, int error)
rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
}

static void close_write(r1bio_t *r1_bio)
{
/* it really is the end of this request */
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
/* free extra copy of the data pages */
int i = r1_bio->behind_page_count;
while (i--)
safe_put_page(r1_bio->behind_bvecs[i].bv_page);
kfree(r1_bio->behind_bvecs);
r1_bio->behind_bvecs = NULL;
}
/* clear the bitmap if all writes complete successfully */
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
r1_bio->sectors,
!test_bit(R1BIO_Degraded, &r1_bio->state),
test_bit(R1BIO_BehindIO, &r1_bio->state));
md_write_end(r1_bio->mddev);
}

static void r1_bio_write_done(r1bio_t *r1_bio)
{
if (atomic_dec_and_test(&r1_bio->remaining))
{
/* it really is the end of this request */
if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
/* free extra copy of the data pages */
int i = r1_bio->behind_page_count;
while (i--)
safe_put_page(r1_bio->behind_bvecs[i].bv_page);
kfree(r1_bio->behind_bvecs);
r1_bio->behind_bvecs = NULL;
}
/* clear the bitmap if all writes complete successfully */
bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
r1_bio->sectors,
!test_bit(R1BIO_Degraded, &r1_bio->state),
test_bit(R1BIO_BehindIO, &r1_bio->state));
md_write_end(r1_bio->mddev);
if (!atomic_dec_and_test(&r1_bio->remaining))
return;

if (test_bit(R1BIO_WriteError, &r1_bio->state))
reschedule_retry(r1_bio);
else {
close_write(r1_bio);
if (test_bit(R1BIO_MadeGood, &r1_bio->state))
reschedule_retry(r1_bio);
else
Expand All @@ -360,12 +369,10 @@ static void raid1_end_write_request(struct bio *bio, int error)
/*
* 'one mirror IO has finished' event handler:
*/
r1_bio->bios[mirror] = NULL;
to_put = bio;
if (!uptodate) {
md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
/* an I/O failed, we can't clear the bitmap */
set_bit(R1BIO_Degraded, &r1_bio->state);
set_bit(WriteErrorSeen,
&conf->mirrors[mirror].rdev->flags);
set_bit(R1BIO_WriteError, &r1_bio->state);
} else {
/*
* Set R1BIO_Uptodate in our master bio, so that we
Expand All @@ -380,6 +387,8 @@ static void raid1_end_write_request(struct bio *bio, int error)
sector_t first_bad;
int bad_sectors;

r1_bio->bios[mirror] = NULL;
to_put = bio;
set_bit(R1BIO_Uptodate, &r1_bio->state);

/* Maybe we can clear some bad blocks. */
Expand Down Expand Up @@ -1724,6 +1733,101 @@ static void fix_read_error(conf_t *conf, int read_disk,
}
}

static void bi_complete(struct bio *bio, int error)
{
complete((struct completion *)bio->bi_private);
}

static int submit_bio_wait(int rw, struct bio *bio)
{
struct completion event;
rw |= REQ_SYNC;

init_completion(&event);
bio->bi_private = &event;
bio->bi_end_io = bi_complete;
submit_bio(rw, bio);
wait_for_completion(&event);

return test_bit(BIO_UPTODATE, &bio->bi_flags);
}

static int narrow_write_error(r1bio_t *r1_bio, int i)
{
mddev_t *mddev = r1_bio->mddev;
conf_t *conf = mddev->private;
mdk_rdev_t *rdev = conf->mirrors[i].rdev;
int vcnt, idx;
struct bio_vec *vec;

/* bio has the data to be written to device 'i' where
* we just recently had a write error.
* We repeatedly clone the bio and trim down to one block,
* then try the write. Where the write fails we record
* a bad block.
* It is conceivable that the bio doesn't exactly align with
* blocks. We must handle this somehow.
*
* We currently own a reference on the rdev.
*/

int block_sectors;
sector_t sector;
int sectors;
int sect_to_write = r1_bio->sectors;
int ok = 1;

if (rdev->badblocks.shift < 0)
return 0;

block_sectors = 1 << rdev->badblocks.shift;
sector = r1_bio->sector;
sectors = ((sector + block_sectors)
& ~(sector_t)(block_sectors - 1))
- sector;

if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
vcnt = r1_bio->behind_page_count;
vec = r1_bio->behind_bvecs;
idx = 0;
while (vec[idx].bv_page == NULL)
idx++;
} else {
vcnt = r1_bio->master_bio->bi_vcnt;
vec = r1_bio->master_bio->bi_io_vec;
idx = r1_bio->master_bio->bi_idx;
}
while (sect_to_write) {
struct bio *wbio;
if (sectors > sect_to_write)
sectors = sect_to_write;
/* Write at 'sector' for 'sectors'*/

wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev);
memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec));
wbio->bi_sector = r1_bio->sector;
wbio->bi_rw = WRITE;
wbio->bi_vcnt = vcnt;
wbio->bi_size = r1_bio->sectors << 9;
wbio->bi_idx = idx;

md_trim_bio(wbio, sector - r1_bio->sector, sectors);
wbio->bi_sector += rdev->data_offset;
wbio->bi_bdev = rdev->bdev;
if (submit_bio_wait(WRITE, wbio) == 0)
/* failure! */
ok = rdev_set_badblocks(rdev, sector,
sectors, 0)
&& ok;

bio_put(wbio);
sect_to_write -= sectors;
sector += sectors;
sectors = block_sectors;
}
return ok;
}

static void raid1d(mddev_t *mddev)
{
r1bio_t *r1_bio;
Expand Down Expand Up @@ -1775,7 +1879,8 @@ static void raid1d(mddev_t *mddev)
md_done_sync(mddev, s, 1);
} else
sync_request_write(mddev, r1_bio);
} else if (test_bit(R1BIO_MadeGood, &r1_bio->state)) {
} else if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
test_bit(R1BIO_WriteError, &r1_bio->state)) {
int m;
for (m = 0; m < conf->raid_disks ; m++)
if (r1_bio->bios[m] == IO_MADE_GOOD) {
Expand All @@ -1785,7 +1890,24 @@ static void raid1d(mddev_t *mddev)
r1_bio->sector,
r1_bio->sectors);
rdev_dec_pending(rdev, mddev);
} else if (r1_bio->bios[m] != NULL) {
/* This drive got a write error. We
* need to narrow down and record
* precise write errors.
*/
if (!narrow_write_error(r1_bio, m)) {
md_error(mddev,
conf->mirrors[m].rdev);
/* an I/O failed, we can't clear
* the bitmap */
set_bit(R1BIO_Degraded,
&r1_bio->state);
}
rdev_dec_pending(conf->mirrors[m].rdev,
mddev);
}
if (test_bit(R1BIO_WriteError, &r1_bio->state))
close_write(r1_bio);
raid_end_bio_io(r1_bio);
} else if (test_bit(R1BIO_ReadError, &r1_bio->state)) {
int disk;
Expand Down
3 changes: 2 additions & 1 deletion drivers/md/raid1.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,8 @@ struct r1bio_s {
/* If a write for this request means we can clear some
* known-bad-block records, we set this flag
*/
#define R1BIO_MadeGood 7
#define R1BIO_MadeGood 7
#define R1BIO_WriteError 8

extern int md_raid1_congested(mddev_t *mddev, int bits);

Expand Down

0 comments on commit cd5ff9a

Please sign in to comment.