
md/raid10: avoid writing to known bad blocks on known bad drives.
Writing to known bad blocks on drives that have seen a write error
is asking for trouble.  So try to avoid these blocks.

Signed-off-by: NeilBrown <neilb@suse.de>
NeilBrown committed Jul 28, 2011
1 parent e875ece commit d4432c2
Showing 1 changed file with 93 additions and 12 deletions.
105 changes: 93 additions & 12 deletions drivers/md/raid10.c
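Before the diff itself, a minimal, userspace-style sketch (not the kernel code) of the per-copy clipping decision the new write path makes: given an is_badblock()-style result for one copy, how many sectors of the request may go out in this pass. The clip_write() helper, its types, and the example numbers are invented purely for illustration.

#include <stdio.h>

typedef unsigned long long sector_t;

/* Hypothetical helper mirroring the clipping in the diff: a known bad
 * range [first_bad, first_bad + bad_sectors) on one copy limits how many
 * sectors of a write starting at dev_sector may be issued in one pass.
 */
static sector_t clip_write(sector_t dev_sector, sector_t max_sectors,
			   sector_t first_bad, sector_t bad_sectors)
{
	if (first_bad <= dev_sector) {
		/* The write starts inside the bad range: this copy is
		 * skipped, and the other copies get at most the rest of
		 * the bad range so the split lines up with its end.
		 */
		sector_t rest = bad_sectors - (dev_sector - first_bad);
		return rest < max_sectors ? rest : max_sectors;
	}
	/* The bad range starts later: only the good sectors in front of
	 * it may be written in this pass.
	 */
	sector_t good = first_bad - dev_sector;
	return good < max_sectors ? good : max_sectors;
}

int main(void)
{
	/* Assume sectors 100..103 are known bad on one copy. */
	printf("%llu\n", clip_write(96, 16, 100, 4));  /* 4: stop just before sector 100 */
	printf("%llu\n", clip_write(102, 16, 100, 4)); /* 2: cover only the rest of the bad range */
	return 0;
}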
@@ -807,6 +807,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
unsigned long flags;
mdk_rdev_t *blocked_rdev;
int plugged;
int sectors_handled;
int max_sectors;

if (unlikely(bio->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bio);
@@ -895,7 +897,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
/*
* read balancing logic:
*/
int max_sectors;
int disk;
int slot;

@@ -925,8 +926,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
/* Could not read all from this device, so we will
* need another r10_bio.
*/
int sectors_handled;

sectors_handled = (r10_bio->sectors + max_sectors
- bio->bi_sector);
r10_bio->sectors = max_sectors;
@@ -963,13 +962,22 @@ static int make_request(mddev_t *mddev, struct bio * bio)
/* first select target devices under rcu_lock and
* inc refcount on their rdev. Record them by setting
* bios[x] to bio
* If there are known/acknowledged bad blocks on any device
* on which we have seen a write error, we want to avoid
* writing to those blocks. This potentially requires several
* writes to write around the bad blocks. Each set of writes
* gets its own r10_bio with a set of bios attached. The number
* of r10_bios is recorded in bio->bi_phys_segments just as with
* the read case.
*/
plugged = mddev_check_plugged(mddev);

raid10_find_phys(conf, r10_bio);
retry_write:
retry_write:
blocked_rdev = NULL;
rcu_read_lock();
max_sectors = r10_bio->sectors;

for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
@@ -978,13 +986,55 @@ static int make_request(mddev_t *mddev, struct bio * bio)
blocked_rdev = rdev;
break;
}
if (rdev && !test_bit(Faulty, &rdev->flags)) {
atomic_inc(&rdev->nr_pending);
r10_bio->devs[i].bio = bio;
} else {
r10_bio->devs[i].bio = NULL;
r10_bio->devs[i].bio = NULL;
if (!rdev || test_bit(Faulty, &rdev->flags)) {
set_bit(R10BIO_Degraded, &r10_bio->state);
continue;
}
if (test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
sector_t dev_sector = r10_bio->devs[i].addr;
int bad_sectors;
int is_bad;

is_bad = is_badblock(rdev, dev_sector,
max_sectors,
&first_bad, &bad_sectors);
if (is_bad < 0) {
/* Mustn't write here until the bad block
* is acknowledged
*/
atomic_inc(&rdev->nr_pending);
set_bit(BlockedBadBlocks, &rdev->flags);
blocked_rdev = rdev;
break;
}
if (is_bad && first_bad <= dev_sector) {
/* Cannot write here at all */
bad_sectors -= (dev_sector - first_bad);
if (bad_sectors < max_sectors)
/* Mustn't write more than bad_sectors
* to other devices yet
*/
max_sectors = bad_sectors;
/* We don't set R10BIO_Degraded as that
* only applies if the disk is missing,
* so it might be re-added, and we want to
* know to recover this chunk.
* In this case the device is here, and the
* fact that this chunk is not in-sync is
* recorded in the bad block log.
*/
continue;
}
if (is_bad) {
int good_sectors = first_bad - dev_sector;
if (good_sectors < max_sectors)
max_sectors = good_sectors;
}
}
r10_bio->devs[i].bio = bio;
atomic_inc(&rdev->nr_pending);
}
rcu_read_unlock();

@@ -1004,8 +1054,22 @@ static int make_request(mddev_t *mddev, struct bio * bio)
goto retry_write;
}

if (max_sectors < r10_bio->sectors) {
/* We are splitting this into multiple parts, so
* we need to prepare for allocating another r10_bio.
*/
r10_bio->sectors = max_sectors;
spin_lock_irq(&conf->device_lock);
if (bio->bi_phys_segments == 0)
bio->bi_phys_segments = 2;
else
bio->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock);
}
sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;

atomic_set(&r10_bio->remaining, 1);
bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);

for (i = 0; i < conf->copies; i++) {
struct bio *mbio;
@@ -1014,10 +1078,12 @@ static int make_request(mddev_t *mddev, struct bio * bio)
continue;

mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
max_sectors);
r10_bio->devs[i].bio = mbio;

mbio->bi_sector = r10_bio->devs[i].addr+
conf->mirrors[d].rdev->data_offset;
mbio->bi_sector = (r10_bio->devs[i].addr+
conf->mirrors[d].rdev->data_offset);
mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
mbio->bi_end_io = raid10_end_write_request;
mbio->bi_rw = WRITE | do_sync | do_fua;
@@ -1042,6 +1108,21 @@ static int make_request(mddev_t *mddev, struct bio * bio)
/* In case raid10d snuck in to freeze_array */
wake_up(&conf->wait_barrier);

if (sectors_handled < (bio->bi_size >> 9)) {
/* We need another r10_bio. It has already been counted
* in bio->bi_phys_segments.
*/
r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);

r10_bio->master_bio = bio;
r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled;

r10_bio->mddev = mddev;
r10_bio->sector = bio->bi_sector + sectors_handled;
r10_bio->state = 0;
goto retry_write;
}

if (do_sync || !mddev->bitmap || !plugged)
md_wakeup_thread(mddev->thread);
return 0;
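A second, equally hypothetical sketch of the splitting the write path above performs when one pass cannot cover the whole bio: each pass handles max_sectors, the pass count plays the role of bio->bi_phys_segments, and the remainder becomes a fresh r10_bio starting at bio->bi_sector + sectors_handled. pass_limit(), the fixed bad range, and the printf bookkeeping are all made up for the example.

#include <stdio.h>

typedef unsigned long long sector_t;

/* Stand-in for the per-pass clipping against a known bad range. */
static sector_t pass_limit(sector_t start, sector_t len)
{
	const sector_t first_bad = 100, bad_len = 4;   /* pretend sectors 100..103 are bad */

	if (start + len <= first_bad || start >= first_bad + bad_len)
		return len;                            /* no overlap: whole pass fits */
	if (start < first_bad)
		return first_bad - start;              /* stop just before the bad range */
	return first_bad + bad_len - start;            /* finish off the bad range */
}

int main(void)
{
	sector_t bio_sector = 96, bio_sectors = 16;    /* the incoming write */
	sector_t sectors_handled = 0;
	int segments = 0;                              /* plays the role of bi_phys_segments */

	while (sectors_handled < bio_sectors) {
		sector_t start = bio_sector + sectors_handled;
		sector_t len = pass_limit(start, bio_sectors - sectors_handled);

		segments++;                            /* one more "r10_bio" for this bio */
		printf("pass %d: sectors %llu..%llu\n",
		       segments, start, start + len - 1);
		sectors_handled += len;                /* the next pass starts here */
	}
	printf("total passes: %d\n", segments);
	return 0;
}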
