Skip to content

Commit

Permalink
md/raid10: preferentially read from replacement device if possible.
Browse files Browse the repository at this point in the history
When reading (for array reads, not for recovery, etc.) we read from the
replacement device if it has recovered far enough.
This requires storing the chosen rdev in the 'r10_bio' so we can make
sure to drop the ref on the right device when the read finishes.

Signed-off-by: NeilBrown <neilb@suse.de>
  • Loading branch information
NeilBrown committed Dec 22, 2011
1 parent 96c3fd1 commit abbf098
Showing 1 changed file with 23 additions and 13 deletions.
36 changes: 23 additions & 13 deletions drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -324,11 +324,13 @@ static void raid10_end_read_request(struct bio *bio, int error)
int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
struct r10bio *r10_bio = bio->bi_private;
int slot, dev;
+ struct md_rdev *rdev;
struct r10conf *conf = r10_bio->mddev->private;


slot = r10_bio->read_slot;
dev = r10_bio->devs[slot].devnum;
+ rdev = r10_bio->devs[slot].rdev;
/*
* this branch is our 'one mirror IO has finished' event handler:
*/
Expand All @@ -346,7 +348,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
*/
set_bit(R10BIO_Uptodate, &r10_bio->state);
raid_end_bio_io(r10_bio);
- rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev);
+ rdev_dec_pending(rdev, conf->mddev);
} else {
/*
* oops, read error - keep the refcount on the rdev
Expand All @@ -355,7 +357,7 @@ static void raid10_end_read_request(struct bio *bio, int error)
printk_ratelimited(KERN_ERR
"md/raid10:%s: %s: rescheduling sector %llu\n",
mdname(conf->mddev),
- bdevname(conf->mirrors[dev].rdev->bdev, b),
+ bdevname(rdev->bdev, b),
(unsigned long long)r10_bio->sector);
set_bit(R10BIO_ReadError, &r10_bio->state);
reschedule_retry(r10_bio);
Expand Down Expand Up @@ -599,7 +601,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
int sectors = r10_bio->sectors;
int best_good_sectors;
sector_t new_distance, best_dist;
- struct md_rdev *rdev;
+ struct md_rdev *rdev, *best_rdev;
int do_balance;
int best_slot;

Expand All @@ -608,6 +610,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
retry:
sectors = r10_bio->sectors;
best_slot = -1;
+ best_rdev = NULL;
best_dist = MaxSector;
best_good_sectors = 0;
do_balance = 1;
Expand All @@ -629,10 +632,16 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (r10_bio->devs[slot].bio == IO_BLOCKED)
continue;
disk = r10_bio->devs[slot].devnum;
- rdev = rcu_dereference(conf->mirrors[disk].rdev);
+ rdev = rcu_dereference(conf->mirrors[disk].replacement);
+ if (rdev == NULL || test_bit(Faulty, &rdev->flags) ||
+ r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
+ rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (rdev == NULL)
continue;
- if (!test_bit(In_sync, &rdev->flags))
+ if (test_bit(Faulty, &rdev->flags))
+ continue;
+ if (!test_bit(In_sync, &rdev->flags) &&
+ r10_bio->devs[slot].addr + sectors > rdev->recovery_offset)
continue;

dev_sector = r10_bio->devs[slot].addr;
Expand All @@ -657,6 +666,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (good_sectors > best_good_sectors) {
best_good_sectors = good_sectors;
best_slot = slot;
+ best_rdev = rdev;
}
if (!do_balance)
/* Must read from here */
Expand Down Expand Up @@ -685,16 +695,15 @@ static struct md_rdev *read_balance(struct r10conf *conf,
if (new_distance < best_dist) {
best_dist = new_distance;
best_slot = slot;
+ best_rdev = rdev;
}
}
- if (slot == conf->copies)
+ if (slot >= conf->copies) {
slot = best_slot;
+ rdev = best_rdev;
+ }

if (slot >= 0) {
- disk = r10_bio->devs[slot].devnum;
- rdev = rcu_dereference(conf->mirrors[disk].rdev);
- if (!rdev)
- goto retry;
atomic_inc(&rdev->nr_pending);
if (test_bit(Faulty, &rdev->flags)) {
/* Cannot risk returning a device that failed
Expand Down Expand Up @@ -990,6 +999,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
max_sectors);

r10_bio->devs[slot].bio = read_bio;
+ r10_bio->devs[slot].rdev = rdev;

read_bio->bi_sector = r10_bio->devs[slot].addr +
rdev->data_offset;
Expand Down Expand Up @@ -2088,10 +2098,9 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
{
int slot = r10_bio->read_slot;
- int mirror = r10_bio->devs[slot].devnum;
struct bio *bio;
struct r10conf *conf = mddev->private;
- struct md_rdev *rdev;
+ struct md_rdev *rdev = r10_bio->devs[slot].rdev;
char b[BDEVNAME_SIZE];
unsigned long do_sync;
int max_sectors;
Expand All @@ -2109,7 +2118,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf);
}
- rdev_dec_pending(conf->mirrors[mirror].rdev, mddev);
+ rdev_dec_pending(rdev, mddev);

bio = r10_bio->devs[slot].bio;
bdevname(bio->bi_bdev, b);
Expand Down Expand Up @@ -2144,6 +2153,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
r10_bio->sector - bio->bi_sector,
max_sectors);
r10_bio->devs[slot].bio = bio;
+ r10_bio->devs[slot].rdev = rdev;
bio->bi_sector = r10_bio->devs[slot].addr
+ rdev->data_offset;
bio->bi_bdev = rdev->bdev;
Expand Down

0 comments on commit abbf098

Please sign in to comment.