Skip to content

Commit

Permalink
md/raid1: Allocate spare to store replacement devices and their bios.
Browse files Browse the repository at this point in the history
In RAID1, a replacement is much like a normal device, so we just
double the size of the relevant arrays and look at all possible
devices for reads and writes.

This means that the array looks like it is now double the size in some
way - we need to be careful about that.
In particular, when checking whether the array is still degraded while
creating a recovery request, we need to consider only the first 'half'
- i.e. the real (non-replacement) devices.

Signed-off-by: NeilBrown <neilb@suse.de>
  • Loading branch information
NeilBrown committed Dec 22, 2011
1 parent 3019463 commit 8f19ccb
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 31 deletions.
64 changes: 34 additions & 30 deletions drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
j = -1;
out_free_bio:
while ( ++j < pi->raid_disks )
while (++j < pi->raid_disks)
bio_put(r1_bio->bios[j]);
r1bio_pool_free(r1_bio, data);
return NULL;
Expand Down Expand Up @@ -164,7 +164,7 @@ static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio)
{
int i;

for (i = 0; i < conf->raid_disks; i++) {
for (i = 0; i < conf->raid_disks * 2; i++) {
struct bio **bio = r1_bio->bios + i;
if (!BIO_SPECIAL(*bio))
bio_put(*bio);
Expand All @@ -185,7 +185,7 @@ static void put_buf(struct r1bio *r1_bio)
struct r1conf *conf = r1_bio->mddev->private;
int i;

for (i=0; i<conf->raid_disks; i++) {
for (i = 0; i < conf->raid_disks * 2; i++) {
struct bio *bio = r1_bio->bios[i];
if (bio->bi_end_io)
rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
Expand Down Expand Up @@ -280,11 +280,11 @@ static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
struct r1conf *conf = r1_bio->mddev->private;
int raid_disks = conf->raid_disks;

for (mirror = 0; mirror < raid_disks; mirror++)
for (mirror = 0; mirror < raid_disks * 2; mirror++)
if (r1_bio->bios[mirror] == bio)
break;

BUG_ON(mirror == raid_disks);
BUG_ON(mirror == raid_disks * 2);
update_head_pos(mirror, r1_bio);

return mirror;
Expand Down Expand Up @@ -506,7 +506,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
start_disk = conf->last_used;
}

for (i = 0 ; i < conf->raid_disks ; i++) {
for (i = 0 ; i < conf->raid_disks * 2 ; i++) {
sector_t dist;
sector_t first_bad;
int bad_sectors;
Expand Down Expand Up @@ -975,7 +975,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
*/
plugged = mddev_check_plugged(mddev);

disks = conf->raid_disks;
disks = conf->raid_disks * 2;
retry_write:
blocked_rdev = NULL;
rcu_read_lock();
Expand All @@ -989,7 +989,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
}
r1_bio->bios[i] = NULL;
if (!rdev || test_bit(Faulty, &rdev->flags)) {
set_bit(R1BIO_Degraded, &r1_bio->state);
if (i < conf->raid_disks)
set_bit(R1BIO_Degraded, &r1_bio->state);
continue;
}

Expand Down Expand Up @@ -1493,7 +1494,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
}
}
d++;
if (d == conf->raid_disks)
if (d == conf->raid_disks * 2)
d = 0;
} while (!success && d != r1_bio->read_disk);

Expand All @@ -1510,7 +1511,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
mdname(mddev),
bdevname(bio->bi_bdev, b),
(unsigned long long)r1_bio->sector);
for (d = 0; d < conf->raid_disks; d++) {
for (d = 0; d < conf->raid_disks * 2; d++) {
rdev = conf->mirrors[d].rdev;
if (!rdev || test_bit(Faulty, &rdev->flags))
continue;
Expand All @@ -1536,7 +1537,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
/* write it back and re-read */
while (d != r1_bio->read_disk) {
if (d == 0)
d = conf->raid_disks;
d = conf->raid_disks * 2;
d--;
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
Expand All @@ -1551,7 +1552,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
d = start;
while (d != r1_bio->read_disk) {
if (d == 0)
d = conf->raid_disks;
d = conf->raid_disks * 2;
d--;
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
Expand Down Expand Up @@ -1584,15 +1585,15 @@ static int process_checks(struct r1bio *r1_bio)
int primary;
int i;

for (primary = 0; primary < conf->raid_disks; primary++)
for (primary = 0; primary < conf->raid_disks * 2; primary++)
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
r1_bio->bios[primary]->bi_end_io = NULL;
rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
break;
}
r1_bio->read_disk = primary;
for (i = 0; i < conf->raid_disks; i++) {
for (i = 0; i < conf->raid_disks * 2; i++) {
int j;
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
struct bio *pbio = r1_bio->bios[primary];
Expand Down Expand Up @@ -1656,7 +1657,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
{
struct r1conf *conf = mddev->private;
int i;
int disks = conf->raid_disks;
int disks = conf->raid_disks * 2;
struct bio *bio, *wbio;

bio = r1_bio->bios[r1_bio->read_disk];
Expand Down Expand Up @@ -1737,7 +1738,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
success = 1;
else {
d++;
if (d == conf->raid_disks)
if (d == conf->raid_disks * 2)
d = 0;
}
} while (!success && d != read_disk);
Expand All @@ -1753,7 +1754,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
start = d;
while (d != read_disk) {
if (d==0)
d = conf->raid_disks;
d = conf->raid_disks * 2;
d--;
rdev = conf->mirrors[d].rdev;
if (rdev &&
Expand All @@ -1765,7 +1766,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
while (d != read_disk) {
char b[BDEVNAME_SIZE];
if (d==0)
d = conf->raid_disks;
d = conf->raid_disks * 2;
d--;
rdev = conf->mirrors[d].rdev;
if (rdev &&
Expand Down Expand Up @@ -1887,7 +1888,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
{
int m;
int s = r1_bio->sectors;
for (m = 0; m < conf->raid_disks ; m++) {
for (m = 0; m < conf->raid_disks * 2 ; m++) {
struct md_rdev *rdev = conf->mirrors[m].rdev;
struct bio *bio = r1_bio->bios[m];
if (bio->bi_end_io == NULL)
Expand All @@ -1909,7 +1910,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
{
int m;
for (m = 0; m < conf->raid_disks ; m++)
for (m = 0; m < conf->raid_disks * 2 ; m++)
if (r1_bio->bios[m] == IO_MADE_GOOD) {
struct md_rdev *rdev = conf->mirrors[m].rdev;
rdev_clear_badblocks(rdev,
Expand Down Expand Up @@ -2184,7 +2185,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
r1_bio->state = 0;
set_bit(R1BIO_IsSync, &r1_bio->state);

for (i=0; i < conf->raid_disks; i++) {
for (i = 0; i < conf->raid_disks * 2; i++) {
struct md_rdev *rdev;
bio = r1_bio->bios[i];

Expand All @@ -2203,7 +2204,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev == NULL ||
test_bit(Faulty, &rdev->flags)) {
still_degraded = 1;
if (i < conf->raid_disks)
still_degraded = 1;
} else if (!test_bit(In_sync, &rdev->flags)) {
bio->bi_rw = WRITE;
bio->bi_end_io = end_sync_write;
Expand Down Expand Up @@ -2254,7 +2256,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
* need to mark them bad on all write targets
*/
int ok = 1;
for (i = 0 ; i < conf->raid_disks ; i++)
for (i = 0 ; i < conf->raid_disks * 2 ; i++)
if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
struct md_rdev *rdev =
rcu_dereference(conf->mirrors[i].rdev);
Expand Down Expand Up @@ -2323,7 +2325,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
len = sync_blocks<<9;
}

for (i=0 ; i < conf->raid_disks; i++) {
for (i = 0 ; i < conf->raid_disks * 2; i++) {
bio = r1_bio->bios[i];
if (bio->bi_end_io) {
page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
Expand Down Expand Up @@ -2356,7 +2358,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
*/
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
atomic_set(&r1_bio->remaining, read_targets);
for (i=0; i<conf->raid_disks; i++) {
for (i = 0; i < conf->raid_disks * 2; i++) {
bio = r1_bio->bios[i];
if (bio->bi_end_io == end_sync_read) {
md_sync_acct(bio->bi_bdev, nr_sectors);
Expand Down Expand Up @@ -2393,7 +2395,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
if (!conf)
goto abort;

conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
conf->mirrors = kzalloc(sizeof(struct mirror_info)
* mddev->raid_disks * 2,
GFP_KERNEL);
if (!conf->mirrors)
goto abort;
Expand All @@ -2405,7 +2408,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
if (!conf->poolinfo)
goto abort;
conf->poolinfo->raid_disks = mddev->raid_disks;
conf->poolinfo->raid_disks = mddev->raid_disks * 2;
conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
r1bio_pool_free,
conf->poolinfo);
Expand Down Expand Up @@ -2438,7 +2441,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
conf->recovery_disabled = mddev->recovery_disabled - 1;

conf->last_used = -1;
for (i = 0; i < conf->raid_disks; i++) {
for (i = 0; i < conf->raid_disks * 2; i++) {

disk = conf->mirrors + i;

Expand Down Expand Up @@ -2665,15 +2668,16 @@ static int raid1_reshape(struct mddev *mddev)
if (!newpoolinfo)
return -ENOMEM;
newpoolinfo->mddev = mddev;
newpoolinfo->raid_disks = raid_disks;
newpoolinfo->raid_disks = raid_disks * 2;

newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
r1bio_pool_free, newpoolinfo);
if (!newpool) {
kfree(newpoolinfo);
return -ENOMEM;
}
newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL);
newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2,
GFP_KERNEL);
if (!newmirrors) {
kfree(newpoolinfo);
mempool_destroy(newpool);
Expand Down
7 changes: 6 additions & 1 deletion drivers/md/raid1.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ struct mirror_info {
* pool was allocated for, so they know how much to allocate and free.
* mddev->raid_disks cannot be used, as it can change while a pool is active
* These two datums are stored in a kmalloced struct.
* The 'raid_disks' here is twice the raid_disks in r1conf.
* This leaves room for each 'real' device to have a replacement in the
* second half of the array.
*/

struct pool_info {
Expand All @@ -21,7 +24,9 @@ struct pool_info {

struct r1conf {
struct mddev *mddev;
struct mirror_info *mirrors;
struct mirror_info *mirrors; /* twice 'raid_disks' to
* allow for replacements.
*/
int raid_disks;

/* When choosing the best device for a read (read_balance())
Expand Down

0 comments on commit 8f19ccb

Please sign in to comment.