Skip to content

Commit

Permalink
[PATCH] md: Final stages of raid5 expand code
Browse files Browse the repository at this point in the history
This patch adds raid5_reshape and end_reshape which will start and finish the
reshape processes.

raid5_reshape is only enabled if CONFIG_MD_RAID5_RESHAPE is set, to discourage
accidental use.

Read the 'help' for the CONFIG_MD_RAID5_RESHAPE entry.

And make sure that you have backups, just in case.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
NeilBrown authored and Linus Torvalds committed Mar 27, 2006
1 parent ccfcc3c commit 2926955
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 4 deletions.
26 changes: 26 additions & 0 deletions drivers/md/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,32 @@ config MD_RAID5

If unsure, say Y.

config MD_RAID5_RESHAPE
bool "Support adding drives to a raid-5 array (experimental)"
depends on MD_RAID5 && EXPERIMENTAL
---help---
A RAID-5 set can be expanded by adding extra drives. This
requires "restriping" the array which means (almost) every
block must be written to a different place.

This option allows such restriping to be done while the array
is online. However it is still EXPERIMENTAL code. It should
work, but please be sure that you have backups.

You will need a version of mdadm newer than 2.3.1. During the
early stage of reshape there is a critical section where live data
is being over-written. A crash during this time needs extra care
for recovery. The newer mdadm takes a copy of the data in the
critical section and will restore it, if necessary, after a crash.

The mdadm usage is e.g.
mdadm --grow /dev/md1 --raid-disks=6
to grow '/dev/md1' to 6 disks.

Note: The array can only be expanded, not contracted.
There should be enough spares already present to make the new
array workable.

config MD_RAID6
tristate "RAID-6 mode"
depends on BLK_DEV_MD
Expand Down
6 changes: 4 additions & 2 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -158,11 +158,12 @@ static int start_readonly;
*/
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
static void md_new_event(mddev_t *mddev)
/*
 * md_new_event - bump the global md event counter and wake event waiters.
 *
 * Increments md_event_count and then wakes everything sleeping on
 * md_event_waiters.  The @mddev argument is not used by the body.
 */
void md_new_event(mddev_t *mddev)
{
	/* Count the event before waking anyone, so waiters see the new value. */
	atomic_inc(&md_event_count);
	wake_up(&md_event_waiters);
}
EXPORT_SYMBOL_GPL(md_new_event);

/*
* Enables iteration over all existing md arrays
Expand Down Expand Up @@ -4467,7 +4468,7 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);

#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
static void md_do_sync(mddev_t *mddev)
void md_do_sync(mddev_t *mddev)
{
mddev_t *mddev2;
unsigned int currspeed = 0,
Expand Down Expand Up @@ -4704,6 +4705,7 @@ static void md_do_sync(mddev_t *mddev)
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
EXPORT_SYMBOL_GPL(md_do_sync);


/*
Expand Down
123 changes: 122 additions & 1 deletion drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,8 @@ static int grow_stripes(raid5_conf_t *conf, int num)
}
return 0;
}

#ifdef CONFIG_MD_RAID5_RESHAPE
static int resize_stripes(raid5_conf_t *conf, int newsize)
{
/* Make all the stripes able to hold 'newsize' devices.
Expand Down Expand Up @@ -451,7 +453,7 @@ static int resize_stripes(raid5_conf_t *conf, int newsize)
conf->pool_size = newsize;
return err;
}

#endif

static int drop_one_stripe(raid5_conf_t *conf)
{
Expand Down Expand Up @@ -1034,6 +1036,8 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
return 0;
}

static void end_reshape(raid5_conf_t *conf);

static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks)
{
int sectors_per_chunk = conf->chunk_size >> 9;
Expand Down Expand Up @@ -1844,6 +1848,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
if (sector_nr >= max_sector) {
/* just being told to finish up .. nothing much to do */
unplug_slaves(mddev);
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
end_reshape(conf);
return 0;
}

if (mddev->curr_resync < max_sector) /* aborted */
bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
Expand Down Expand Up @@ -2464,6 +2472,116 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors)
return 0;
}

#ifdef CONFIG_MD_RAID5_RESHAPE
/*
 * raid5_reshape - start growing the array to @raid_disks devices.
 *
 * Checks that the request is possible, resizes the stripe cache for the
 * new width, activates as many non-faulty spares as raid5_add_disk()
 * accepts, and then starts a "%s_reshape" thread running md_do_sync().
 *
 * Returns:
 *   0        reshape started, or the array already has @raid_disks devices
 *   -EBUSY   array is degraded or a recovery is already running
 *   -EINVAL  @raid_disks is smaller than the current count, or there are
 *            too few spares to build even a degraded array of that size
 *   -ENOSPC  stripe cache too small for the chunk size
 *   -EAGAIN  the reshape thread could not be registered
 *   other    whatever resize_stripes() returned on failure
 */
static int raid5_reshape(mddev_t *mddev, int raid_disks)
{
	raid5_conf_t *conf = mddev_to_conf(mddev);
	int err;
	mdk_rdev_t *rdev;
	struct list_head *rtmp;
	int spares = 0;
	int added_devices = 0;

	if (mddev->degraded ||
	    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
		return -EBUSY;
	if (conf->raid_disks > raid_disks)
		return -EINVAL; /* Cannot shrink array yet */
	if (conf->raid_disks == raid_disks)
		return 0; /* nothing to do */

	/* Can only proceed if there are plenty of stripe_heads.
	 * We need a minimum of one full stripe, and for sensible progress
	 * it is best to have about 4 times that.
	 * If we require 4 times, then the default 256 4K stripe_heads will
	 * allow for chunk sizes up to 256K, which is probably OK.
	 * If the chunk size is greater, user-space should request more
	 * stripe_heads first.
	 */
	if ((mddev->chunk_size / STRIPE_SIZE) * 4 > conf->max_nr_stripes) {
		printk(KERN_WARNING "raid5: reshape: not enough stripes.  Needed %lu\n",
		       (mddev->chunk_size / STRIPE_SIZE)*4);
		return -ENOSPC;
	}

	/* Count the non-faulty devices that are not yet part of the array. */
	ITERATE_RDEV(mddev, rdev, rtmp)
		if (rdev->raid_disk < 0 &&
		    !test_bit(Faulty, &rdev->flags))
			spares++;
	if (conf->raid_disks + spares < raid_disks-1)
		/* Not enough devices even to make a degraded array
		 * of that size
		 */
		return -EINVAL;

	err = resize_stripes(conf, raid_disks);
	if (err)
		return err;

	/* Switch to the new geometry; expand_progress 0 marks the start. */
	spin_lock_irq(&conf->device_lock);
	conf->previous_raid_disks = conf->raid_disks;
	mddev->raid_disks = conf->raid_disks = raid_disks;
	conf->expand_progress = 0;
	spin_unlock_irq(&conf->device_lock);

	/* Add some new drives, as many as will fit.
	 * We know there are enough to make the newly sized array work.
	 */
	ITERATE_RDEV(mddev, rdev, rtmp)
		if (rdev->raid_disk < 0 &&
		    !test_bit(Faulty, &rdev->flags)) {
			if (raid5_add_disk(mddev, rdev)) {
				char nm[20];
				set_bit(In_sync, &rdev->flags);
				conf->working_disks++;
				added_devices++;
				/* Bounded format: never write past nm[]. */
				snprintf(nm, sizeof(nm), "rd%d", rdev->raid_disk);
				/* A missing sysfs link is not fatal to the
				 * reshape, but do not fail silently.
				 */
				if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm))
					printk(KERN_WARNING
					       "raid5: failed to create link %s\n",
					       nm);
			} else
				break;
		}

	/* Any new slot that did not get a device leaves the array degraded. */
	mddev->degraded = (raid_disks - conf->previous_raid_disks) - added_devices;
	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
	clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
	set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
	set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
	mddev->sync_thread = md_register_thread(md_do_sync, mddev,
						"%s_reshape");
	if (!mddev->sync_thread) {
		/* Could not start the thread: revert to the old geometry.
		 * NOTE(review): devices added above and mddev->degraded are
		 * not rolled back on this path - confirm this is acceptable.
		 */
		mddev->recovery = 0;
		spin_lock_irq(&conf->device_lock);
		mddev->raid_disks = conf->raid_disks = conf->previous_raid_disks;
		conf->expand_progress = MaxSector;
		spin_unlock_irq(&conf->device_lock);
		return -EAGAIN;
	}
	md_wakeup_thread(mddev->sync_thread);
	md_new_event(mddev);
	return 0;
}
#endif

/*
 * end_reshape - publish the new array size once a reshape has finished.
 *
 * Recomputes array_size from the per-device size and the new device count
 * (one device's worth is excluded), updates the gendisk capacity and the
 * block device inode size, and finally sets expand_progress to MaxSector
 * under device_lock.
 */
static void end_reshape(raid5_conf_t *conf)
{
	struct block_device *bdev;

	conf->mddev->array_size = conf->mddev->size * (conf->mddev->raid_disks-1);
	/* array_size looks to be in 1K units: <<1 gives 512-byte sectors. */
	set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1);
	conf->mddev->changed = 1;

	bdev = bdget_disk(conf->mddev->gendisk, 0);
	if (bdev) {
		mutex_lock(&bdev->bd_inode->i_mutex);
		/* Widen before shifting to bytes: if array_size is a 32-bit
		 * type (NOTE(review): depends on the sector_t configuration -
		 * confirm), "<< 10" would otherwise overflow for arrays
		 * larger than 4GiB and publish a truncated inode size.
		 */
		i_size_write(bdev->bd_inode,
			     (long long)conf->mddev->array_size << 10);
		mutex_unlock(&bdev->bd_inode->i_mutex);
		bdput(bdev);
	}
	spin_lock_irq(&conf->device_lock);
	conf->expand_progress = MaxSector;
	spin_unlock_irq(&conf->device_lock);
}

static void raid5_quiesce(mddev_t *mddev, int state)
{
raid5_conf_t *conf = mddev_to_conf(mddev);
Expand Down Expand Up @@ -2502,6 +2620,9 @@ static struct mdk_personality raid5_personality =
.spare_active = raid5_spare_active,
.sync_request = sync_request,
.resize = raid5_resize,
#ifdef CONFIG_MD_RAID5_RESHAPE
.reshape = raid5_reshape,
#endif
.quiesce = raid5_quiesce,
};

Expand Down
3 changes: 2 additions & 1 deletion include/linux/raid/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
extern void md_super_wait(mddev_t *mddev);
extern int sync_page_io(struct block_device *bdev, sector_t sector, int size,
struct page *page, int rw);

extern void md_do_sync(mddev_t *mddev);
extern void md_new_event(mddev_t *mddev);

#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }

Expand Down

0 comments on commit 2926955

Please sign in to comment.