Skip to content

Commit

Permalink
drbd: Allow online change of al-stripes and al-stripe-size
Browse files Browse the repository at this point in the history
Allow to change the AL layout with an resize operation. For that
the reisze command gets two new fields: al_stripes and al_stripe_size.

In order to make the operation crash save:
1) Lock out all IO and MD-IO
2) Write the super block with MDF_PRIMARY_IND clear
3) write the bitmap to the new location (all zeros, since
   we allow only while connected)
4) Initialize the new AL-area
5) Write the super block with the restored MDF_PRIMARY_IND.
6) Unfreeze all IO

Since the AL-layout has no influence on the protocol, this operation
needs to be beforemed on both sides of a resource (if intended).

Signed-off-by: Andreas Gruenbacher <agruen@linbit.com>
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
Philipp Reisner authored and Jens Axboe committed Jun 28, 2013
1 parent e96c963 commit d752b26
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 53 deletions.
21 changes: 21 additions & 0 deletions drivers/block/drbd/drbd_actlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,27 @@ void drbd_al_shrink(struct drbd_conf *mdev)
wake_up(&mdev->al_wait);
}

int drbd_initialize_al(struct drbd_conf *mdev, void *buffer)
{
struct al_transaction_on_disk *al = buffer;
struct drbd_md *md = &mdev->ldev->md;
sector_t al_base = md->md_offset + md->al_offset;
int al_size_4k = md->al_stripes * md->al_stripe_size_4k;
int i;

memset(al, 0, 4096);
al->magic = cpu_to_be32(DRBD_AL_MAGIC);
al->transaction_type = cpu_to_be16(AL_TR_INITIALIZED);
al->crc32c = cpu_to_be32(crc32c(0, al, 4096));

for (i = 0; i < al_size_4k; i++) {
int err = drbd_md_sync_page_io(mdev, mdev->ldev, al_base + i * 8, WRITE);
if (err)
return err;
}
return 0;
}

static int w_update_odbm(struct drbd_work *w, int unused)
{
struct update_odbm_work *udw = container_of(w, struct update_odbm_work, w);
Expand Down
7 changes: 6 additions & 1 deletion drivers/block/drbd/drbd_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -1133,6 +1133,7 @@ extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
void drbd_print_uuids(struct drbd_conf *mdev, const char *text);

extern void conn_md_sync(struct drbd_tconn *tconn);
extern void drbd_md_write(struct drbd_conf *mdev, void *buffer);
extern void drbd_md_sync(struct drbd_conf *mdev);
extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
Expand Down Expand Up @@ -1468,12 +1469,15 @@ extern void drbd_resume_io(struct drbd_conf *mdev);
extern char *ppsize(char *buf, unsigned long long size);
extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, sector_t, int);
enum determine_dev_size {
DS_ERROR_SHRINK = -3,
DS_ERROR_SPACE_MD = -2,
DS_ERROR = -1,
DS_UNCHANGED = 0,
DS_SHRUNK = 1,
DS_GREW = 2
};
extern enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *, enum dds_flags) __must_hold(local);
extern enum determine_dev_size
drbd_determine_dev_size(struct drbd_conf *, enum dds_flags, struct resize_parms *) __must_hold(local);
extern void resync_after_online_grow(struct drbd_conf *);
extern void drbd_reconsider_max_bio_size(struct drbd_conf *mdev);
extern enum drbd_state_rv drbd_set_role(struct drbd_conf *mdev,
Expand Down Expand Up @@ -1639,6 +1643,7 @@ extern int __drbd_set_out_of_sync(struct drbd_conf *mdev, sector_t sector,
#define drbd_set_out_of_sync(mdev, sector, size) \
__drbd_set_out_of_sync(mdev, sector, size, __FILE__, __LINE__)
extern void drbd_al_shrink(struct drbd_conf *mdev);
extern int drbd_initialize_al(struct drbd_conf *, void *);

/* drbd_nl.c */
/* state info broadcast */
Expand Down
57 changes: 33 additions & 24 deletions drivers/block/drbd/drbd_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2879,34 +2879,14 @@ struct meta_data_on_disk {
u8 reserved_u8[4096 - (7*8 + 10*4)];
} __packed;

/**
* drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
* @mdev: DRBD device.
*/
void drbd_md_sync(struct drbd_conf *mdev)


void drbd_md_write(struct drbd_conf *mdev, void *b)
{
struct meta_data_on_disk *buffer;
struct meta_data_on_disk *buffer = b;
sector_t sector;
int i;

/* Don't accidentally change the DRBD meta data layout. */
BUILD_BUG_ON(UI_SIZE != 4);
BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);

del_timer(&mdev->md_sync_timer);
/* timer may be rearmed by drbd_md_mark_dirty() now. */
if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
return;

/* We use here D_FAILED and not D_ATTACHING because we try to write
* metadata even if we detach due to a disk failure! */
if (!get_ldev_if_state(mdev, D_FAILED))
return;

buffer = drbd_md_get_buffer(mdev);
if (!buffer)
goto out;

memset(buffer, 0, sizeof(*buffer));

buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
Expand Down Expand Up @@ -2935,6 +2915,35 @@ void drbd_md_sync(struct drbd_conf *mdev)
dev_err(DEV, "meta data update failed!\n");
drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
}
}

/**
* drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
* @mdev: DRBD device.
*/
void drbd_md_sync(struct drbd_conf *mdev)
{
struct meta_data_on_disk *buffer;

/* Don't accidentally change the DRBD meta data layout. */
BUILD_BUG_ON(UI_SIZE != 4);
BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);

del_timer(&mdev->md_sync_timer);
/* timer may be rearmed by drbd_md_mark_dirty() now. */
if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
return;

/* We use here D_FAILED and not D_ATTACHING because we try to write
* metadata even if we detach due to a disk failure! */
if (!get_ldev_if_state(mdev, D_FAILED))
return;

buffer = drbd_md_get_buffer(mdev);
if (!buffer)
goto out;

drbd_md_write(mdev, buffer);

/* Update mdev->ldev->md.la_size_sect,
* since we updated it on metadata. */
Expand Down
137 changes: 111 additions & 26 deletions drivers/block/drbd/drbd_nl.c
Original file line number Diff line number Diff line change
Expand Up @@ -827,12 +827,17 @@ void drbd_resume_io(struct drbd_conf *mdev)
* Returns 0 on success, negative return values indicate errors.
* You should call drbd_md_sync() after calling this function.
*/
enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags) __must_hold(local)
enum determine_dev_size
drbd_determine_dev_size(struct drbd_conf *mdev, enum dds_flags flags, struct resize_parms *rs) __must_hold(local)
{
sector_t prev_first_sect, prev_size; /* previous meta location */
sector_t la_size_sect, u_size;
struct drbd_md *md = &mdev->ldev->md;
u32 prev_al_stripe_size_4k;
u32 prev_al_stripes;
sector_t size;
char ppb[10];
void *buffer;

int md_moved, la_size_changed;
enum determine_dev_size rv = DS_UNCHANGED;
Expand All @@ -847,6 +852,11 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
* still lock the act_log to not trigger ASSERTs there.
*/
drbd_suspend_io(mdev);
buffer = drbd_md_get_buffer(mdev); /* Lock meta-data IO */
if (!buffer) {
drbd_resume_io(mdev);
return DS_ERROR;
}

/* no wait necessary anymore, actually we could assert that */
wait_event(mdev->al_wait, lc_try_lock(mdev->act_log));
Expand All @@ -855,14 +865,39 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
prev_size = mdev->ldev->md.md_size_sect;
la_size_sect = mdev->ldev->md.la_size_sect;

/* TODO: should only be some assert here, not (re)init... */
if (rs) {
/* rs is non NULL if we should change the AL layout only */

prev_al_stripes = md->al_stripes;
prev_al_stripe_size_4k = md->al_stripe_size_4k;

md->al_stripes = rs->al_stripes;
md->al_stripe_size_4k = rs->al_stripe_size / 4;
md->al_size_4k = (u64)rs->al_stripes * rs->al_stripe_size / 4;
}

drbd_md_set_sector_offsets(mdev, mdev->ldev);

rcu_read_lock();
u_size = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
rcu_read_unlock();
size = drbd_new_dev_size(mdev, mdev->ldev, u_size, flags & DDSF_FORCED);

if (size < la_size_sect) {
if (rs && u_size == 0) {
/* Remove "rs &&" later. This check should always be active, but
right now the receiver expects the permissive behavior */
dev_warn(DEV, "Implicit shrink not allowed. "
"Use --size=%llus for explicit shrink.\n",
(unsigned long long)size);
rv = DS_ERROR_SHRINK;
}
if (u_size > size)
rv = DS_ERROR_SPACE_MD;
if (rv != DS_UNCHANGED)
goto err_out;
}

if (drbd_get_capacity(mdev->this_bdev) != size ||
drbd_bm_capacity(mdev) != size) {
int err;
Expand All @@ -886,38 +921,57 @@ enum determine_dev_size drbd_determine_dev_size(struct drbd_conf *mdev, enum dds
dev_info(DEV, "size = %s (%llu KB)\n", ppsize(ppb, size>>1),
(unsigned long long)size>>1);
}
if (rv == DS_ERROR)
goto out;
if (rv <= DS_ERROR)
goto err_out;

la_size_changed = (la_size_sect != mdev->ldev->md.la_size_sect);

md_moved = prev_first_sect != drbd_md_first_sector(mdev->ldev)
|| prev_size != mdev->ldev->md.md_size_sect;

if (la_size_changed || md_moved) {
int err;
if (la_size_changed || md_moved || rs) {
u32 prev_flags;

drbd_al_shrink(mdev); /* All extents inactive. */

prev_flags = md->flags;
md->flags &= ~MDF_PRIMARY_IND;
drbd_md_write(mdev, buffer);

dev_info(DEV, "Writing the whole bitmap, %s\n",
la_size_changed && md_moved ? "size changed and md moved" :
la_size_changed ? "size changed" : "md moved");
/* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
err = drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
"size changed", BM_LOCKED_MASK);
if (err) {
rv = DS_ERROR;
goto out;
}
drbd_md_mark_dirty(mdev);
drbd_bitmap_io(mdev, md_moved ? &drbd_bm_write_all : &drbd_bm_write,
"size changed", BM_LOCKED_MASK);
drbd_initialize_al(mdev, buffer);

md->flags = prev_flags;
drbd_md_write(mdev, buffer);

if (rs)
dev_info(DEV, "Changed AL layout to al-stripes = %d, al-stripe-size-kB = %d\n",
md->al_stripes, md->al_stripe_size_4k * 4);
}

if (size > la_size_sect)
rv = DS_GREW;
if (size < la_size_sect)
rv = DS_SHRUNK;
out:

if (0) {
err_out:
if (rs) {
md->al_stripes = prev_al_stripes;
md->al_stripe_size_4k = prev_al_stripe_size_4k;
md->al_size_4k = (u64)prev_al_stripes * prev_al_stripe_size_4k;

drbd_md_set_sector_offsets(mdev, mdev->ldev);
}
}
lc_unlock(mdev->act_log);
wake_up(&mdev->al_wait);
drbd_md_put_buffer(mdev);
drbd_resume_io(mdev);

return rv;
Expand Down Expand Up @@ -1618,8 +1672,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info)
!drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND))
set_bit(USE_DEGR_WFC_T, &mdev->flags);

dd = drbd_determine_dev_size(mdev, 0);
if (dd == DS_ERROR) {
dd = drbd_determine_dev_size(mdev, 0, NULL);
if (dd <= DS_ERROR) {
retcode = ERR_NOMEM_BITMAP;
goto force_diskless_dec;
} else if (dd == DS_GREW)
Expand Down Expand Up @@ -2316,6 +2370,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
struct drbd_conf *mdev;
enum drbd_ret_code retcode;
enum determine_dev_size dd;
bool change_al_layout = false;
enum dds_flags ddsf;
sector_t u_size;
int err;
Expand All @@ -2326,31 +2381,33 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
if (retcode != NO_ERROR)
goto fail;

mdev = adm_ctx.mdev;
if (!get_ldev(mdev)) {
retcode = ERR_NO_DISK;
goto fail;
}

memset(&rs, 0, sizeof(struct resize_parms));
rs.al_stripes = mdev->ldev->md.al_stripes;
rs.al_stripe_size = mdev->ldev->md.al_stripe_size_4k * 4;
if (info->attrs[DRBD_NLA_RESIZE_PARMS]) {
err = resize_parms_from_attrs(&rs, info);
if (err) {
retcode = ERR_MANDATORY_TAG;
drbd_msg_put_info(from_attrs_err_to_txt(err));
goto fail;
goto fail_ldev;
}
}

mdev = adm_ctx.mdev;
if (mdev->state.conn > C_CONNECTED) {
retcode = ERR_RESIZE_RESYNC;
goto fail;
goto fail_ldev;
}

if (mdev->state.role == R_SECONDARY &&
mdev->state.peer == R_SECONDARY) {
retcode = ERR_NO_PRIMARY;
goto fail;
}

if (!get_ldev(mdev)) {
retcode = ERR_NO_DISK;
goto fail;
goto fail_ldev;
}

if (rs.no_resync && mdev->tconn->agreed_pro_version < 93) {
Expand All @@ -2369,6 +2426,28 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
}
}

if (mdev->ldev->md.al_stripes != rs.al_stripes ||
mdev->ldev->md.al_stripe_size_4k != rs.al_stripe_size / 4) {
u32 al_size_k = rs.al_stripes * rs.al_stripe_size;

if (al_size_k > (16 * 1024 * 1024)) {
retcode = ERR_MD_LAYOUT_TOO_BIG;
goto fail_ldev;
}

if (al_size_k < MD_32kB_SECT/2) {
retcode = ERR_MD_LAYOUT_TOO_SMALL;
goto fail_ldev;
}

if (mdev->state.conn != C_CONNECTED) {
retcode = ERR_MD_LAYOUT_CONNECTED;
goto fail_ldev;
}

change_al_layout = true;
}

if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev))
mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);

Expand All @@ -2384,12 +2463,18 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info)
}

ddsf = (rs.resize_force ? DDSF_FORCED : 0) | (rs.no_resync ? DDSF_NO_RESYNC : 0);
dd = drbd_determine_dev_size(mdev, ddsf);
dd = drbd_determine_dev_size(mdev, ddsf, change_al_layout ? &rs : NULL);
drbd_md_sync(mdev);
put_ldev(mdev);
if (dd == DS_ERROR) {
retcode = ERR_NOMEM_BITMAP;
goto fail;
} else if (dd == DS_ERROR_SPACE_MD) {
retcode = ERR_MD_LAYOUT_NO_FIT;
goto fail;
} else if (dd == DS_ERROR_SHRINK) {
retcode = ERR_IMPLICIT_SHRINK;
goto fail;
}

if (mdev->state.conn == C_CONNECTED) {
Expand Down
2 changes: 1 addition & 1 deletion drivers/block/drbd/drbd_receiver.c
Original file line number Diff line number Diff line change
Expand Up @@ -3617,7 +3617,7 @@ static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)

ddsf = be16_to_cpu(p->dds_flags);
if (get_ldev(mdev)) {
dd = drbd_determine_dev_size(mdev, ddsf);
dd = drbd_determine_dev_size(mdev, ddsf, NULL);
put_ldev(mdev);
if (dd == DS_ERROR)
return -EIO;
Expand Down
Loading

0 comments on commit d752b26

Please sign in to comment.