Skip to content

Commit

Permalink
drbd: Consider the disk-timeout also for meta-data IO operations
Browse files Browse the repository at this point in the history
If the backing device is already frozen during attach, we failed
to recognize that. The current disk-timeout code works on top
of the drbd_request objects. During attach we do not allow IO
and therefore never generate a drbd_request object but block
before that in drbd_make_request().

This patch adds the timeout to all drbd_md_sync_page_io().

Before this patch we used to go from D_ATTACHING directly
to D_DISKLESS if IO failed during attach. We can no longer
do this since we have to stay in D_FAILED until all IO
ops issued to the backing device returned.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
  • Loading branch information
Philipp Reisner committed May 9, 2012
1 parent 4afc433 commit 7caacb6
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 39 deletions.
13 changes: 10 additions & 3 deletions drivers/block/drbd/drbd_actlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,16 @@ static bool md_io_allowed(struct drbd_conf *mdev)
return ds >= D_NEGOTIATING || ds == D_ATTACHING;
}

void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done)
void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
unsigned int *done)
{
wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev));
long dt = bdev->dc.disk_timeout * HZ / 10;
if (dt == 0)
dt = MAX_SCHEDULE_TIMEOUT;

dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt);
if (dt == 0)
dev_err(DEV, "meta-data IO operation timed out\n");
}

static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
Expand Down Expand Up @@ -130,7 +137,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
bio_endio(bio, -EIO);
else
submit_bio(rw, bio);
wait_until_done_or_disk_failure(mdev, &mdev->md_io.done);
wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done);
ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0;

out:
Expand Down
4 changes: 2 additions & 2 deletions drivers/block/drbd/drbd_bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1088,7 +1088,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
* "in_flight reached zero, all done" event.
*/
if (!atomic_dec_and_test(&ctx->in_flight))
wait_until_done_or_disk_failure(mdev, &ctx->done);
wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
else
kref_put(&ctx->kref, &bm_aio_ctx_destroy);

Expand Down Expand Up @@ -1195,7 +1195,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
}

bm_page_io_async(ctx, idx, WRITE_SYNC);
wait_until_done_or_disk_failure(mdev, &ctx->done);
wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);

if (ctx->error)
drbd_chk_io_error(mdev, 1, true);
Expand Down
3 changes: 2 additions & 1 deletion drivers/block/drbd/drbd_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -1541,7 +1541,8 @@ extern void *drbd_md_get_buffer(struct drbd_conf *mdev);
extern void drbd_md_put_buffer(struct drbd_conf *mdev);
extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev, sector_t sector, int rw);
extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done);
extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
unsigned int *done);
extern void drbd_ov_oos_found(struct drbd_conf*, sector_t, int);
extern void drbd_rs_controller_reset(struct drbd_conf *mdev);

Expand Down
64 changes: 31 additions & 33 deletions drivers/block/drbd/drbd_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -916,11 +916,6 @@ static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state
if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
ns.disk = D_DISKLESS;

/* if we are only D_ATTACHING yet,
* we can (and should) go directly to D_DISKLESS. */
if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
ns.disk = D_DISKLESS;

/* After C_DISCONNECTING only C_STANDALONE may follow */
if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
ns.conn = os.conn;
Expand Down Expand Up @@ -1592,35 +1587,38 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* first half of local IO error, failure to attach,
* or administrative detach */
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
enum drbd_io_error_p eh;
int was_io_error;
enum drbd_io_error_p eh = EP_PASS_ON;
int was_io_error = 0;
/* corresponding get_ldev was in __drbd_set_state, to serialize
* our cleanup here with the transition to D_DISKLESS,
* so it is safe to dreference ldev here. */
eh = mdev->ldev->dc.on_io_error;
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);

/* Immediately allow completion of all application IO, that waits
for completion from the local disk. */
tl_abort_disk_io(mdev);

/* current state still has to be D_FAILED,
* there is only one way out: to D_DISKLESS,
* and that may only happen after our put_ldev below. */
if (mdev->state.disk != D_FAILED)
dev_err(DEV,
"ASSERT FAILED: disk is %s during detach\n",
drbd_disk_str(mdev->state.disk));

if (ns.conn >= C_CONNECTED)
drbd_send_state(mdev, ns);

drbd_rs_cancel_all(mdev);

/* In case we want to get something to stable storage still,
* this may be the last chance.
* Following put_ldev may transition to D_DISKLESS. */
drbd_md_sync(mdev);
* our cleanup here with the transition to D_DISKLESS.
* But is is still not save to dreference ldev here, since
* we might come from an failed Attach before ldev was set. */
if (mdev->ldev) {
eh = mdev->ldev->dc.on_io_error;
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);

/* Immediately allow completion of all application IO, that waits
for completion from the local disk. */
tl_abort_disk_io(mdev);

/* current state still has to be D_FAILED,
* there is only one way out: to D_DISKLESS,
* and that may only happen after our put_ldev below. */
if (mdev->state.disk != D_FAILED)
dev_err(DEV,
"ASSERT FAILED: disk is %s during detach\n",
drbd_disk_str(mdev->state.disk));

if (ns.conn >= C_CONNECTED)
drbd_send_state(mdev, ns);

drbd_rs_cancel_all(mdev);

/* In case we want to get something to stable storage still,
* this may be the last chance.
* Following put_ldev may transition to D_DISKLESS. */
drbd_md_sync(mdev);
}
put_ldev(mdev);

if (was_io_error && eh == EP_CALL_HELPER)
Expand Down

0 comments on commit 7caacb6

Please sign in to comment.