Skip to content

Commit

Permalink
drbd: Consider the disk-timeout also for meta-data IO operations
Browse files Browse the repository at this point in the history
If the backing device is already frozen during attach, we failed
to recognize that. The current disk-timeout code works on top
of the drbd_request objects. During attach we do not allow IO
and therefore never generate a drbd_request object but block
before that in drbd_make_request().

This patch adds the timeout to all drbd_md_sync_page_io().

Before this patch we used to go from D_ATTACHING directly
to D_DISKLESS if IO failed during attach. We can no longer
do this since we have to stay in D_FAILED until all IO
ops issued to the backing device returned.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
  • Loading branch information
Philipp Reisner committed Nov 8, 2012
1 parent 25b0d6c commit 32db80f
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 31 deletions.
18 changes: 15 additions & 3 deletions drivers/block/drbd/drbd_actlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,9 +129,21 @@ static bool md_io_allowed(struct drbd_conf *mdev)
return ds >= D_NEGOTIATING || ds == D_ATTACHING;
}

void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done)
void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
unsigned int *done)
{
wait_event(mdev->misc_wait, *done || !md_io_allowed(mdev));
long dt;

rcu_read_lock();
dt = rcu_dereference(bdev->disk_conf)->disk_timeout;
rcu_read_unlock();
dt = dt * HZ / 10;
if (dt == 0)
dt = MAX_SCHEDULE_TIMEOUT;

dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt);
if (dt == 0)
dev_err(DEV, "meta-data IO operation timed out\n");
}

static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
Expand Down Expand Up @@ -171,7 +183,7 @@ static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
bio_endio(bio, -EIO);
else
submit_bio(rw, bio);
wait_until_done_or_disk_failure(mdev, &mdev->md_io.done);
wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done);
if (bio_flagged(bio, BIO_UPTODATE))
err = mdev->md_io.error;

Expand Down
4 changes: 2 additions & 2 deletions drivers/block/drbd/drbd_bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1123,7 +1123,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
* "in_flight reached zero, all done" event.
*/
if (!atomic_dec_and_test(&ctx->in_flight))
wait_until_done_or_disk_failure(mdev, &ctx->done);
wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
else
kref_put(&ctx->kref, &bm_aio_ctx_destroy);

Expand Down Expand Up @@ -1242,7 +1242,7 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
}

bm_page_io_async(ctx, idx, WRITE_SYNC);
wait_until_done_or_disk_failure(mdev, &ctx->done);
wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);

if (ctx->error)
drbd_chk_io_error(mdev, 1, true);
Expand Down
3 changes: 2 additions & 1 deletion drivers/block/drbd/drbd_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -1428,7 +1428,8 @@ extern void drbd_md_put_buffer(struct drbd_conf *mdev);
extern int drbd_md_sync_page_io(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev, sector_t sector, int rw);
extern void drbd_ov_out_of_sync_found(struct drbd_conf *, sector_t, int);
extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, unsigned int *done);
extern void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
unsigned int *done);
extern void drbd_rs_controller_reset(struct drbd_conf *mdev);

static inline void ov_out_of_sync_print(struct drbd_conf *mdev)
Expand Down
54 changes: 29 additions & 25 deletions drivers/block/drbd/drbd_state.c
Original file line number Diff line number Diff line change
Expand Up @@ -1308,37 +1308,41 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* first half of local IO error, failure to attach,
* or administrative detach */
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
enum drbd_io_error_p eh;
int was_io_error;
enum drbd_io_error_p eh = EP_PASS_ON;
int was_io_error = 0;
/* corresponding get_ldev was in __drbd_set_state, to serialize
* our cleanup here with the transition to D_DISKLESS,
* so it is safe to dreference ldev here. */
rcu_read_lock();
eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
rcu_read_unlock();
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
* our cleanup here with the transition to D_DISKLESS.
* But is is still not save to dreference ldev here, since
* we might come from an failed Attach before ldev was set. */
if (mdev->ldev) {
rcu_read_lock();
eh = rcu_dereference(mdev->ldev->disk_conf)->on_io_error;
rcu_read_unlock();

/* Immediately allow completion of all application IO, that waits
for completion from the local disk. */
tl_abort_disk_io(mdev);
was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);

/* current state still has to be D_FAILED,
* there is only one way out: to D_DISKLESS,
* and that may only happen after our put_ldev below. */
if (mdev->state.disk != D_FAILED)
dev_err(DEV,
"ASSERT FAILED: disk is %s during detach\n",
drbd_disk_str(mdev->state.disk));
/* Immediately allow completion of all application IO, that waits
for completion from the local disk. */
tl_abort_disk_io(mdev);

if (ns.conn >= C_CONNECTED)
drbd_send_state(mdev, ns);
/* current state still has to be D_FAILED,
* there is only one way out: to D_DISKLESS,
* and that may only happen after our put_ldev below. */
if (mdev->state.disk != D_FAILED)
dev_err(DEV,
"ASSERT FAILED: disk is %s during detach\n",
drbd_disk_str(mdev->state.disk));

drbd_rs_cancel_all(mdev);
if (ns.conn >= C_CONNECTED)
drbd_send_state(mdev, ns);

/* In case we want to get something to stable storage still,
* this may be the last chance.
* Following put_ldev may transition to D_DISKLESS. */
drbd_md_sync(mdev);
drbd_rs_cancel_all(mdev);

/* In case we want to get something to stable storage still,
* this may be the last chance.
* Following put_ldev may transition to D_DISKLESS. */
drbd_md_sync(mdev);
}
put_ldev(mdev);

if (was_io_error && eh == EP_CALL_HELPER)
Expand Down

0 comments on commit 32db80f

Please sign in to comment.