Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 345696
b: refs/heads/master
c: a2a3c74
h: refs/heads/master
v: v3
  • Loading branch information
Lars Ellenberg authored and Jens Axboe committed Oct 30, 2012
1 parent b62eb19 commit a5356a8
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 20 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 06f10adbdb027b225fd51584a218fa8344169514
refs/heads/master: a2a3c74f243d5d1793f89ccdceaa6918851f7fce
36 changes: 30 additions & 6 deletions trunk/drivers/block/drbd/drbd_int.h
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,8 @@ enum drbd_flag {
once no more io in flight, start bitmap io */
BITMAP_IO_QUEUED, /* Started bitmap IO */
GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
WAS_IO_ERROR, /* Local disk failed returned IO error */
WAS_IO_ERROR, /* Local disk failed, returned IO error */
WAS_READ_ERROR, /* Local disk READ failed (set additionally to the above) */
FORCE_DETACH, /* Force-detach from local disk, aborting any pending local IO */
RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
NET_CONGESTED, /* The data socket is congested */
Expand Down Expand Up @@ -1879,30 +1880,53 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
}

enum drbd_force_detach_flags {
DRBD_IO_ERROR,
DRBD_READ_ERROR,
DRBD_WRITE_ERROR,
DRBD_META_IO_ERROR,
DRBD_FORCE_DETACH,
};

#define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
enum drbd_force_detach_flags forcedetach,
enum drbd_force_detach_flags df,
const char *where)
{
switch (mdev->ldev->dc.on_io_error) {
case EP_PASS_ON:
if (forcedetach == DRBD_IO_ERROR) {
if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
if (__ratelimit(&drbd_ratelimit_state))
dev_err(DEV, "Local IO failed in %s.\n", where);
if (mdev->state.disk > D_INCONSISTENT)
_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
break;
}
/* NOTE fall through to detach case if forcedetach set */
/* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
case EP_DETACH:
case EP_CALL_HELPER:
/* Remember whether we saw a READ or WRITE error.
*
* Recovery of the affected area for WRITE failure is covered
* by the activity log.
* READ errors may fall outside that area though. Certain READ
* errors can be "healed" by writing good data to the affected
* blocks, which triggers block re-allocation in lower layers.
*
* If we cannot write the bitmap after a READ error,
* we may need to trigger a full sync (see w_go_diskless()).
*
* Force-detach is not really an IO error, but rather a
* desperate measure to try to deal with a completely
* unresponsive lower level IO stack.
* Still it should be treated as a WRITE error.
*
* A meta IO error is always a WRITE error:
* we read meta data only once during attach,
* which will fail in case of errors.
*/
drbd_set_flag(mdev, WAS_IO_ERROR);
if (forcedetach == DRBD_FORCE_DETACH)
if (df == DRBD_READ_ERROR)
drbd_set_flag(mdev, WAS_READ_ERROR);
if (df == DRBD_FORCE_DETACH)
drbd_set_flag(mdev, FORCE_DETACH);
if (mdev->state.disk > D_FAILED) {
_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
Expand Down
41 changes: 32 additions & 9 deletions trunk/drivers/block/drbd/drbd_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1617,17 +1617,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
/* first half of local IO error, failure to attach,
* or administrative detach */
if (os.disk != D_FAILED && ns.disk == D_FAILED) {
enum drbd_io_error_p eh = EP_PASS_ON;
int was_io_error = 0;
/* corresponding get_ldev was in __drbd_set_state, to serialize
* our cleanup here with the transition to D_DISKLESS.
* But is is still not save to dreference ldev here, since
* we might come from an failed Attach before ldev was set. */
* But it is still not safe to dereference ldev here: we may end
* up here from a failed attach, before ldev was even set. */
if (mdev->ldev) {
eh = mdev->ldev->dc.on_io_error;
was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR);

if (was_io_error && eh == EP_CALL_HELPER)
enum drbd_io_error_p eh = mdev->ldev->dc.on_io_error;

/* In some setups, this handler triggers a suicide,
* basically mapping IO error to node failure, to
* reduce the number of different failure scenarios.
*
* This handler intentionally runs before we abort IO,
* notify the peer, or try to update our meta data. */
if (eh == EP_CALL_HELPER && drbd_test_flag(mdev, WAS_IO_ERROR))
drbd_khelper(mdev, "local-io-error");

/* Immediately allow completion of all application IO,
Expand All @@ -1643,7 +1646,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
* So aborting local requests may cause crashes,
* or even worse, silent data corruption.
*/
if (drbd_test_and_clear_flag(mdev, FORCE_DETACH))
if (drbd_test_flag(mdev, FORCE_DETACH))
tl_abort_disk_io(mdev);

/* current state still has to be D_FAILED,
Expand Down Expand Up @@ -4220,6 +4223,26 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused
* inc/dec it frequently. Once we are D_DISKLESS, no one will touch
* the protected members anymore, though, so once put_ldev reaches zero
* again, it will be safe to free them. */

/* Try to write changed bitmap pages, read errors may have just
* set some bits outside the area covered by the activity log.
*
* If we have an IO error during the bitmap writeout,
* we will want a full sync next time, just in case.
* (Do we want a specific meta data flag for this?)
*
* If that does not make it to stable storage either,
* we cannot do anything about that anymore. */
if (mdev->bitmap) {
if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
"detach", BM_LOCKED_MASK)) {
if (drbd_test_flag(mdev, WAS_READ_ERROR)) {
drbd_md_set_flag(mdev, MDF_FULL_SYNC);
drbd_md_sync(mdev);
}
}
}

drbd_force_state(mdev, NS(disk, D_DISKLESS));
return 1;
}
Expand Down
2 changes: 2 additions & 0 deletions trunk/drivers/block/drbd/drbd_nl.c
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp

/* make sure there is no leftover from previous force-detach attempts */
drbd_clear_flag(mdev, FORCE_DETACH);
drbd_clear_flag(mdev, WAS_IO_ERROR);
drbd_clear_flag(mdev, WAS_READ_ERROR);

/* and no leftover from previously aborted resync or verify, either */
mdev->rs_total = 0;
Expand Down
4 changes: 2 additions & 2 deletions trunk/drivers/block/drbd/drbd_req.c
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
req->rq_state |= RQ_LOCAL_COMPLETED;
req->rq_state &= ~RQ_LOCAL_PENDING;

__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
_req_may_be_done_not_susp(req, m);
break;

Expand All @@ -477,7 +477,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
break;
}

__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
__drbd_chk_io_error(mdev, DRBD_READ_ERROR);

goto_queue_for_net_read:

Expand Down
4 changes: 2 additions & 2 deletions trunk/drivers/block/drbd/drbd_worker.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
if (list_empty(&mdev->read_ee))
wake_up(&mdev->ee_wait);
if (test_bit(__EE_WAS_ERROR, &e->flags))
__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);

drbd_queue_work(&mdev->data.work, &e->w);
Expand Down Expand Up @@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
: list_empty(&mdev->active_ee);

if (test_bit(__EE_WAS_ERROR, &e->flags))
__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
spin_unlock_irqrestore(&mdev->req_lock, flags);

if (is_syncer_req)
Expand Down

0 comments on commit a5356a8

Please sign in to comment.