---

yaml --- r: 345696 b: refs/heads/master c: a2a3c74 h: refs/heads/master v: v3
git-mirror · Oct 30, 2012 · a5356a8 · a5356a8
1 parent b62eb19
commit a5356a8
Show file tree

Hide file tree

Showing 6 changed files with 69 additions and 20 deletions.
diff --git a/[refs] b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 06f10adbdb027b225fd51584a218fa8344169514
+refs/heads/master: a2a3c74f243d5d1793f89ccdceaa6918851f7fce
diff --git a/trunk/drivers/block/drbd/drbd_int.h b/trunk/drivers/block/drbd/drbd_int.h
@@ -831,7 +831,8 @@ enum drbd_flag {
 				   once no more io in flight, start bitmap io */
 	BITMAP_IO_QUEUED,       /* Started bitmap IO */
 	GO_DISKLESS,		/* Disk is being detached, on io-error or admin request. */
-	WAS_IO_ERROR,		/* Local disk failed returned IO error */
+	WAS_IO_ERROR,		/* Local disk failed, returned IO error */
+	WAS_READ_ERROR,		/* Local disk READ failed (set additionally to the above) */
 	FORCE_DETACH,		/* Force-detach from local disk, aborting any pending local IO */
 	RESYNC_AFTER_NEG,       /* Resync after online grow after the attach&negotiate finished. */
 	NET_CONGESTED,		/* The data socket is congested */
@@ -1879,30 +1880,53 @@ static inline int drbd_request_state(struct drbd_conf *mdev,
 }
 
 enum drbd_force_detach_flags {
-	DRBD_IO_ERROR,
+	DRBD_READ_ERROR,
+	DRBD_WRITE_ERROR,
 	DRBD_META_IO_ERROR,
 	DRBD_FORCE_DETACH,
 };
 
 #define __drbd_chk_io_error(m,f) __drbd_chk_io_error_(m,f, __func__)
 static inline void __drbd_chk_io_error_(struct drbd_conf *mdev,
-		enum drbd_force_detach_flags forcedetach,
+		enum drbd_force_detach_flags df,
 		const char *where)
 {
 	switch (mdev->ldev->dc.on_io_error) {
 	case EP_PASS_ON:
-		if (forcedetach == DRBD_IO_ERROR) {
+		if (df == DRBD_READ_ERROR || df == DRBD_WRITE_ERROR) {
 			if (__ratelimit(&drbd_ratelimit_state))
 				dev_err(DEV, "Local IO failed in %s.\n", where);
 			if (mdev->state.disk > D_INCONSISTENT)
 				_drbd_set_state(_NS(mdev, disk, D_INCONSISTENT), CS_HARD, NULL);
 			break;
 		}
-		/* NOTE fall through to detach case if forcedetach set */
+		/* NOTE fall through for DRBD_META_IO_ERROR or DRBD_FORCE_DETACH */
 	case EP_DETACH:
 	case EP_CALL_HELPER:
+		/* Remember whether we saw a READ or WRITE error.
+		 *
+		 * Recovery of the affected area for WRITE failure is covered
+		 * by the activity log.
+		 * READ errors may fall outside that area though. Certain READ
+		 * errors can be "healed" by writing good data to the affected
+		 * blocks, which triggers block re-allocation in lower layers.
+		 *
+		 * If we can not write the bitmap after a READ error,
+		 * we may need to trigger a full sync (see w_go_diskless()).
+		 *
+		 * Force-detach is not really an IO error, but rather a
+		 * desperate measure to try to deal with a completely
+		 * unresponsive lower level IO stack.
+		 * Still it should be treated as a WRITE error.
+		 *
+		 * Meta IO error is always WRITE error:
+		 * we read meta data only once during attach,
+		 * which will fail in case of errors.
+		 */
 		drbd_set_flag(mdev, WAS_IO_ERROR);
-		if (forcedetach == DRBD_FORCE_DETACH)
+		if (df == DRBD_READ_ERROR)
+			drbd_set_flag(mdev, WAS_READ_ERROR);
+		if (df == DRBD_FORCE_DETACH)
 			drbd_set_flag(mdev, FORCE_DETACH);
 		if (mdev->state.disk > D_FAILED) {
 			_drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);

diff --git a/trunk/drivers/block/drbd/drbd_main.c b/trunk/drivers/block/drbd/drbd_main.c
@@ -1617,17 +1617,20 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 	/* first half of local IO error, failure to attach,
 	 * or administrative detach */
 	if (os.disk != D_FAILED && ns.disk == D_FAILED) {
-		enum drbd_io_error_p eh = EP_PASS_ON;
-		int was_io_error = 0;
 		/* corresponding get_ldev was in __drbd_set_state, to serialize
 		 * our cleanup here with the transition to D_DISKLESS.
-		 * But is is still not save to dreference ldev here, since
-		 * we might come from an failed Attach before ldev was set. */
+		 * But it is still not safe to dreference ldev here, we may end
+		 * up here from a failed attach, before ldev was even set.  */
 		if (mdev->ldev) {
-			eh = mdev->ldev->dc.on_io_error;
-			was_io_error = drbd_test_and_clear_flag(mdev, WAS_IO_ERROR);
-
-			if (was_io_error && eh == EP_CALL_HELPER)
+			enum drbd_io_error_p eh = mdev->ldev->dc.on_io_error;
+
+			/* In some setups, this handler triggers a suicide,
+			 * basically mapping IO error to node failure, to
+			 * reduce the number of different failure scenarios.
+			 *
+			 * This handler intentionally runs before we abort IO,
+			 * notify the peer, or try to update our meta data. */
+			if (eh == EP_CALL_HELPER && drbd_test_flag(mdev, WAS_IO_ERROR))
 				drbd_khelper(mdev, "local-io-error");
 
 			/* Immediately allow completion of all application IO,
@@ -1643,7 +1646,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
 			 * So aborting local requests may cause crashes,
 			 * or even worse, silent data corruption.
 			 */
-			if (drbd_test_and_clear_flag(mdev, FORCE_DETACH))
+			if (drbd_test_flag(mdev, FORCE_DETACH))
 				tl_abort_disk_io(mdev);
 
 			/* current state still has to be D_FAILED,
@@ -4220,6 +4223,26 @@ static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused
 	 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
 	 * the protected members anymore, though, so once put_ldev reaches zero
 	 * again, it will be safe to free them. */
+
+	/* Try to write changed bitmap pages, read errors may have just
+	 * set some bits outside the area covered by the activity log.
+	 *
+	 * If we have an IO error during the bitmap writeout,
+	 * we will want a full sync next time, just in case.
+	 * (Do we want a specific meta data flag for this?)
+	 *
+	 * If that does not make it to stable storage either,
+	 * we cannot do anything about that anymore.  */
+	if (mdev->bitmap) {
+		if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
+					"detach", BM_LOCKED_MASK)) {
+			if (drbd_test_flag(mdev, WAS_READ_ERROR)) {
+				drbd_md_set_flag(mdev, MDF_FULL_SYNC);
+				drbd_md_sync(mdev);
+			}
+		}
+	}
+
 	drbd_force_state(mdev, NS(disk, D_DISKLESS));
 	return 1;
 }

diff --git a/trunk/drivers/block/drbd/drbd_nl.c b/trunk/drivers/block/drbd/drbd_nl.c
@@ -959,6 +959,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
 
 	/* make sure there is no leftover from previous force-detach attempts */
 	drbd_clear_flag(mdev, FORCE_DETACH);
+	drbd_clear_flag(mdev, WAS_IO_ERROR);
+	drbd_clear_flag(mdev, WAS_READ_ERROR);
 
 	/* and no leftover from previously aborted resync or verify, either */
 	mdev->rs_total = 0;

diff --git a/trunk/drivers/block/drbd/drbd_req.c b/trunk/drivers/block/drbd/drbd_req.c
@@ -455,7 +455,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 		req->rq_state |= RQ_LOCAL_COMPLETED;
 		req->rq_state &= ~RQ_LOCAL_PENDING;
 
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
 		_req_may_be_done_not_susp(req, m);
 		break;
 
@@ -477,7 +477,7 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what,
 			break;
 		}
 
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
 
 	goto_queue_for_net_read:
 

diff --git a/trunk/drivers/block/drbd/drbd_worker.c b/trunk/drivers/block/drbd/drbd_worker.c
@@ -111,7 +111,7 @@ void drbd_endio_read_sec_final(struct drbd_epoch_entry *e) __releases(local)
 	if (list_empty(&mdev->read_ee))
 		wake_up(&mdev->ee_wait);
 	if (test_bit(__EE_WAS_ERROR, &e->flags))
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_READ_ERROR);
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
 
 	drbd_queue_work(&mdev->data.work, &e->w);
@@ -154,7 +154,7 @@ static void drbd_endio_write_sec_final(struct drbd_epoch_entry *e) __releases(lo
 		: list_empty(&mdev->active_ee);
 
 	if (test_bit(__EE_WAS_ERROR, &e->flags))
-		__drbd_chk_io_error(mdev, DRBD_IO_ERROR);
+		__drbd_chk_io_error(mdev, DRBD_WRITE_ERROR);
 	spin_unlock_irqrestore(&mdev->req_lock, flags);
 
 	if (is_syncer_req)