Merge branch 'xfs-4.7-error-cfg' into for-next

mariux64 · May 20, 2016 · 544ad71 · 544ad71
2 parents 2a4ad58 + e6b3bb7
commit 544ad71
Show file tree

Hide file tree

Showing 8 changed files with 450 additions and 54 deletions.
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
@@ -1100,22 +1100,18 @@ xfs_bwrite(
 	return error;
 }
 
-STATIC void
+static void
 xfs_buf_bio_end_io(
 	struct bio		*bio)
 {
-	xfs_buf_t		*bp = (xfs_buf_t *)bio->bi_private;
+	struct xfs_buf		*bp = (struct xfs_buf *)bio->bi_private;
 
 	/*
 	 * don't overwrite existing errors - otherwise we can lose errors on
 	 * buffers that require multiple bios to complete.
 	 */
-	if (bio->bi_error) {
-		spin_lock(&bp->b_lock);
-		if (!bp->b_io_error)
-			bp->b_io_error = bio->bi_error;
-		spin_unlock(&bp->b_lock);
-	}
+	if (bio->bi_error)
+		cmpxchg(&bp->b_io_error, 0, bio->bi_error);
 
 	if (!bp->b_error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
 		invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));

diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
@@ -183,6 +183,26 @@ typedef struct xfs_buf {
 	unsigned int		b_page_count;	/* size of page array */
 	unsigned int		b_offset;	/* page offset in first page */
 	int			b_error;	/* error code on I/O */
+
+	/*
+	 * async write failure retry count. Initialised to zero on the first
+	 * failure, then when it exceeds the maximum configured without a
+	 * success the write is considered to be failed permanently and the
+	 * iodone handler will take appropriate action.
+	 *
+	 * For retry timeouts, we record the jiffie of the first failure. This
+	 * means that we can change the retry timeout for buffers already under
+	 * I/O and thus avoid getting stuck in a retry loop with a long timeout.
+	 *
+	 * last_error is used to ensure that we are getting repeated errors, not
+	 * different errors. e.g. a block device might change ENOSPC to EIO when
+	 * a failure timeout occurs, so we want to re-initialise the error
+	 * retry behaviour appropriately when that happens.
+	 */
+	int			b_retries;
+	unsigned long		b_first_retry_time; /* in jiffies */
+	int			b_last_error;
+
 	const struct xfs_buf_ops	*b_ops;
 
 #ifdef XFS_BUF_LOCK_TRACKING

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
@@ -1042,35 +1042,22 @@ xfs_buf_do_callbacks(
 	}
 }
 
-/*
- * This is the iodone() function for buffers which have had callbacks
- * attached to them by xfs_buf_attach_iodone().  It should remove each
- * log item from the buffer's list and call the callback of each in turn.
- * When done, the buffer's fsprivate field is set to NULL and the buffer
- * is unlocked with a call to iodone().
- */
-void
-xfs_buf_iodone_callbacks(
+static bool
+xfs_buf_iodone_callback_error(
 	struct xfs_buf		*bp)
 {
 	struct xfs_log_item	*lip = bp->b_fspriv;
 	struct xfs_mount	*mp = lip->li_mountp;
 	static ulong		lasttime;
 	static xfs_buftarg_t	*lasttarg;
-
-	if (likely(!bp->b_error))
-		goto do_callbacks;
+	struct xfs_error_cfg	*cfg;
 
 	/*
 	 * If we've already decided to shutdown the filesystem because of
 	 * I/O errors, there's no point in giving this a retry.
 	 */
-	if (XFS_FORCED_SHUTDOWN(mp)) {
-		xfs_buf_stale(bp);
-		bp->b_flags |= XBF_DONE;
-		trace_xfs_buf_item_iodone(bp, _RET_IP_);
-		goto do_callbacks;
-	}
+	if (XFS_FORCED_SHUTDOWN(mp))
+		goto out_stale;
 
 	if (bp->b_target != lasttarg ||
 	    time_after(jiffies, (lasttime + 5*HZ))) {
@@ -1079,45 +1066,93 @@ xfs_buf_iodone_callbacks(
 	}
 	lasttarg = bp->b_target;
 
+	/* synchronous writes will have callers process the error */
+	if (!(bp->b_flags & XBF_ASYNC))
+		goto out_stale;
+
+	trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+	ASSERT(bp->b_iodone != NULL);
+
 	/*
 	 * If the write was asynchronous then no one will be looking for the
-	 * error.  Clear the error state and write the buffer out again.
-	 *
-	 * XXX: This helps against transient write errors, but we need to find
-	 * a way to shut the filesystem down if the writes keep failing.
-	 *
-	 * In practice we'll shut the filesystem down soon as non-transient
-	 * errors tend to affect the whole device and a failing log write
-	 * will make us give up.  But we really ought to do better here.
+	 * error.  If this is the first failure of this type, clear the error
+	 * state and write the buffer out again. This means we always retry an
+	 * async write failure at least once, but we also need to set the buffer
+	 * up to behave correctly now for repeated failures.
 	 */
-	if (bp->b_flags & XBF_ASYNC) {
-		ASSERT(bp->b_iodone != NULL);
+	if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL)) ||
+	     bp->b_last_error != bp->b_error) {
+		bp->b_flags |= (XBF_WRITE | XBF_ASYNC |
+			        XBF_DONE | XBF_WRITE_FAIL);
+		bp->b_last_error = bp->b_error;
+		bp->b_retries = 0;
+		bp->b_first_retry_time = jiffies;
+
+		xfs_buf_ioerror(bp, 0);
+		xfs_buf_submit(bp);
+		return true;
+	}
 
-		trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+	/*
+	 * Repeated failure on an async write. Take action according to the
+	 * error configuration we have been set up to use.
+	 */
+	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
 
-		xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
+	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
+	    ++bp->b_retries > cfg->max_retries)
+			goto permanent_error;
+	if (cfg->retry_timeout &&
+	    time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
+			goto permanent_error;
 
-		if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
-			bp->b_flags |= XBF_WRITE | XBF_ASYNC |
-				       XBF_DONE | XBF_WRITE_FAIL;
-			xfs_buf_submit(bp);
-		} else {
-			xfs_buf_relse(bp);
-		}
+	/* At unmount we may treat errors differently */
+	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
+		goto permanent_error;
 
-		return;
-	}
+	/* still a transient error, higher layers will retry */
+	xfs_buf_ioerror(bp, 0);
+	xfs_buf_relse(bp);
+	return true;
 
 	/*
-	 * If the write of the buffer was synchronous, we want to make
-	 * sure to return the error to the caller of xfs_bwrite().
+	 * Permanent error - we need to trigger a shutdown if we haven't already
+	 * to indicate that inconsistency will result from this action.
 	 */
+permanent_error:
+	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+out_stale:
 	xfs_buf_stale(bp);
 	bp->b_flags |= XBF_DONE;
-
 	trace_xfs_buf_error_relse(bp, _RET_IP_);
+	return false;
+}
+
+/*
+ * This is the iodone() function for buffers which have had callbacks attached
+ * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
+ * callback list, mark the buffer as having no more callbacks and then push the
+ * buffer through IO completion processing.
+ */
+void
+xfs_buf_iodone_callbacks(
+	struct xfs_buf		*bp)
+{
+	/*
+	 * If there is an error, process it. Some errors require us
+	 * to run callbacks after failure processing is done so we
+	 * detect that and take appropriate action.
+	 */
+	if (bp->b_error && xfs_buf_iodone_callback_error(bp))
+		return;
+
+	/*
+	 * Successful IO or permanent error. Either way, we can clear the
+	 * retry state here in preparation for the next error that may occur.
+	 */
+	bp->b_last_error = 0;
+	bp->b_retries = 0;
 
-do_callbacks:
 	xfs_buf_do_callbacks(bp);
 	bp->b_fspriv = NULL;
 	bp->b_iodone = NULL;

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
@@ -680,6 +680,9 @@ xfs_mountfs(
 
 	xfs_set_maxicount(mp);
 
+	/* enable fail_at_unmount as default */
+	mp->m_fail_unmount = 1;
+
 	error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
 	if (error)
 		goto out;
@@ -689,10 +692,15 @@ xfs_mountfs(
 	if (error)
 		goto out_remove_sysfs;
 
-	error = xfs_uuid_mount(mp);
+	error = xfs_error_sysfs_init(mp);
 	if (error)
 		goto out_del_stats;
 
+
+	error = xfs_uuid_mount(mp);
+	if (error)
+		goto out_remove_error_sysfs;
+
 	/*
 	 * Set the minimum read and write sizes
 	 */
@@ -956,6 +964,7 @@ xfs_mountfs(
 	cancel_delayed_work_sync(&mp->m_reclaim_work);
 	xfs_reclaim_inodes(mp, SYNC_WAIT);
  out_log_dealloc:
+	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
 	xfs_log_mount_cancel(mp);
  out_fail_wait:
 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@ -967,6 +976,8 @@ xfs_mountfs(
 	xfs_da_unmount(mp);
  out_remove_uuid:
 	xfs_uuid_unmount(mp);
+ out_remove_error_sysfs:
+	xfs_error_sysfs_del(mp);
  out_del_stats:
 	xfs_sysfs_del(&mp->m_stats.xs_kobj);
  out_remove_sysfs:
@@ -1004,6 +1015,14 @@ xfs_unmountfs(
 	 */
 	xfs_log_force(mp, XFS_LOG_SYNC);
 
+	/*
+	 * We now need to tell the world we are unmounting. This will allow
+	 * us to detect that the filesystem is going away and we should error
+	 * out anything that we have been retrying in the background. This will
+	 * prevent neverending retries in AIL pushing from hanging the unmount.
+	 */
+	mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+
 	/*
 	 * Flush all pending changes from the AIL.
 	 */
@@ -1055,6 +1074,7 @@ xfs_unmountfs(
 #endif
 	xfs_free_perag(mp);
 
+	xfs_error_sysfs_del(mp);
 	xfs_sysfs_del(&mp->m_stats.xs_kobj);
 	xfs_sysfs_del(&mp->m_kobj);
 }

diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
@@ -37,6 +37,32 @@ enum {
 	XFS_LOWSP_MAX,
 };
 
+/*
+ * Error Configuration
+ *
+ * Error classes define the subsystem the configuration belongs to.
+ * Error numbers define the errors that are configurable.
+ */
+enum {
+	XFS_ERR_METADATA,
+	XFS_ERR_CLASS_MAX,
+};
+enum {
+	XFS_ERR_DEFAULT,
+	XFS_ERR_EIO,
+	XFS_ERR_ENOSPC,
+	XFS_ERR_ENODEV,
+	XFS_ERR_ERRNO_MAX,
+};
+
+#define XFS_ERR_RETRY_FOREVER	-1
+
+struct xfs_error_cfg {
+	struct xfs_kobj	kobj;
+	int		max_retries;
+	unsigned long	retry_timeout;	/* in jiffies, 0 = no timeout */
+};
+
 typedef struct xfs_mount {
 	struct super_block	*m_super;
 	xfs_tid_t		m_tid;		/* next unused tid for fs */
@@ -127,6 +153,9 @@ typedef struct xfs_mount {
 	int64_t			m_low_space[XFS_LOWSP_MAX];
 						/* low free space thresholds */
 	struct xfs_kobj		m_kobj;
+	struct xfs_kobj		m_error_kobj;
+	struct xfs_kobj		m_error_meta_kobj;
+	struct xfs_error_cfg	m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX];
 	struct xstats		m_stats;	/* per-fs stats */
 
 	struct workqueue_struct *m_buf_workqueue;
@@ -148,6 +177,7 @@ typedef struct xfs_mount {
 	 */
 	__uint32_t		m_generation;
 
+	bool			m_fail_unmount;
 #ifdef DEBUG
 	/*
 	 * DEBUG mode instrumentation to test and/or trigger delayed allocation
@@ -166,6 +196,7 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_WSYNC		(1ULL << 0)	/* for nfs - all metadata ops
 						   must be synchronous except
 						   for space allocations */
+#define XFS_MOUNT_UNMOUNTING	(1ULL << 1)	/* filesystem is unmounting */
 #define XFS_MOUNT_WAS_CLEAN	(1ULL << 3)
 #define XFS_MOUNT_FS_SHUTDOWN	(1ULL << 4)	/* atomic stop of all filesystem
 						   operations, typically for
@@ -364,4 +395,7 @@ extern void	xfs_set_low_space_thresholds(struct xfs_mount *);
 int	xfs_zero_extent(struct xfs_inode *ip, xfs_fsblock_t start_fsb,
 			xfs_off_t count_fsb);
 
+struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp,
+		int error_class, int error);
+
 #endif	/* __XFS_MOUNT_H__ */