Skip to content

Commit

Permalink
[XFS] write barrier support Issue all log sync operations as ordered
Browse files Browse the repository at this point in the history
writes.  In addition flush the disk cache on fsync if the sync cached
operation didn't sync the log to disk (this requires some additional
bookeping in the transaction and log code). If the device doesn't claim to
support barriers, the filesystem has an extern log volume or the trial
superblock write with barriers enabled failed we disable barriers and
print a warning.  We should probably fail the mount completely, but that
could lead to nasty boot failures for the root filesystem.  Not enabled by
default yet, needs more destructive testing first.

SGI-PV: 912426
SGI-Modid: xfs-linux:xfs-kern:198723a

Signed-off-by: Christoph Hellwig <hch@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
  • Loading branch information
Christoph Hellwig authored and Nathan Scott committed Nov 1, 2005
1 parent 739cafd commit f538d4d
Show file tree
Hide file tree
Showing 12 changed files with 190 additions and 59 deletions.
5 changes: 5 additions & 0 deletions fs/xfs/linux-2.6/xfs_buf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1295,6 +1295,11 @@ _pagebuf_ioapply(
rw = (pb->pb_flags & PBF_READ) ? READ : WRITE;
}

if (pb->pb_flags & PBF_ORDERED) {
ASSERT(!(pb->pb_flags & PBF_READ));
rw = WRITE_BARRIER;
}

/* Special code path for reading a sub page size pagebuf in --
* we populate up the whole page, and hence the other metadata
* in the same page. This optimization is only valid when the
Expand Down
8 changes: 4 additions & 4 deletions fs/xfs/linux-2.6/xfs_buf.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ typedef enum page_buf_flags_e { /* pb_flags values */
PBF_DELWRI = (1 << 6), /* buffer has dirty pages */
PBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
PBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
PBF_FLUSH = (1 << 11), /* flush disk write cache */
PBF_ORDERED = (1 << 11), /* use ordered writes */
PBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */

/* flags used only as arguments to access routines */
Expand Down Expand Up @@ -383,9 +383,9 @@ extern void pagebuf_trace(
#define XFS_BUF_UNASYNC(x) ((x)->pb_flags &= ~PBF_ASYNC)
#define XFS_BUF_ISASYNC(x) ((x)->pb_flags & PBF_ASYNC)

#define XFS_BUF_FLUSH(x) ((x)->pb_flags |= PBF_FLUSH)
#define XFS_BUF_UNFLUSH(x) ((x)->pb_flags &= ~PBF_FLUSH)
#define XFS_BUF_ISFLUSH(x) ((x)->pb_flags & PBF_FLUSH)
#define XFS_BUF_ORDERED(x) ((x)->pb_flags |= PBF_ORDERED)
#define XFS_BUF_UNORDERED(x) ((x)->pb_flags &= ~PBF_ORDERED)
#define XFS_BUF_ISORDERED(x) ((x)->pb_flags & PBF_ORDERED)

#define XFS_BUF_SHUT(x) printk("XFS_BUF_SHUT not implemented yet\n")
#define XFS_BUF_UNSHUT(x) printk("XFS_BUF_UNSHUT not implemented yet\n")
Expand Down
66 changes: 66 additions & 0 deletions fs/xfs/linux-2.6/xfs_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,72 @@ xfs_blkdev_put(
close_bdev_excl(bdev);
}

/*
* Try to write out the superblock using barriers.
*/
STATIC int
xfs_barrier_test(
xfs_mount_t *mp)
{
xfs_buf_t *sbp = xfs_getsb(mp, 0);
int error;

XFS_BUF_UNDONE(sbp);
XFS_BUF_UNREAD(sbp);
XFS_BUF_UNDELAYWRITE(sbp);
XFS_BUF_WRITE(sbp);
XFS_BUF_UNASYNC(sbp);
XFS_BUF_ORDERED(sbp);

xfsbdstrat(mp, sbp);
error = xfs_iowait(sbp);

/*
* Clear all the flags we set and possible error state in the
* buffer. We only did the write to try out whether barriers
* worked and shouldn't leave any traces in the superblock
* buffer.
*/
XFS_BUF_DONE(sbp);
XFS_BUF_ERROR(sbp, 0);
XFS_BUF_UNORDERED(sbp);

xfs_buf_relse(sbp);
return error;
}

void
xfs_mountfs_check_barriers(xfs_mount_t *mp)
{
int error;

if (mp->m_logdev_targp != mp->m_ddev_targp) {
xfs_fs_cmn_err(CE_NOTE, mp,
"Disabling barriers, not supported with external log device");
mp->m_flags &= ~XFS_MOUNT_BARRIER;
}

if (mp->m_ddev_targp->pbr_bdev->bd_disk->queue->ordered ==
QUEUE_ORDERED_NONE) {
xfs_fs_cmn_err(CE_NOTE, mp,
"Disabling barriers, not supported by the underlying device");
mp->m_flags &= ~XFS_MOUNT_BARRIER;
}

error = xfs_barrier_test(mp);
if (error) {
xfs_fs_cmn_err(CE_NOTE, mp,
"Disabling barriers, trial barrier write failed");
mp->m_flags &= ~XFS_MOUNT_BARRIER;
}
}

void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
blkdev_issue_flush(buftarg->pbr_bdev, NULL);
}

STATIC struct inode *
linvfs_alloc_inode(
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/linux-2.6/xfs_super.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ extern void xfs_flush_device(struct xfs_inode *);
extern int xfs_blkdev_get(struct xfs_mount *, const char *,
struct block_device **);
extern void xfs_blkdev_put(struct block_device *);
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);

extern struct export_operations linvfs_export_ops;

Expand Down
2 changes: 1 addition & 1 deletion fs/xfs/xfs_clnt.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ struct xfs_mount_args {
* enforcement */
#define XFSMNT_NOUUID 0x01000000 /* Ignore fs uuid */
#define XFSMNT_DMAPI 0x02000000 /* enable dmapi/xdsm */
#define XFSMNT_NOLOGFLUSH 0x04000000 /* Don't flush for log blocks */
#define XFSMNT_BARRIER 0x04000000 /* use write barriers */
#define XFSMNT_IDELETE 0x08000000 /* inode cluster delete */
#define XFSMNT_SWALLOC 0x10000000 /* turn on stripe width
* allocation */
Expand Down
65 changes: 37 additions & 28 deletions fs/xfs/xfs_log.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,11 @@ STATIC int xlog_state_release_iclog(xlog_t *log,
STATIC void xlog_state_switch_iclogs(xlog_t *log,
xlog_in_core_t *iclog,
int eventual_size);
STATIC int xlog_state_sync(xlog_t *log, xfs_lsn_t lsn, uint flags);
STATIC int xlog_state_sync_all(xlog_t *log, uint flags);
STATIC int xlog_state_sync(xlog_t *log,
xfs_lsn_t lsn,
uint flags,
int *log_flushed);
STATIC int xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed);
STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);

/* local functions to manipulate grant head */
Expand Down Expand Up @@ -312,12 +315,17 @@ xfs_log_done(xfs_mount_t *mp,
* semaphore.
*/
int
xfs_log_force(xfs_mount_t *mp,
xfs_lsn_t lsn,
uint flags)
_xfs_log_force(
xfs_mount_t *mp,
xfs_lsn_t lsn,
uint flags,
int *log_flushed)
{
int rval;
xlog_t *log = mp->m_log;
xlog_t *log = mp->m_log;
int dummy;

if (!log_flushed)
log_flushed = &dummy;

#if defined(DEBUG) || defined(XLOG_NOLOG)
if (!xlog_debug && xlog_target == log->l_targ)
Expand All @@ -328,17 +336,12 @@ xfs_log_force(xfs_mount_t *mp,

XFS_STATS_INC(xs_log_force);

if ((log->l_flags & XLOG_IO_ERROR) == 0) {
if (lsn == 0)
rval = xlog_state_sync_all(log, flags);
else
rval = xlog_state_sync(log, lsn, flags);
} else {
rval = XFS_ERROR(EIO);
}

return rval;

if (log->l_flags & XLOG_IO_ERROR)
return XFS_ERROR(EIO);
if (lsn == 0)
return xlog_state_sync_all(log, flags, log_flushed);
else
return xlog_state_sync(log, lsn, flags, log_flushed);
} /* xfs_log_force */

/*
Expand Down Expand Up @@ -1467,14 +1470,13 @@ xlog_sync(xlog_t *log,
XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
/*
* Do a disk write cache flush for the log block.
* This is a bit of a sledgehammer, it would be better
* to use a tag barrier here that just prevents reordering.
* Do an ordered write for the log block.
*
* It may not be needed to flush the first split block in the log wrap
* case, but do it anyways to be safe -AK
*/
if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH))
XFS_BUF_FLUSH(bp);
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
XFS_BUF_ORDERED(bp);

ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
Expand Down Expand Up @@ -1505,8 +1507,8 @@ xlog_sync(xlog_t *log,
XFS_BUF_SET_FSPRIVATE(bp, iclog);
XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
if (!(log->l_mp->m_flags & XFS_MOUNT_NOLOGFLUSH))
XFS_BUF_FLUSH(bp);
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
XFS_BUF_ORDERED(bp);
dptr = XFS_BUF_PTR(bp);
/*
* Bump the cycle numbers at the start of each block
Expand Down Expand Up @@ -2951,7 +2953,7 @@ xlog_state_switch_iclogs(xlog_t *log,
* not in the active nor dirty state.
*/
STATIC int
xlog_state_sync_all(xlog_t *log, uint flags)
xlog_state_sync_all(xlog_t *log, uint flags, int *log_flushed)
{
xlog_in_core_t *iclog;
xfs_lsn_t lsn;
Expand Down Expand Up @@ -3000,6 +3002,7 @@ xlog_state_sync_all(xlog_t *log, uint flags)

if (xlog_state_release_iclog(log, iclog))
return XFS_ERROR(EIO);
*log_flushed = 1;
s = LOG_LOCK(log);
if (INT_GET(iclog->ic_header.h_lsn, ARCH_CONVERT) == lsn &&
iclog->ic_state != XLOG_STATE_DIRTY)
Expand Down Expand Up @@ -3043,6 +3046,7 @@ xlog_state_sync_all(xlog_t *log, uint flags)
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
return XFS_ERROR(EIO);
*log_flushed = 1;

} else {

Expand All @@ -3068,7 +3072,8 @@ xlog_state_sync_all(xlog_t *log, uint flags)
int
xlog_state_sync(xlog_t *log,
xfs_lsn_t lsn,
uint flags)
uint flags,
int *log_flushed)
{
xlog_in_core_t *iclog;
int already_slept = 0;
Expand Down Expand Up @@ -3120,6 +3125,7 @@ xlog_state_sync(xlog_t *log,
XFS_STATS_INC(xs_log_force_sleep);
sv_wait(&iclog->ic_prev->ic_writesema, PSWP,
&log->l_icloglock, s);
*log_flushed = 1;
already_slept = 1;
goto try_again;
} else {
Expand All @@ -3128,6 +3134,7 @@ xlog_state_sync(xlog_t *log,
LOG_UNLOCK(log, s);
if (xlog_state_release_iclog(log, iclog))
return XFS_ERROR(EIO);
*log_flushed = 1;
s = LOG_LOCK(log);
}
}
Expand All @@ -3152,6 +3159,7 @@ xlog_state_sync(xlog_t *log,
*/
if (iclog->ic_state & XLOG_STATE_IOERROR)
return XFS_ERROR(EIO);
*log_flushed = 1;
} else { /* just return */
LOG_UNLOCK(log, s);
}
Expand Down Expand Up @@ -3606,6 +3614,7 @@ xfs_log_force_umount(
xlog_ticket_t *tic;
xlog_t *log;
int retval;
int dummy;
SPLDECL(s);
SPLDECL(s2);

Expand Down Expand Up @@ -3684,7 +3693,7 @@ xfs_log_force_umount(
* Force the incore logs to disk before shutting the
* log down completely.
*/
xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC);
xlog_state_sync_all(log, XFS_LOG_FORCE|XFS_LOG_SYNC, &dummy);
s2 = LOG_LOCK(log);
retval = xlog_state_ioerror(log);
LOG_UNLOCK(log, s2);
Expand Down
9 changes: 6 additions & 3 deletions fs/xfs/xfs_log.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,12 @@ xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
xfs_log_ticket_t ticket,
void **iclog,
uint flags);
int xfs_log_force(struct xfs_mount *mp,
xfs_lsn_t lsn,
uint flags);
int _xfs_log_force(struct xfs_mount *mp,
xfs_lsn_t lsn,
uint flags,
int *log_forced);
#define xfs_log_force(mp, lsn, flags) \
_xfs_log_force(mp, lsn, flags, NULL);
int xfs_log_mount(struct xfs_mount *mp,
struct xfs_buftarg *log_target,
xfs_daddr_t start_block,
Expand Down
3 changes: 2 additions & 1 deletion fs/xfs/xfs_mount.h
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ typedef struct xfs_mount {
* 32 bits in size */
#define XFS_MOUNT_32BITINOOPT 0x00008000 /* saved mount option state */
#define XFS_MOUNT_NOUUID 0x00010000 /* ignore uuid during mount */
#define XFS_MOUNT_NOLOGFLUSH 0x00020000
#define XFS_MOUNT_BARRIER 0x00020000
#define XFS_MOUNT_IDELETE 0x00040000 /* delete empty inode clusters*/
#define XFS_MOUNT_SWALLOC 0x00080000 /* turn on stripe width
* allocation */
Expand Down Expand Up @@ -542,6 +542,7 @@ extern xfs_mount_t *xfs_mount_init(void);
extern void xfs_mod_sb(xfs_trans_t *, __int64_t);
extern void xfs_mount_free(xfs_mount_t *mp, int remove_bhv);
extern int xfs_mountfs(struct vfs *, xfs_mount_t *mp, int);
extern void xfs_mountfs_check_barriers(xfs_mount_t *mp);

extern int xfs_unmountfs(xfs_mount_t *, struct cred *);
extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
Expand Down
13 changes: 8 additions & 5 deletions fs/xfs/xfs_trans.c
Original file line number Diff line number Diff line change
Expand Up @@ -661,10 +661,11 @@ xfs_trans_unreserve_and_mod_sb(
*/
/*ARGSUSED*/
int
xfs_trans_commit(
_xfs_trans_commit(
xfs_trans_t *tp,
uint flags,
xfs_lsn_t *commit_lsn_p)
xfs_lsn_t *commit_lsn_p,
int *log_flushed)
{
xfs_log_iovec_t *log_vector;
int nvec;
Expand Down Expand Up @@ -893,9 +894,11 @@ xfs_trans_commit(
* log out now and wait for it.
*/
if (sync) {
if (!error)
error = xfs_log_force(mp, commit_lsn,
XFS_LOG_FORCE | XFS_LOG_SYNC);
if (!error) {
error = _xfs_log_force(mp, commit_lsn,
XFS_LOG_FORCE | XFS_LOG_SYNC,
log_flushed);
}
XFS_STATS_INC(xs_trans_sync);
} else {
XFS_STATS_INC(xs_trans_async);
Expand Down
7 changes: 6 additions & 1 deletion fs/xfs/xfs_trans.h
Original file line number Diff line number Diff line change
Expand Up @@ -1025,7 +1025,12 @@ void xfs_trans_log_efd_extent(xfs_trans_t *,
struct xfs_efd_log_item *,
xfs_fsblock_t,
xfs_extlen_t);
int xfs_trans_commit(xfs_trans_t *, uint flags, xfs_lsn_t *);
int _xfs_trans_commit(xfs_trans_t *,
uint flags,
xfs_lsn_t *,
int *);
#define xfs_trans_commit(tp, flags, lsn) \
_xfs_trans_commit(tp, flags, lsn, NULL)
void xfs_trans_cancel(xfs_trans_t *, int);
void xfs_trans_ail_init(struct xfs_mount *);
xfs_lsn_t xfs_trans_push_ail(struct xfs_mount *, xfs_lsn_t);
Expand Down
Loading

0 comments on commit f538d4d

Please sign in to comment.