Skip to content

Commit

Permalink
xfs: Don't issue buffer IO direct from AIL push V2
Browse files Browse the repository at this point in the history
All buffers logged into the AIL are marked as delayed write.
When the AIL needs to push the buffer out, it issues an async write of the
buffer. This means that IO patterns are dependent on the order of
buffers in the AIL.

Instead of flushing the buffer, promote the buffer in the delayed
write list so that the next time the xfsbufd is run the buffer will
be flushed by the xfsbufd. Return the state to the xfsaild that the
buffer was promoted so that the xfsaild knows that it needs to cause
the xfsbufd to run to flush the buffers that were promoted.

Using the xfsbufd for issuing the IO allows us to dispatch all
buffer IO from the one queue. This means that we can make much more
enlightened decisions on what order to flush buffers to disk as
we don't have multiple places issuing IO. Optimisations to xfsbufd
will be in a future patch.

Version 2
- kill XFS_ITEM_FLUSHING as it is now unused.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
  • Loading branch information
Dave Chinner committed Feb 1, 2010
1 parent c854363 commit d808f61
Show file tree
Hide file tree
Showing 10 changed files with 102 additions and 203 deletions.
29 changes: 29 additions & 0 deletions fs/xfs/linux-2.6/xfs_buf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1778,6 +1778,35 @@ xfs_buf_delwri_dequeue(
trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
}

/*
* If a delwri buffer needs to be pushed before it has aged out, then promote
* it to the head of the delwri queue so that it will be flushed on the next
* xfsbufd run. We do this by resetting the queuetime of the buffer to be older
* than the age currently needed to flush the buffer. Hence the next time the
* xfsbufd sees it is guaranteed to be considered old enough to flush.
*/
void
xfs_buf_delwri_promote(
struct xfs_buf *bp)
{
struct xfs_buftarg *btp = bp->b_target;
long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;

ASSERT(bp->b_flags & XBF_DELWRI);
ASSERT(bp->b_flags & _XBF_DELWRI_Q);

/*
* Check the buffer age before locking the delayed write queue as we
* don't need to promote buffers that are already past the flush age.
*/
if (bp->b_queuetime < jiffies - age)
return;
bp->b_queuetime = jiffies - age;
spin_lock(&btp->bt_delwrite_lock);
list_move(&bp->b_list, &btp->bt_delwrite_queue);
spin_unlock(&btp->bt_delwrite_lock);
}

STATIC void
xfs_buf_runall_queues(
struct workqueue_struct *queue)
Expand Down
2 changes: 2 additions & 0 deletions fs/xfs/linux-2.6/xfs_buf.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ extern int xfs_buf_ispin(xfs_buf_t *);

/* Delayed Write Buffer Routines */
extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
extern void xfs_buf_delwri_promote(xfs_buf_t *);

/* Buffer Daemon Setup Routines */
extern int xfs_buf_init(void);
Expand Down Expand Up @@ -395,6 +396,7 @@ extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
extern int xfs_flush_buftarg(xfs_buftarg_t *, int);

#ifdef CONFIG_KDB_MODULES
extern struct list_head *xfs_get_buftarg_list(void);
#endif
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/linux-2.6/xfs_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -483,6 +483,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
Expand Down
85 changes: 12 additions & 73 deletions fs/xfs/quota/xfs_dquot_item.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,66 +212,31 @@ xfs_qm_dquot_logitem_pushbuf(
xfs_dquot_t *dqp;
xfs_mount_t *mp;
xfs_buf_t *bp;
uint dopush;

dqp = qip->qli_dquot;
ASSERT(XFS_DQ_IS_LOCKED(dqp));

/*
* The qli_pushbuf_flag keeps others from
* trying to duplicate our effort.
*/
ASSERT(qip->qli_pushbuf_flag != 0);
ASSERT(qip->qli_push_owner == current_pid());

/*
* If flushlock isn't locked anymore, chances are that the
* inode flush completed and the inode was taken off the AIL.
* So, just get out.
*/
if (completion_done(&dqp->q_flush) ||
((qip->qli_item.li_flags & XFS_LI_IN_AIL) == 0)) {
qip->qli_pushbuf_flag = 0;
xfs_dqunlock(dqp);
return;
}
mp = dqp->q_mount;
bp = xfs_incore(mp->m_ddev_targp, qip->qli_format.qlf_blkno,
XFS_QI_DQCHUNKLEN(mp), XBF_TRYLOCK);
if (bp != NULL) {
if (XFS_BUF_ISDELAYWRITE(bp)) {
dopush = ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
!completion_done(&dqp->q_flush));
qip->qli_pushbuf_flag = 0;
xfs_dqunlock(dqp);

if (XFS_BUF_ISPINNED(bp))
xfs_log_force(mp, 0);

if (dopush) {
int error;
#ifdef XFSRACEDEBUG
delay_for_intr();
delay(300);
#endif
error = xfs_bawrite(mp, bp);
if (error)
xfs_fs_cmn_err(CE_WARN, mp,
"xfs_qm_dquot_logitem_pushbuf: pushbuf error %d on qip %p, bp %p",
error, qip, bp);
} else {
xfs_buf_relse(bp);
}
} else {
qip->qli_pushbuf_flag = 0;
xfs_dqunlock(dqp);
xfs_buf_relse(bp);
}
xfs_dqunlock(dqp);
if (!bp)
return;
}
if (XFS_BUF_ISDELAYWRITE(bp))
xfs_buf_delwri_promote(bp);
xfs_buf_relse(bp);
return;

qip->qli_pushbuf_flag = 0;
xfs_dqunlock(dqp);
}

/*
Expand All @@ -289,50 +254,24 @@ xfs_qm_dquot_logitem_trylock(
xfs_dq_logitem_t *qip)
{
xfs_dquot_t *dqp;
uint retval;

dqp = qip->qli_dquot;
if (atomic_read(&dqp->q_pincount) > 0)
return (XFS_ITEM_PINNED);
return XFS_ITEM_PINNED;

if (! xfs_qm_dqlock_nowait(dqp))
return (XFS_ITEM_LOCKED);
return XFS_ITEM_LOCKED;

retval = XFS_ITEM_SUCCESS;
if (!xfs_dqflock_nowait(dqp)) {
/*
* The dquot is already being flushed. It may have been
* flushed delayed write, however, and we don't want to
* get stuck waiting for that to complete. So, we want to check
* to see if we can lock the dquot's buffer without sleeping.
* If we can and it is marked for delayed write, then we
* hold it and send it out from the push routine. We don't
* want to do that now since we might sleep in the device
* strategy routine. We also don't want to grab the buffer lock
* here because we'd like not to call into the buffer cache
* while holding the AIL lock.
* Make sure to only return PUSHBUF if we set pushbuf_flag
* ourselves. If someone else is doing it then we don't
* want to go to the push routine and duplicate their efforts.
* dquot has already been flushed to the backing buffer,
* leave it locked, pushbuf routine will unlock it.
*/
if (qip->qli_pushbuf_flag == 0) {
qip->qli_pushbuf_flag = 1;
ASSERT(qip->qli_format.qlf_blkno == dqp->q_blkno);
#ifdef DEBUG
qip->qli_push_owner = current_pid();
#endif
/*
* The dquot is left locked.
*/
retval = XFS_ITEM_PUSHBUF;
} else {
retval = XFS_ITEM_FLUSHING;
xfs_dqunlock_nonotify(dqp);
}
return XFS_ITEM_PUSHBUF;
}

ASSERT(qip->qli_item.li_flags & XFS_LI_IN_AIL);
return (retval);
return XFS_ITEM_SUCCESS;
}


Expand Down
4 changes: 0 additions & 4 deletions fs/xfs/quota/xfs_dquot_item.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,6 @@ typedef struct xfs_dq_logitem {
xfs_log_item_t qli_item; /* common portion */
struct xfs_dquot *qli_dquot; /* dquot ptr */
xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
unsigned short qli_pushbuf_flag; /* 1 bit used in push_ail */
#ifdef DEBUG
uint64_t qli_push_owner;
#endif
xfs_dq_logformat_t qli_format; /* logged structure */
} xfs_dq_logitem_t;

Expand Down
64 changes: 35 additions & 29 deletions fs/xfs/xfs_buf_item.c
Original file line number Diff line number Diff line change
Expand Up @@ -467,8 +467,10 @@ xfs_buf_item_unpin_remove(
/*
* This is called to attempt to lock the buffer associated with this
* buf log item. Don't sleep on the buffer lock. If we can't get
* the lock right away, return 0. If we can get the lock, pull the
* buffer from the free list, mark it busy, and return 1.
* the lock right away, return 0. If we can get the lock, take a
* reference to the buffer. If this is a delayed write buffer that
* needs AIL help to be written back, invoke the pushbuf routine
* rather than the normal success path.
*/
STATIC uint
xfs_buf_item_trylock(
Expand All @@ -477,24 +479,18 @@ xfs_buf_item_trylock(
xfs_buf_t *bp;

bp = bip->bli_buf;

if (XFS_BUF_ISPINNED(bp)) {
if (XFS_BUF_ISPINNED(bp))
return XFS_ITEM_PINNED;
}

if (!XFS_BUF_CPSEMA(bp)) {
if (!XFS_BUF_CPSEMA(bp))
return XFS_ITEM_LOCKED;
}

/*
* Remove the buffer from the free list. Only do this
* if it's on the free list. Private buffers like the
* superblock buffer are not.
*/
/* take a reference to the buffer. */
XFS_BUF_HOLD(bp);

ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
trace_xfs_buf_item_trylock(bip);
if (XFS_BUF_ISDELAYWRITE(bp))
return XFS_ITEM_PUSHBUF;
return XFS_ITEM_SUCCESS;
}

Expand Down Expand Up @@ -626,11 +622,9 @@ xfs_buf_item_committed(
}

/*
* This is called to asynchronously write the buffer associated with this
* buf log item out to disk. The buffer will already have been locked by
* a successful call to xfs_buf_item_trylock(). If the buffer still has
* B_DELWRI set, then get it going out to disk with a call to bawrite().
* If not, then just release the buffer.
* The buffer is locked, but is not a delayed write buffer. This happens
* if we race with IO completion and hence we don't want to try to write it
* again. Just release the buffer.
*/
STATIC void
xfs_buf_item_push(
Expand All @@ -642,17 +636,29 @@ xfs_buf_item_push(
trace_xfs_buf_item_push(bip);

bp = bip->bli_buf;
ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
xfs_buf_relse(bp);
}

if (XFS_BUF_ISDELAYWRITE(bp)) {
int error;
error = xfs_bawrite(bip->bli_item.li_mountp, bp);
if (error)
xfs_fs_cmn_err(CE_WARN, bip->bli_item.li_mountp,
"xfs_buf_item_push: pushbuf error %d on bip %p, bp %p",
error, bip, bp);
} else {
xfs_buf_relse(bp);
}
/*
* The buffer is locked and is a delayed write buffer. Promote the buffer
* in the delayed write queue as the caller knows that they must invoke
* the xfsbufd to get this buffer written. We have to unlock the buffer
* to allow the xfsbufd to write it, too.
*/
STATIC void
xfs_buf_item_pushbuf(
xfs_buf_log_item_t *bip)
{
xfs_buf_t *bp;

ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
trace_xfs_buf_item_pushbuf(bip);

bp = bip->bli_buf;
ASSERT(XFS_BUF_ISDELAYWRITE(bp));
xfs_buf_delwri_promote(bp);
xfs_buf_relse(bp);
}

/* ARGSUSED */
Expand All @@ -677,7 +683,7 @@ static struct xfs_item_ops xfs_buf_item_ops = {
.iop_committed = (xfs_lsn_t(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_buf_item_committed,
.iop_push = (void(*)(xfs_log_item_t*))xfs_buf_item_push,
.iop_pushbuf = NULL,
.iop_pushbuf = (void(*)(xfs_log_item_t*))xfs_buf_item_pushbuf,
.iop_committing = (void(*)(xfs_log_item_t*, xfs_lsn_t))
xfs_buf_item_committing
};
Expand Down
Loading

0 comments on commit d808f61

Please sign in to comment.