Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 181816
b: refs/heads/master
c: c854363
h: refs/heads/master
v: v3
  • Loading branch information
Dave Chinner committed Feb 6, 2010
1 parent b936d3f commit 065b70a
Show file tree
Hide file tree
Showing 7 changed files with 103 additions and 116 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 777df5afdb26c71634edd60582be620ff94e87a0
refs/heads/master: c854363e80b49dd04a4de18ebc379eb8c8806674
4 changes: 2 additions & 2 deletions trunk/fs/xfs/linux-2.6/xfs_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1064,15 +1064,15 @@ xfs_fs_write_inode(
xfs_ilock(ip, XFS_ILOCK_SHARED);
xfs_iflock(ip);

error = xfs_iflush(ip, XFS_IFLUSH_SYNC);
error = xfs_iflush(ip, SYNC_WAIT);
} else {
error = EAGAIN;
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
goto out;
if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
goto out_unlock;

error = xfs_iflush(ip, XFS_IFLUSH_ASYNC_NOBLOCK);
error = xfs_iflush(ip, 0);
}

out_unlock:
Expand Down
105 changes: 76 additions & 29 deletions trunk/fs/xfs/linux-2.6/xfs_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,7 @@ xfs_sync_inode_attr(
goto out_unlock;
}

error = xfs_iflush(ip, (flags & SYNC_WAIT) ?
XFS_IFLUSH_SYNC : XFS_IFLUSH_DELWRI);
error = xfs_iflush(ip, flags);

out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
Expand Down Expand Up @@ -460,16 +459,18 @@ xfs_quiesce_fs(
{
int count = 0, pincount;

xfs_reclaim_inodes(mp, 0);
xfs_flush_buftarg(mp->m_ddev_targp, 0);
xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);

/*
* This loop must run at least twice. The first instance of the loop
* will flush most meta data but that will generate more meta data
* (typically directory updates). Which then must be flushed and
* logged before we can write the unmount record.
* logged before we can write the unmount record. We also do sync
* reclaim of inodes to catch any that the above delwri flush skipped.
*/
do {
xfs_reclaim_inodes(mp, SYNC_WAIT);
xfs_sync_attr(mp, SYNC_WAIT);
pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
if (!pincount) {
Expand Down Expand Up @@ -585,7 +586,7 @@ xfs_sync_worker(

if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
xfs_log_force(mp, 0);
xfs_reclaim_inodes(mp, XFS_IFLUSH_DELWRI_ELSE_ASYNC);
xfs_reclaim_inodes(mp, 0);
/* dgc: errors ignored here */
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
Expand Down Expand Up @@ -719,29 +720,50 @@ __xfs_inode_clear_reclaim_tag(
* shutdown EIO unpin and reclaim
* clean, unpinned 0 reclaim
* stale, unpinned 0 reclaim
* clean, pinned(*) 0 unpin and reclaim
* stale, pinned 0 unpin and reclaim
* dirty, async 0 block on flush lock, reclaim
* dirty, sync flush 0 block on flush lock, reclaim
* clean, pinned(*) 0 requeue
* stale, pinned EAGAIN requeue
* dirty, delwri ok 0 requeue
* dirty, delwri blocked EAGAIN requeue
* dirty, sync flush 0 reclaim
*
* (*) dgc: I don't think the clean, pinned state is possible but it gets
* handled anyway given the order of checks implemented.
*
* As can be seen from the table, the return value of xfs_iflush() is not
* sufficient to correctly decide the reclaim action here. The checks in
* xfs_iflush() might look like duplicates, but they are not.
*
* Also, because we get the flush lock first, we know that any inode that has
* been flushed delwri has had the flush completed by the time we check that
* the inode is clean. The clean inode check needs to be done before flushing
* the inode delwri otherwise we would loop forever requeuing clean inodes as
* we cannot tell apart a successful delwri flush and a clean inode from the
* return value of xfs_iflush().
*
* Note that because the inode is flushed delayed write by background
* writeback, the flush lock may already be held here and waiting on it can
* result in very long latencies. Hence for sync reclaims, where we wait on the
* flush lock, the caller should push out delayed write inodes first before
* trying to reclaim them to minimise the amount of time spent waiting. For
* background reclaim, we just requeue the inode for the next pass.
*
* Hence the order of actions after gaining the locks should be:
* bad => reclaim
* shutdown => unpin and reclaim
* pinned => unpin
* pinned, delwri => requeue
* pinned, sync => unpin
* stale => reclaim
* clean => reclaim
* dirty => flush, wait and reclaim
* dirty, delwri => flush and requeue
* dirty, sync => flush, wait and reclaim
*/
STATIC int
xfs_reclaim_inode(
struct xfs_inode *ip,
struct xfs_perag *pag,
int sync_mode)
{
int error;
int error = 0;

/*
* The radix tree lock here protects a thread in xfs_iget from racing
Expand All @@ -761,44 +783,69 @@ xfs_reclaim_inode(
write_unlock(&pag->pag_ici_lock);

xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_iflock(ip);
if (!xfs_iflock_nowait(ip)) {
if (!(sync_mode & SYNC_WAIT))
goto out;
xfs_iflock(ip);
}

if (is_bad_inode(VFS_I(ip)))
goto reclaim;
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_iunpin_wait(ip);
goto reclaim;
}
if (xfs_ipincount(ip))
if (xfs_ipincount(ip)) {
if (!(sync_mode & SYNC_WAIT)) {
xfs_ifunlock(ip);
goto out;
}
xfs_iunpin_wait(ip);
}
if (xfs_iflags_test(ip, XFS_ISTALE))
goto reclaim;
if (xfs_inode_clean(ip))
goto reclaim;

/* Now we have an inode that needs flushing */
error = xfs_iflush(ip, sync_mode);
if (!error) {
switch(sync_mode) {
case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
case XFS_IFLUSH_DELWRI:
case XFS_IFLUSH_ASYNC:
case XFS_IFLUSH_DELWRI_ELSE_SYNC:
case XFS_IFLUSH_SYNC:
/* IO issued, synchronise with IO completion */
xfs_iflock(ip);
break;
default:
ASSERT(0);
break;
}
if (sync_mode & SYNC_WAIT) {
xfs_iflock(ip);
goto reclaim;
}

/*
* When we have to flush an inode but don't have SYNC_WAIT set, we
* flush the inode out using a delwri buffer and wait for the next
* call into reclaim to find it in a clean state instead of waiting for
* it now. We also don't return errors here - if the error is transient
* then the next reclaim pass will flush the inode, and if the error
* is permanent then the next sync reclaim will reclaim the inode and
* pass on the error.
*/
if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
xfs_fs_cmn_err(CE_WARN, ip->i_mount,
"inode 0x%llx background reclaim flush failed with %d",
(long long)ip->i_ino, error);
}
out:
xfs_iflags_clear(ip, XFS_IRECLAIM);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/*
* We could return EAGAIN here to make reclaim rescan the inode tree in
* a short while. However, this just burns CPU time scanning the tree
* waiting for IO to complete and xfssyncd never goes back to the idle
* state. Instead, return 0 to let the next scheduled background reclaim
* attempt to reclaim the inode again.
*/
return 0;

reclaim:
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_ireclaim(ip);
return 0;
return error;

}

int
Expand Down
75 changes: 5 additions & 70 deletions trunk/fs/xfs/xfs_inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2835,8 +2835,6 @@ xfs_iflush(
xfs_dinode_t *dip;
xfs_mount_t *mp;
int error;
int noblock = (flags == XFS_IFLUSH_ASYNC_NOBLOCK);
enum { INT_DELWRI = (1 << 0), INT_ASYNC = (1 << 1) };

XFS_STATS_INC(xs_iflush_count);

Expand All @@ -2859,7 +2857,7 @@ xfs_iflush(
* in the same cluster are dirty, they will probably write the inode
* out for us if they occur after the log force completes.
*/
if (noblock && xfs_ipincount(ip)) {
if (!(flags & SYNC_WAIT) && xfs_ipincount(ip)) {
xfs_iunpin_nowait(ip);
xfs_ifunlock(ip);
return EAGAIN;
Expand Down Expand Up @@ -2892,61 +2890,11 @@ xfs_iflush(
return XFS_ERROR(EIO);
}

/*
* Decide how buffer will be flushed out. This is done before
* the call to xfs_iflush_int because this field is zeroed by it.
*/
if (iip != NULL && iip->ili_format.ilf_fields != 0) {
/*
* Flush out the inode buffer according to the directions
* of the caller. In the cases where the caller has given
* us a choice choose the non-delwri case. This is because
* the inode is in the AIL and we need to get it out soon.
*/
switch (flags) {
case XFS_IFLUSH_SYNC:
case XFS_IFLUSH_DELWRI_ELSE_SYNC:
flags = 0;
break;
case XFS_IFLUSH_ASYNC_NOBLOCK:
case XFS_IFLUSH_ASYNC:
case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
flags = INT_ASYNC;
break;
case XFS_IFLUSH_DELWRI:
flags = INT_DELWRI;
break;
default:
ASSERT(0);
flags = 0;
break;
}
} else {
switch (flags) {
case XFS_IFLUSH_DELWRI_ELSE_SYNC:
case XFS_IFLUSH_DELWRI_ELSE_ASYNC:
case XFS_IFLUSH_DELWRI:
flags = INT_DELWRI;
break;
case XFS_IFLUSH_ASYNC_NOBLOCK:
case XFS_IFLUSH_ASYNC:
flags = INT_ASYNC;
break;
case XFS_IFLUSH_SYNC:
flags = 0;
break;
default:
ASSERT(0);
flags = 0;
break;
}
}

/*
* Get the buffer containing the on-disk inode.
*/
error = xfs_itobp(mp, NULL, ip, &dip, &bp,
noblock ? XBF_TRYLOCK : XBF_LOCK);
(flags & SYNC_WAIT) ? XBF_LOCK : XBF_TRYLOCK);
if (error || !bp) {
xfs_ifunlock(ip);
return error;
Expand Down Expand Up @@ -2974,13 +2922,10 @@ xfs_iflush(
if (error)
goto cluster_corrupt_out;

if (flags & INT_DELWRI) {
xfs_bdwrite(mp, bp);
} else if (flags & INT_ASYNC) {
error = xfs_bawrite(mp, bp);
} else {
if (flags & SYNC_WAIT)
error = xfs_bwrite(mp, bp);
}
else
xfs_bdwrite(mp, bp);
return error;

corrupt_out:
Expand Down Expand Up @@ -3015,16 +2960,6 @@ xfs_iflush_int(
iip = ip->i_itemp;
mp = ip->i_mount;


/*
* If the inode isn't dirty, then just release the inode
* flush lock and do nothing.
*/
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
return 0;
}

/* set *dip = inode's place in the buffer */
dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);

Expand Down
10 changes: 0 additions & 10 deletions trunk/fs/xfs/xfs_inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -419,16 +419,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)

/*
* Flags for xfs_iflush()
*/
#define XFS_IFLUSH_DELWRI_ELSE_SYNC 1
#define XFS_IFLUSH_DELWRI_ELSE_ASYNC 2
#define XFS_IFLUSH_SYNC 3
#define XFS_IFLUSH_ASYNC 4
#define XFS_IFLUSH_DELWRI 5
#define XFS_IFLUSH_ASYNC_NOBLOCK 6

/*
* Flags for xfs_itruncate_start().
*/
Expand Down
10 changes: 7 additions & 3 deletions trunk/fs/xfs/xfs_inode_item.c
Original file line number Diff line number Diff line change
Expand Up @@ -866,10 +866,14 @@ xfs_inode_item_push(
iip->ili_format.ilf_fields != 0);

/*
* Write out the inode. The completion routine ('iflush_done') will
* pull it from the AIL, mark it clean, unlock the flush lock.
* Push the inode to its backing buffer. This will not remove the
* inode from the AIL - a further push will be required to trigger a
* buffer push. However, this allows all the dirty inodes to be pushed
* to the buffer before it is pushed to disk. The buffer IO completion
* will pull the inode from the AIL, mark it clean and unlock the flush
* lock.
*/
(void) xfs_iflush(ip, XFS_IFLUSH_ASYNC);
(void) xfs_iflush(ip, 0);
xfs_iunlock(ip, XFS_ILOCK_SHARED);

return;
Expand Down
13 changes: 12 additions & 1 deletion trunk/fs/xfs/xfs_mount.c
Original file line number Diff line number Diff line change
Expand Up @@ -1468,7 +1468,18 @@ xfs_unmountfs(
* need to force the log first.
*/
xfs_log_force(mp, XFS_LOG_SYNC);
xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC);

/*
* Do a delwri reclaim pass first so that as many dirty inodes are
* queued up for IO as possible. Then flush the buffers before making
* a synchronous pass to ensure all the remaining inodes are reclaimed.
* This makes the reclaim process as quick as possible by avoiding
* synchronous writeout and blocking on inodes already in the delwri
* state as much as possible.
*/
xfs_reclaim_inodes(mp, 0);
XFS_bflush(mp->m_ddev_targp);
xfs_reclaim_inodes(mp, SYNC_WAIT);

xfs_qm_unmount(mp);

Expand Down

0 comments on commit 065b70a

Please sign in to comment.