Skip to content

Commit

Permalink
xfs: reclaim inodes under a write lock
Browse files Browse the repository at this point in the history
Make the inode tree reclaim walk exclusive to avoid races with
concurrent sync walkers and lookups. This is a version of a patch
posted by Christoph Hellwig that avoids all the code duplication.

Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
  • Loading branch information
Dave Chinner authored and Alex Elder committed Jan 15, 2010
1 parent 7284ce6 commit c8e20be
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 87 deletions.
154 changes: 69 additions & 85 deletions fs/xfs/linux-2.6/xfs_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ xfs_inode_ag_lookup(
* as the tree is sparse and a gang lookup walks to find
* the number of objects requested.
*/
read_lock(&pag->pag_ici_lock);
if (tag == XFS_ICI_NO_TAG) {
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
(void **)&ip, *first_index, 1);
Expand All @@ -74,7 +73,7 @@ xfs_inode_ag_lookup(
(void **)&ip, *first_index, 1, tag);
}
if (!nr_found)
goto unlock;
return NULL;

/*
* Update the index for the next lookup. Catch overflows
Expand All @@ -84,13 +83,8 @@ xfs_inode_ag_lookup(
*/
*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
goto unlock;

return NULL;
return ip;

unlock:
read_unlock(&pag->pag_ici_lock);
return NULL;
}

STATIC int
Expand All @@ -100,7 +94,8 @@ xfs_inode_ag_walk(
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
int flags,
int tag)
int tag,
int exclusive)
{
struct xfs_perag *pag = &mp->m_perag[ag];
uint32_t first_index;
Expand All @@ -114,20 +109,29 @@ xfs_inode_ag_walk(
int error = 0;
xfs_inode_t *ip;

if (exclusive)
write_lock(&pag->pag_ici_lock);
else
read_lock(&pag->pag_ici_lock);
ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
if (!ip)
if (!ip) {
if (exclusive)
write_unlock(&pag->pag_ici_lock);
else
read_unlock(&pag->pag_ici_lock);
break;
}

/* execute releases pag->pag_ici_lock */
error = execute(ip, pag, flags);
if (error == EAGAIN) {
skipped++;
continue;
}
if (error)
last_error = error;
/*
* bail out if the filesystem is corrupted.
*/

/* bail out if the filesystem is corrupted. */
if (error == EFSCORRUPTED)
break;

Expand All @@ -148,7 +152,8 @@ xfs_inode_ag_iterator(
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
int flags,
int tag)
int tag,
int exclusive)
{
int error = 0;
int last_error = 0;
Expand All @@ -157,7 +162,8 @@ xfs_inode_ag_iterator(
for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
if (!mp->m_perag[ag].pag_ici_init)
continue;
error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
exclusive);
if (error) {
last_error = error;
if (error == EFSCORRUPTED)
Expand All @@ -181,11 +187,7 @@ xfs_sync_inode_valid(
return EFSCORRUPTED;
}

/*
* If we can't get a reference on the inode, it must be in reclaim.
* Leave it for the reclaim code to flush. Also avoid inodes that
* haven't been fully initialised.
*/
/* If we can't get a reference on the inode, it must be in reclaim. */
if (!igrab(inode)) {
read_unlock(&pag->pag_ici_lock);
return ENOENT;
Expand Down Expand Up @@ -282,7 +284,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);

error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
XFS_ICI_NO_TAG);
XFS_ICI_NO_TAG, 0);
if (error)
return XFS_ERROR(error);

Expand All @@ -304,7 +306,7 @@ xfs_sync_attr(
ASSERT((flags & ~SYNC_WAIT) == 0);

return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
XFS_ICI_NO_TAG);
XFS_ICI_NO_TAG, 0);
}

STATIC int
Expand Down Expand Up @@ -664,60 +666,6 @@ xfs_syncd_stop(
kthread_stop(mp->m_sync_task);
}

STATIC int
xfs_reclaim_inode(
xfs_inode_t *ip,
int sync_mode)
{
xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);

/* The hash lock here protects a thread in xfs_iget_core from
* racing with us on linking the inode back with a vnode.
* Once we have the XFS_IRECLAIM flag set it will not touch
* us.
*/
write_lock(&pag->pag_ici_lock);
spin_lock(&ip->i_flags_lock);
if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
!__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
return -EAGAIN;
}
__xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
xfs_put_perag(ip->i_mount, pag);

/*
* If the inode is still dirty, then flush it out. If the inode
* is not in the AIL, then it will be OK to flush it delwri as
* long as xfs_iflush() does not keep any references to the inode.
* We leave that decision up to xfs_iflush() since it has the
* knowledge of whether it's OK to simply do a delwri flush of
* the inode or whether we need to wait until the inode is
* pulled from the AIL.
* We get the flush lock regardless, though, just to make sure
* we don't free it while it is being flushed.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_iflock(ip);

/*
* In the case of a forced shutdown we rely on xfs_iflush() to
* wait for the inode to be unpinned before returning an error.
*/
if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
/* synchronize with xfs_iflush_done */
xfs_iflock(ip);
xfs_ifunlock(ip);
}

xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_ireclaim(ip);
return 0;
}

void
__xfs_inode_set_reclaim_tag(
struct xfs_perag *pag,
Expand Down Expand Up @@ -760,26 +708,62 @@ __xfs_inode_clear_reclaim_tag(
}

STATIC int
xfs_reclaim_inode_now(
xfs_reclaim_inode(
struct xfs_inode *ip,
struct xfs_perag *pag,
int flags)
int sync_mode)
{
/* ignore if already under reclaim */
if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
read_unlock(&pag->pag_ici_lock);
/*
* The radix tree lock here protects a thread in xfs_iget from racing
* with us starting reclaim on the inode. Once we have the
* XFS_IRECLAIM flag set it will not touch us.
*/
spin_lock(&ip->i_flags_lock);
ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
/* ignore as it is already under reclaim */
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);
return 0;
}
read_unlock(&pag->pag_ici_lock);
__xfs_iflags_set(ip, XFS_IRECLAIM);
spin_unlock(&ip->i_flags_lock);
write_unlock(&pag->pag_ici_lock);

/*
* If the inode is still dirty, then flush it out. If the inode
* is not in the AIL, then it will be OK to flush it delwri as
* long as xfs_iflush() does not keep any references to the inode.
* We leave that decision up to xfs_iflush() since it has the
* knowledge of whether it's OK to simply do a delwri flush of
* the inode or whether we need to wait until the inode is
* pulled from the AIL.
* We get the flush lock regardless, though, just to make sure
* we don't free it while it is being flushed.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_iflock(ip);

return xfs_reclaim_inode(ip, flags);
/*
* In the case of a forced shutdown we rely on xfs_iflush() to
* wait for the inode to be unpinned before returning an error.
*/
if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
/* synchronize with xfs_iflush_done */
xfs_iflock(ip);
xfs_ifunlock(ip);
}

xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_ireclaim(ip);
return 0;
}

int
xfs_reclaim_inodes(
xfs_mount_t *mp,
int mode)
{
return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
XFS_ICI_RECLAIM_TAG);
return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
XFS_ICI_RECLAIM_TAG, 1);
}
2 changes: 1 addition & 1 deletion fs/xfs/linux-2.6/xfs_sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags, int tag);
int flags, int tag, int write_lock);

#endif
2 changes: 1 addition & 1 deletion fs/xfs/quota/xfs_qm_syscalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,7 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
}

/*------------------------------------------------------------------------*/
Expand Down

0 comments on commit c8e20be

Please sign in to comment.