Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 229038
b: refs/heads/master
c: 1a3e8f3
h: refs/heads/master
v: v3
  • Loading branch information
Dave Chinner authored and Dave Chinner committed Dec 17, 2010
1 parent 52d604b commit dd8f120
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 43 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: d95b7aaf9ab6738bef1ebcc52ab66563085e44ac
refs/heads/master: 1a3e8f3da09c7082d25b512a0ffe569391e4c09a
84 changes: 66 additions & 18 deletions trunk/fs/xfs/linux-2.6/xfs_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,30 @@ xfs_inode_ag_walk_grab(
{
struct inode *inode = VFS_I(ip);

ASSERT(rcu_read_lock_held());

/*
* check for stale RCU freed inode
*
* If the inode has been reallocated, it doesn't matter if it's not in
* the AG we are walking - we are walking for writeback, so if it
* passes all the "valid inode" checks and is dirty, then we'll write
* it back anyway. If it has been reallocated and is still being
* initialised, the XFS_INEW check below will catch it.
*/
spin_lock(&ip->i_flags_lock);
if (!ip->i_ino)
goto out_unlock_noent;

/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
goto out_unlock_noent;
spin_unlock(&ip->i_flags_lock);

/* nothing to sync during shutdown */
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return EFSCORRUPTED;

/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
return ENOENT;

/* If we can't grab the inode, it must be on its way to reclaim. */
if (!igrab(inode))
return ENOENT;
Expand All @@ -72,6 +88,10 @@ xfs_inode_ag_walk_grab(

/* inode is valid */
return 0;

out_unlock_noent:
spin_unlock(&ip->i_flags_lock);
return ENOENT;
}

STATIC int
Expand All @@ -98,12 +118,12 @@ xfs_inode_ag_walk(
int error = 0;
int i;

read_lock(&pag->pag_ici_lock);
rcu_read_lock();
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
(void **)batch, first_index,
XFS_LOOKUP_BATCH);
if (!nr_found) {
read_unlock(&pag->pag_ici_lock);
rcu_read_unlock();
break;
}

Expand All @@ -118,18 +138,26 @@ xfs_inode_ag_walk(
batch[i] = NULL;

/*
* Update the index for the next lookup. Catch overflows
* into the next AG range which can occur if we have inodes
* in the last block of the AG and we are currently
* pointing to the last inode.
* Update the index for the next lookup. Catch
* overflows into the next AG range which can occur if
* we have inodes in the last block of the AG and we
* are currently pointing to the last inode.
*
* Because we may see inodes that are from the wrong AG
* due to RCU freeing and reallocation, only update the
* index if it lies in this AG. It was a race that led
* us to see this inode, so another lookup from the
* same index will not find it again.
*/
if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
continue;
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
done = 1;
}

/* unlock now we've grabbed the inodes. */
read_unlock(&pag->pag_ici_lock);
rcu_read_unlock();

for (i = 0; i < nr_found; i++) {
if (!batch[i])
Expand Down Expand Up @@ -639,9 +667,14 @@ xfs_reclaim_inode_grab(
struct xfs_inode *ip,
int flags)
{
ASSERT(rcu_read_lock_held());

/* quick check for stale RCU freed inode */
if (!ip->i_ino)
return 1;

/*
* do some unlocked checks first to avoid unnecceary lock traffic.
* do some unlocked checks first to avoid unnecessary lock traffic.
* The first is a flush lock check, the second is an already in reclaim
* check. Only do these checks if we are not going to block on locks.
*/
Expand All @@ -654,11 +687,16 @@ xfs_reclaim_inode_grab(
* The radix tree lock here protects a thread in xfs_iget from racing
* with us starting reclaim on the inode. Once we have the
* XFS_IRECLAIM flag set it will not touch us.
*
* Due to RCU lookup, we may find inodes that have been freed and only
* have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
* aren't candidates for reclaim at all, so we must check the
* XFS_IRECLAIMABLE is set first before proceeding to reclaim.
*/
spin_lock(&ip->i_flags_lock);
ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
/* ignore as it is already under reclaim */
if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
__xfs_iflags_test(ip, XFS_IRECLAIM)) {
/* not a reclaim candidate. */
spin_unlock(&ip->i_flags_lock);
return 1;
}
Expand Down Expand Up @@ -864,14 +902,14 @@ xfs_reclaim_inodes_ag(
struct xfs_inode *batch[XFS_LOOKUP_BATCH];
int i;

write_lock(&pag->pag_ici_lock);
rcu_read_lock();
nr_found = radix_tree_gang_lookup_tag(
&pag->pag_ici_root,
(void **)batch, first_index,
XFS_LOOKUP_BATCH,
XFS_ICI_RECLAIM_TAG);
if (!nr_found) {
write_unlock(&pag->pag_ici_lock);
rcu_read_unlock();
break;
}

Expand All @@ -891,14 +929,24 @@ xfs_reclaim_inodes_ag(
* occur if we have inodes in the last block of
* the AG and we are currently pointing to the
* last inode.
*
* Because we may see inodes that are from the
* wrong AG due to RCU freeing and
* reallocation, only update the index if it
* lies in this AG. It was a race that led us
* to see this inode, so another lookup from
* the same index will not find it again.
*/
if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
pag->pag_agno)
continue;
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
done = 1;
}

/* unlock now we've grabbed the inodes. */
write_unlock(&pag->pag_ici_lock);
rcu_read_unlock();

for (i = 0; i < nr_found; i++) {
if (!batch[i])
Expand Down
47 changes: 35 additions & 12 deletions trunk/fs/xfs/xfs_iget.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
ASSERT(ip->i_ino == 0);

mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
Expand All @@ -98,9 +99,6 @@ xfs_inode_alloc(
ip->i_size = 0;
ip->i_new_size = 0;

/* prevent anyone from using this yet */
VFS_I(ip)->i_state = I_NEW;

return ip;
}

Expand Down Expand Up @@ -159,6 +157,16 @@ xfs_inode_free(
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));

/*
* Because we use RCU freeing we need to ensure the inode always
* appears to be reclaimed with an invalid inode number when in the
* free state. The ip->i_flags_lock provides the barrier against lookup
* races.
*/
spin_lock(&ip->i_flags_lock);
ip->i_flags = XFS_IRECLAIM;
ip->i_ino = 0;
spin_unlock(&ip->i_flags_lock);
call_rcu((struct rcu_head *)&VFS_I(ip)->i_dentry, __xfs_inode_free);
}

Expand All @@ -169,14 +177,29 @@ static int
xfs_iget_cache_hit(
struct xfs_perag *pag,
struct xfs_inode *ip,
xfs_ino_t ino,
int flags,
int lock_flags) __releases(pag->pag_ici_lock)
int lock_flags) __releases(RCU)
{
struct inode *inode = VFS_I(ip);
struct xfs_mount *mp = ip->i_mount;
int error;

/*
* check for re-use of an inode within an RCU grace period due to the
* radix tree nodes not being updated yet. We monitor for this by
* setting the inode number to zero before freeing the inode structure.
* If the inode has been reallocated and set up, then the inode number
* will not match, so check for that, too.
*/
spin_lock(&ip->i_flags_lock);
if (ip->i_ino != ino) {
trace_xfs_iget_skip(ip);
XFS_STATS_INC(xs_ig_frecycle);
error = EAGAIN;
goto out_error;
}


/*
* If we are racing with another cache hit that is currently
Expand Down Expand Up @@ -219,15 +242,15 @@ xfs_iget_cache_hit(
ip->i_flags |= XFS_IRECLAIM;

spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
rcu_read_unlock();

error = -inode_init_always(mp->m_super, inode);
if (error) {
/*
* Re-initializing the inode failed, and we are in deep
* trouble. Try to re-add it to the reclaim list.
*/
read_lock(&pag->pag_ici_lock);
rcu_read_lock();
spin_lock(&ip->i_flags_lock);

ip->i_flags &= ~XFS_INEW;
Expand Down Expand Up @@ -261,7 +284,7 @@ xfs_iget_cache_hit(

/* We've got a live one. */
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
rcu_read_unlock();
trace_xfs_iget_hit(ip);
}

Expand All @@ -275,7 +298,7 @@ xfs_iget_cache_hit(

out_error:
spin_unlock(&ip->i_flags_lock);
read_unlock(&pag->pag_ici_lock);
rcu_read_unlock();
return error;
}

Expand Down Expand Up @@ -397,7 +420,7 @@ xfs_iget(
xfs_agino_t agino;

/* reject inode numbers outside existing AGs */
if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
if (!ino || XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount)
return EINVAL;

/* get the perag structure and ensure that it's inode capable */
Expand All @@ -406,15 +429,15 @@ xfs_iget(

again:
error = 0;
read_lock(&pag->pag_ici_lock);
rcu_read_lock();
ip = radix_tree_lookup(&pag->pag_ici_root, agino);

if (ip) {
error = xfs_iget_cache_hit(pag, ip, flags, lock_flags);
error = xfs_iget_cache_hit(pag, ip, ino, flags, lock_flags);
if (error)
goto out_error_or_again;
} else {
read_unlock(&pag->pag_ici_lock);
rcu_read_unlock();
XFS_STATS_INC(xs_ig_missed);

error = xfs_iget_cache_miss(mp, pag, tp, ino, &ip,
Expand Down
Loading

0 comments on commit dd8f120

Please sign in to comment.