Skip to content

Commit

Permalink
xfs: add a shrinker to background inode reclaim
Browse files Browse the repository at this point in the history
On low memory boxes or those with highmem, the kernel can OOM before
background reclaim frees inodes via xfssyncd. Add a shrinker to run inode
reclaim so that inode reclaim is expedited when memory is low.

This is more complex than it needs to be because the VM folk don't
want a context added to the shrinker infrastructure. Hence we need
to add a global list of XFS mount structures so the shrinker can
traverse them.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
  • Loading branch information
Dave Chinner authored and Alex Elder committed Apr 29, 2010
1 parent 79dba2e commit 9bf729c
Show file tree
Hide file tree
Showing 6 changed files with 120 additions and 9 deletions.
5 changes: 5 additions & 0 deletions fs/xfs/linux-2.6/xfs_super.c
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,7 @@ xfs_fs_put_super(

xfs_unmountfs(mp);
xfs_freesb(mp);
xfs_inode_shrinker_unregister(mp);
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
xfs_dmops_put(mp);
Expand Down Expand Up @@ -1622,6 +1623,8 @@ xfs_fs_fill_super(
if (error)
goto fail_vnrele;

xfs_inode_shrinker_register(mp);

kfree(mtpt);
return 0;

Expand Down Expand Up @@ -1867,6 +1870,7 @@ init_xfs_fs(void)
goto out_cleanup_procfs;

vfs_initquota();
xfs_inode_shrinker_init();

error = register_filesystem(&xfs_fs_type);
if (error)
Expand Down Expand Up @@ -1894,6 +1898,7 @@ exit_xfs_fs(void)
{
vfs_exitquota();
unregister_filesystem(&xfs_fs_type);
xfs_inode_shrinker_destroy();
xfs_sysctl_unregister();
xfs_cleanup_procfs();
xfs_buf_terminate();
Expand Down
112 changes: 105 additions & 7 deletions fs/xfs/linux-2.6/xfs_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,8 @@ xfs_inode_ag_walk(
struct xfs_perag *pag, int flags),
int flags,
int tag,
int exclusive)
int exclusive,
int *nr_to_scan)
{
uint32_t first_index;
int last_error = 0;
Expand Down Expand Up @@ -134,7 +135,7 @@ xfs_inode_ag_walk(
if (error == EFSCORRUPTED)
break;

} while (1);
} while ((*nr_to_scan)--);

if (skipped) {
delay(1);
Expand All @@ -150,12 +151,15 @@ xfs_inode_ag_iterator(
struct xfs_perag *pag, int flags),
int flags,
int tag,
int exclusive)
int exclusive,
int *nr_to_scan)
{
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
int nr;

nr = nr_to_scan ? *nr_to_scan : INT_MAX;
for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
struct xfs_perag *pag;

Expand All @@ -165,14 +169,18 @@ xfs_inode_ag_iterator(
continue;
}
error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
exclusive);
exclusive, &nr);
xfs_perag_put(pag);
if (error) {
last_error = error;
if (error == EFSCORRUPTED)
break;
}
if (nr <= 0)
break;
}
if (nr_to_scan)
*nr_to_scan = nr;
return XFS_ERROR(last_error);
}

Expand Down Expand Up @@ -291,7 +299,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);

error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
XFS_ICI_NO_TAG, 0);
XFS_ICI_NO_TAG, 0, NULL);
if (error)
return XFS_ERROR(error);

Expand All @@ -310,7 +318,7 @@ xfs_sync_attr(
ASSERT((flags & ~SYNC_WAIT) == 0);

return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
XFS_ICI_NO_TAG, 0);
XFS_ICI_NO_TAG, 0, NULL);
}

STATIC int
Expand Down Expand Up @@ -673,6 +681,7 @@ __xfs_inode_set_reclaim_tag(
radix_tree_tag_set(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
XFS_ICI_RECLAIM_TAG);
pag->pag_ici_reclaimable++;
}

/*
Expand Down Expand Up @@ -705,6 +714,7 @@ __xfs_inode_clear_reclaim_tag(
{
radix_tree_tag_clear(&pag->pag_ici_root,
XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
pag->pag_ici_reclaimable--;
}

/*
Expand Down Expand Up @@ -854,5 +864,93 @@ xfs_reclaim_inodes(
int mode)
{
return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
XFS_ICI_RECLAIM_TAG, 1);
XFS_ICI_RECLAIM_TAG, 1, NULL);
}

/*
* Shrinker infrastructure.
*
* This is all far more complex than it needs to be. It adds a global list of
* mounts because the shrinkers can only call a global context. We need to make
* the shrinkers pass a context to avoid the need for global state.
*/
/*
 * Global list of XFS mounts, linked through xfs_mount.m_mplist. The inode
 * shrinker walks this list to reach every mount.
 */
static LIST_HEAD(xfs_mount_list);
/*
 * Protects xfs_mount_list: taken for read by the shrinker while walking the
 * list, and for write when a mount is added to or removed from it.
 * Initialised at runtime by xfs_inode_shrinker_init().
 */
static struct rw_semaphore xfs_mount_list_lock;

/*
 * Shrinker callback for reclaimable XFS inodes.
 *
 * When nr_to_scan is non-zero, run xfs_reclaim_inode over the
 * reclaim-tagged inodes of each mount on xfs_mount_list until the scan
 * budget is used up. A non-zero scan request without __GFP_FS set in
 * gfp_mask is refused with -1 (presumably because reclaim may re-enter
 * the filesystem - confirm against the shrinker API contract).
 *
 * In every other case, return the sum of pag_ici_reclaimable over all
 * AGs with an initialised inode cache, across all listed mounts.
 */
static int
xfs_reclaim_inode_shrink(
	int		nr_to_scan,
	gfp_t		gfp_mask)
{
	struct xfs_mount *mount;
	struct xfs_perag *perag;
	xfs_agnumber_t	agno;
	int		total = 0;

	/* A scan was requested but the caller's context does not allow FS work. */
	if (nr_to_scan && !(gfp_mask & __GFP_FS))
		return -1;

	if (nr_to_scan) {
		down_read(&xfs_mount_list_lock);
		list_for_each_entry(mount, &xfs_mount_list, m_mplist) {
			/* The iterator writes the remaining budget back. */
			xfs_inode_ag_iterator(mount, xfs_reclaim_inode, 0,
					XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
			if (nr_to_scan <= 0)
				break;
		}
		up_read(&xfs_mount_list_lock);
	}

	/* Count what is (still) reclaimable, under a fresh read lock. */
	down_read(&xfs_mount_list_lock);
	list_for_each_entry(mount, &xfs_mount_list, m_mplist) {
		for (agno = 0; agno < mount->m_sb.sb_agcount; agno++) {
			perag = xfs_perag_get(mount, agno);
			/* Only AGs with an initialised inode cache contribute. */
			if (perag->pag_ici_init)
				total += perag->pag_ici_reclaimable;
			xfs_perag_put(perag);
		}
	}
	up_read(&xfs_mount_list_lock);

	return total;
}

/*
 * The single, global shrinker instance for XFS inode reclaim; it is
 * registered and unregistered by xfs_inode_shrinker_init()/_destroy().
 */
static struct shrinker xfs_inode_shrinker = {
	.seeks		= DEFAULT_SEEKS,
	.shrink		= xfs_reclaim_inode_shrink,
};

/*
 * Set up the inode shrinker: initialise the mount list lock, then
 * register the shrinker.
 */
void __init
xfs_inode_shrinker_init(void)
{
	/* The lock must be usable before the shrinker callback can run. */
	init_rwsem(&xfs_mount_list_lock);

	register_shrinker(&xfs_inode_shrinker);
}

/*
 * Tear down the inode shrinker. Asserts that no mounts remain on
 * xfs_mount_list before unregistering the shrinker.
 */
void
xfs_inode_shrinker_destroy(void)
{
	ASSERT(list_empty(&xfs_mount_list));

	unregister_shrinker(&xfs_inode_shrinker);
}

/*
 * Append @mp to the tail of the global mount list so that the inode
 * shrinker will visit it. The list is modified under the write lock.
 */
void
xfs_inode_shrinker_register(
	struct xfs_mount	*mp)
{
	down_write(&xfs_mount_list_lock);

	list_add_tail(&mp->m_mplist, &xfs_mount_list);

	up_write(&xfs_mount_list_lock);
}

/*
 * Unlink @mp from the global mount list so that the inode shrinker no
 * longer visits it. The list is modified under the write lock.
 */
void
xfs_inode_shrinker_unregister(
	struct xfs_mount	*mp)
{
	down_write(&xfs_mount_list_lock);

	list_del(&mp->m_mplist);

	up_write(&xfs_mount_list_lock);
}
7 changes: 6 additions & 1 deletion fs/xfs/linux-2.6/xfs_sync.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
int flags, int tag, int write_lock);
int flags, int tag, int write_lock, int *nr_to_scan);

void xfs_inode_shrinker_init(void);
void xfs_inode_shrinker_destroy(void);
void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp);

#endif
3 changes: 2 additions & 1 deletion fs/xfs/quota/xfs_qm_syscalls.c
Original file line number Diff line number Diff line change
Expand Up @@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
XFS_ICI_NO_TAG, 0, NULL);
}

/*------------------------------------------------------------------------*/
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/xfs_ag.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ typedef struct xfs_perag {
int pag_ici_init; /* incore inode cache initialised */
rwlock_t pag_ici_lock; /* incore inode lock */
struct radix_tree_root pag_ici_root; /* incore inode cache root */
int pag_ici_reclaimable; /* reclaimable inodes */
#endif
int pagb_count; /* pagb slots in use */
xfs_perag_busy_t pagb_list[XFS_PAGB_NUM_SLOTS]; /* unstable blocks */
Expand Down
1 change: 1 addition & 0 deletions fs/xfs/xfs_mount.h
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ typedef struct xfs_mount {
wait_queue_head_t m_wait_single_sync_task;
__int64_t m_update_flags; /* sb flags we need to update
on the next remount,rw */
struct list_head m_mplist; /* inode shrinker mount list */
} xfs_mount_t;

/*
Expand Down

0 comments on commit 9bf729c

Please sign in to comment.