Skip to content

Commit

Permalink
xfs: use per-filesystem radix trees for dquot lookup
Browse files Browse the repository at this point in the history
Replace the global hash tables for looking up in-memory dquot structures
with per-filesystem radix trees to allow scaling to a large number of
in-memory dquot structures.

Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ben Myers <bpm@sgi.com>
  • Loading branch information
Christoph Hellwig authored and Ben Myers committed Mar 14, 2012
1 parent f8739c3 commit 9f920f1
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 263 deletions.
188 changes: 48 additions & 140 deletions fs/xfs/xfs_dquot.c
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
* Lock order:
*
* ip->i_lock
* qh->qh_lock
* qi->qi_tree_lock
* qi->qi_dqlist_lock
* dquot->q_qlock (xfs_dqlock() and friends)
* dquot->q_flush (xfs_dqflock() and friends)
Expand Down Expand Up @@ -601,60 +601,6 @@ xfs_qm_dqread(
return error;
}

/*
* Lookup a dquot in the incore dquot hashtable. We keep two separate
* hashtables for user and group dquots; and, these are global tables
* inside the XQM, not per-filesystem tables.
* The hash chain must be locked by caller, and it is left locked
* on return. Returning dquot is locked.
*/
STATIC int
xfs_qm_dqlookup(
xfs_mount_t *mp,
xfs_dqid_t id,
xfs_dqhash_t *qh,
xfs_dquot_t **O_dqpp)
{
xfs_dquot_t *dqp;

ASSERT(mutex_is_locked(&qh->qh_lock));

/*
* Traverse the hashchain looking for a match
*/
list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
/*
* We already have the hashlock. We don't need the
* dqlock to look at the id field of the dquot, since the
* id can't be modified without the hashlock anyway.
*/
if (be32_to_cpu(dqp->q_core.d_id) != id || dqp->q_mount != mp)
continue;

trace_xfs_dqlookup_found(dqp);

xfs_dqlock(dqp);
if (dqp->dq_flags & XFS_DQ_FREEING) {
*O_dqpp = NULL;
xfs_dqunlock(dqp);
return -1;
}

dqp->q_nrefs++;

/*
* move the dquot to the front of the hashchain
*/
list_move(&dqp->q_hashlist, &qh->qh_list);
trace_xfs_dqlookup_done(dqp);
*O_dqpp = dqp;
return 0;
}

*O_dqpp = NULL;
return 1;
}

/*
* Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
* a locked dquot, doing an allocation (if requested) as needed.
Expand All @@ -672,18 +618,17 @@ xfs_qm_dqget(
uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
{
xfs_dquot_t *dqp, *dqp1;
xfs_dqhash_t *h;
uint version;
int error;
struct xfs_quotainfo *qi = mp->m_quotainfo;
struct radix_tree_root *tree = XFS_DQUOT_TREE(qi, type);
struct xfs_dquot *dqp;
int error;

ASSERT(XFS_IS_QUOTA_RUNNING(mp));
if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
(! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
(! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
return (ESRCH);
}
h = XFS_DQ_HASH(mp, id, type);

#ifdef DEBUG
if (xfs_do_dqerror) {
Expand All @@ -704,34 +649,28 @@ xfs_qm_dqget(
#endif

restart:
mutex_lock(&h->qh_lock);
mutex_lock(&qi->qi_tree_lock);
dqp = radix_tree_lookup(tree, id);
if (dqp) {
xfs_dqlock(dqp);
if (dqp->dq_flags & XFS_DQ_FREEING) {
xfs_dqunlock(dqp);
mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_freeing(dqp);
delay(1);
goto restart;
}

/*
* Look in the cache (hashtable).
* The chain is kept locked during lookup.
*/
switch (xfs_qm_dqlookup(mp, id, h, O_dqpp)) {
case -1:
XFS_STATS_INC(xs_qm_dquot_dups);
mutex_unlock(&h->qh_lock);
delay(1);
goto restart;
case 0:
dqp->q_nrefs++;
mutex_unlock(&qi->qi_tree_lock);

trace_xfs_dqget_hit(dqp);
XFS_STATS_INC(xs_qm_dqcachehits);
/*
* The dquot was found, moved to the front of the chain,
* taken off the freelist if it was on it, and locked
* at this point. Just unlock the hashchain and return.
*/
ASSERT(*O_dqpp);
ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
mutex_unlock(&h->qh_lock);
trace_xfs_dqget_hit(*O_dqpp);
return 0; /* success */
default:
XFS_STATS_INC(xs_qm_dqcachemisses);
break;
*O_dqpp = dqp;
return 0;
}
mutex_unlock(&qi->qi_tree_lock);
XFS_STATS_INC(xs_qm_dqcachemisses);

/*
* Dquot cache miss. We don't want to keep the inode lock across
Expand All @@ -742,12 +681,6 @@ xfs_qm_dqget(
*/
if (ip)
xfs_iunlock(ip, XFS_ILOCK_EXCL);
/*
* Save the hashchain version stamp, and unlock the chain, so that
* we don't keep the lock across a disk read
*/
version = h->qh_version;
mutex_unlock(&h->qh_lock);

error = xfs_qm_dqread(mp, id, type, flags, &dqp);

Expand All @@ -757,15 +690,14 @@ xfs_qm_dqget(
if (error)
return error;

/*
* Dquot lock comes after hashlock in the lock ordering
*/
if (ip) {
/*
* A dquot could be attached to this inode by now, since
* we had dropped the ilock.
*/
if (xfs_this_quota_on(mp, type)) {
struct xfs_dquot *dqp1;

dqp1 = xfs_inode_dquot(ip, type);
if (dqp1) {
xfs_qm_dqdestroy(dqp);
Expand All @@ -780,62 +712,39 @@ xfs_qm_dqget(
}
}

/*
* Hashlock comes after ilock in lock order
*/
mutex_lock(&h->qh_lock);
if (version != h->qh_version) {
xfs_dquot_t *tmpdqp;
mutex_lock(&qi->qi_tree_lock);
error = -radix_tree_insert(tree, id, dqp);
if (unlikely(error)) {
WARN_ON(error != EEXIST);

/*
* Now, see if somebody else put the dquot in the
* hashtable before us. This can happen because we didn't
* keep the hashchain lock. We don't have to worry about
* lock order between the two dquots here since dqp isn't
* on any findable lists yet.
* Duplicate found. Just throw away the new dquot and start
* over.
*/
switch (xfs_qm_dqlookup(mp, id, h, &tmpdqp)) {
case 0:
case -1:
/*
* Duplicate found, either in cache or on its way out.
* Just throw away the new dquot and start over.
*/
if (tmpdqp)
xfs_qm_dqput(tmpdqp);
mutex_unlock(&h->qh_lock);
xfs_qm_dqdestroy(dqp);
XFS_STATS_INC(xs_qm_dquot_dups);
goto restart;
default:
break;
}
mutex_unlock(&qi->qi_tree_lock);
trace_xfs_dqget_dup(dqp);
xfs_qm_dqdestroy(dqp);
XFS_STATS_INC(xs_qm_dquot_dups);
goto restart;
}

/*
* Put the dquot at the beginning of the hash-chain and mp's list
* LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
*/
ASSERT(mutex_is_locked(&h->qh_lock));
dqp->q_hash = h;
list_add(&dqp->q_hashlist, &h->qh_list);
h->qh_version++;

/*
* Attach this dquot to this filesystem's list of all dquots,
* kept inside the mount structure in m_quotainfo field
*/
mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
mutex_lock(&qi->qi_dqlist_lock);

/*
* We return a locked dquot to the caller, with a reference taken
*/
xfs_dqlock(dqp);
dqp->q_nrefs = 1;

list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
mp->m_quotainfo->qi_dquots++;
mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
mutex_unlock(&h->qh_lock);
list_add(&dqp->q_mplist, &qi->qi_dqlist);
qi->qi_dquots++;
mutex_unlock(&qi->qi_dqlist_lock);
mutex_unlock(&qi->qi_tree_lock);

dqret:
ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
trace_xfs_dqget_miss(dqp);
Expand Down Expand Up @@ -1117,7 +1026,6 @@ xfs_qm_dqpurge(
struct xfs_dquot *dqp)
{
struct xfs_mount *mp = dqp->q_mount;
struct xfs_dqhash *qh = dqp->q_hash;
struct xfs_quotainfo *qi = mp->m_quotainfo;

xfs_dqlock(dqp);
Expand Down Expand Up @@ -1164,10 +1072,10 @@ xfs_qm_dqpurge(
xfs_dqfunlock(dqp);
xfs_dqunlock(dqp);

mutex_lock(&qh->qh_lock);
list_del_init(&dqp->q_hashlist);
qh->qh_version++;
mutex_unlock(&qh->qh_lock);
mutex_lock(&qi->qi_tree_lock);
radix_tree_delete(XFS_DQUOT_TREE(qi, dqp->q_core.d_flags),
be32_to_cpu(dqp->q_core.d_id));
mutex_unlock(&qi->qi_tree_lock);

mutex_lock(&qi->qi_dqlist_lock);
list_del_init(&dqp->q_mplist);
Expand Down
12 changes: 0 additions & 12 deletions fs/xfs/xfs_dquot.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,6 @@
* when quotas are off.
*/

/*
* The hash chain headers (hash buckets)
*/
typedef struct xfs_dqhash {
struct list_head qh_list;
struct mutex qh_lock;
uint qh_version; /* ever increasing version */
uint qh_nelems; /* number of dquots on the list */
} xfs_dqhash_t;

struct xfs_mount;
struct xfs_trans;

Expand All @@ -49,8 +39,6 @@ typedef struct xfs_dquot {
uint dq_flags; /* various flags (XFS_DQ_*) */
struct list_head q_lru; /* global free list of dquots */
struct list_head q_mplist; /* mount's list of dquots */
struct list_head q_hashlist; /* gloabl hash list of dquots */
xfs_dqhash_t *q_hash; /* the hashchain header */
struct xfs_mount*q_mount; /* filesystem this relates to */
struct xfs_trans*q_transp; /* trans this belongs to currently */
uint q_nrefs; /* # active refs from inodes */
Expand Down
Loading

0 comments on commit 9f920f1

Please sign in to comment.