Skip to content

Commit

Permalink
Btrfs: reduce CPU contention while waiting for delayed extent operations
Browse files Browse the repository at this point in the history
We batch up operations to the extent allocation tree, which allows
us to deal with the recursive nature of using the extent allocation
tree to allocate extents to the extent allocation tree.

It also provides a mechanism to sort and collect extent
operations, which makes it much more efficient to record extents
that are close together.

The delayed extent operations must all be finished before the
running transaction commits, so we have code that makes sure to run a few
of the batched operations when closing our transaction handles.

This creates a great deal of contention for the locks in the
delayed extent operation tree, and also contention for the lock on the
extent allocation tree itself.  All the extra contention just slows
down the operations and doesn't get things done any faster.

This commit changes things to use a wait queue instead.  As procs
want to run the delayed operations, one of them races in and gets
permission to hit the tree, and the others step back and wait for
progress to be made.

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
  • Loading branch information
Chris Mason committed Feb 1, 2013
1 parent 242e18c commit bb72170
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 5 deletions.
9 changes: 9 additions & 0 deletions fs/btrfs/delayed-ref.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,15 @@ struct btrfs_delayed_ref_root {
/* total number of head nodes ready for processing */
unsigned long num_heads_ready;

/*
* bumped when someone is making progress on the delayed
* refs, so that other procs know they are just adding to
* contention instead of helping
*/
atomic_t procs_running_refs;
atomic_t ref_seq;
wait_queue_head_t wait;

/*
* set when the tree is flushing before a transaction commit,
* used by the throttling code to decide if new updates need
Expand Down
60 changes: 56 additions & 4 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2438,6 +2438,16 @@ int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
return ret;
}

/*
 * Report whether the delayed-ref sequence counter has moved outside the
 * window [seq, seq + count).
 *
 * @delayed_refs: delayed ref root whose ref_seq counter is sampled
 * @seq:          sequence value the caller recorded earlier
 * @count:        width of the window that starts at @seq
 *
 * Returns 1 when the current ref_seq is below @seq or at/after
 * @seq + @count, and 0 while it is still inside the window.
 */
static int refs_newer(struct btrfs_delayed_ref_root *delayed_refs, int seq,
		      int count)
{
	int cur = atomic_read(&delayed_refs->ref_seq);
	/* the upper bound is only computed once the lower bound holds */
	int in_window = (cur >= seq) && (cur < seq + count);

	return in_window ? 0 : 1;
}

/*
* this starts processing the delayed reference count updates and
* extent insertions we have queued up so far. count can be
Expand Down Expand Up @@ -2472,6 +2482,44 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,

delayed_refs = &trans->transaction->delayed_refs;
INIT_LIST_HEAD(&cluster);
if (count == 0) {
count = delayed_refs->num_entries * 2;
run_most = 1;
}

if (!run_all && !run_most) {
int old;
int seq = atomic_read(&delayed_refs->ref_seq);

progress:
old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
if (old) {
DEFINE_WAIT(__wait);
if (delayed_refs->num_entries < 16348)
return 0;

prepare_to_wait(&delayed_refs->wait, &__wait,
TASK_UNINTERRUPTIBLE);

old = atomic_cmpxchg(&delayed_refs->procs_running_refs, 0, 1);
if (old) {
schedule();
finish_wait(&delayed_refs->wait, &__wait);

if (!refs_newer(delayed_refs, seq, 256))
goto progress;
else
return 0;
} else {
finish_wait(&delayed_refs->wait, &__wait);
goto again;
}
}

} else {
atomic_inc(&delayed_refs->procs_running_refs);
}

again:
loops = 0;
spin_lock(&delayed_refs->lock);
Expand All @@ -2480,10 +2528,6 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
delayed_refs->run_delayed_start = find_middle(&delayed_refs->root);
#endif

if (count == 0) {
count = delayed_refs->num_entries * 2;
run_most = 1;
}
while (1) {
if (!(run_all || run_most) &&
delayed_refs->num_heads_ready < 64)
Expand All @@ -2505,9 +2549,12 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (ret < 0) {
spin_unlock(&delayed_refs->lock);
btrfs_abort_transaction(trans, root, ret);
atomic_dec(&delayed_refs->procs_running_refs);
return ret;
}

atomic_add(ret, &delayed_refs->ref_seq);

count -= min_t(unsigned long, ret, count);

if (count == 0)
Expand Down Expand Up @@ -2576,6 +2623,11 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
goto again;
}
out:
atomic_dec(&delayed_refs->procs_running_refs);
smp_mb();
if (waitqueue_active(&delayed_refs->wait))
wake_up(&delayed_refs->wait);

spin_unlock(&delayed_refs->lock);
assert_qgroups_uptodate(trans);
return 0;
Expand Down
6 changes: 5 additions & 1 deletion fs/btrfs/transaction.c
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ static noinline int join_transaction(struct btrfs_root *root, int type)

spin_lock_init(&cur_trans->commit_lock);
spin_lock_init(&cur_trans->delayed_refs.lock);
atomic_set(&cur_trans->delayed_refs.procs_running_refs, 0);
atomic_set(&cur_trans->delayed_refs.ref_seq, 0);
init_waitqueue_head(&cur_trans->delayed_refs.wait);

INIT_LIST_HEAD(&cur_trans->pending_snapshots);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
Expand Down Expand Up @@ -577,7 +580,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
if (!list_empty(&trans->new_bgs))
btrfs_create_pending_block_groups(trans, root);

while (count < 2) {
while (count < 1) {
unsigned long cur = trans->delayed_ref_updates;
trans->delayed_ref_updates = 0;
if (cur &&
Expand All @@ -589,6 +592,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
}
count++;
}

btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;

Expand Down

0 comments on commit bb72170

Please sign in to comment.