Skip to content

Commit

Permalink
Btrfs: switch the btrfs tree locks to reader/writer
Browse files Browse the repository at this point in the history
The btrfs metadata btree is the source of significant
lock contention, especially in the root node.   This
commit changes our locking to use a reader/writer
lock.

The lock is built on top of rw spinlocks, and it
extends the lock tracking to remember if we have a
read lock or a write lock when we go to blocking.  Atomics
count the number of blocking readers or writers at any
given time.

It removes all of the adaptive spinning from the old code
and uses only the spinning/blocking hints inside of btrfs
to decide when it should continue spinning.

In read heavy workloads this is dramatically faster.  In write
heavy workloads we're still faster because of less contention
on the root node lock.

We suffer slightly in dbench because we schedule more often
during write locks, but all other benchmarks so far are improved.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
  • Loading branch information
Chris Mason committed Jul 27, 2011
1 parent 81317fd commit bd68151
Show file tree
Hide file tree
Showing 9 changed files with 431 additions and 218 deletions.
268 changes: 209 additions & 59 deletions fs/btrfs/ctree.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2333,7 +2333,7 @@ struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
void btrfs_set_path_blocking(struct btrfs_path *p);
void btrfs_clear_path_blocking(struct btrfs_path *p,
struct extent_buffer *held);
struct extent_buffer *held, int held_rw);
void btrfs_unlock_up_safe(struct btrfs_path *p, int level);

int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/delayed-inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
}

/* reset all the locked nodes in the patch to spinning locks. */
btrfs_clear_path_blocking(path, NULL);
btrfs_clear_path_blocking(path, NULL, 0);

/* insert the keys of the items */
ret = setup_items_for_insert(trans, root, path, keys, data_size,
Expand Down
20 changes: 10 additions & 10 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -5912,7 +5912,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
return 1;

if (path->locks[level] && !wc->keep_locks) {
btrfs_tree_unlock(eb);
btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
}
return 0;
Expand All @@ -5936,7 +5936,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
* keep the tree lock
*/
if (path->locks[level] && level > 0) {
btrfs_tree_unlock(eb);
btrfs_tree_unlock_rw(eb, path->locks[level]);
path->locks[level] = 0;
}
return 0;
Expand Down Expand Up @@ -6049,7 +6049,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
BUG_ON(level != btrfs_header_level(next));
path->nodes[level] = next;
path->slots[level] = 0;
path->locks[level] = 1;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
wc->level = level;
if (wc->level == 1)
wc->reada_slot = 0;
Expand Down Expand Up @@ -6120,7 +6120,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
BUG_ON(level == 0);
btrfs_tree_lock(eb);
btrfs_set_lock_blocking(eb);
path->locks[level] = 1;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

ret = btrfs_lookup_extent_info(trans, root,
eb->start, eb->len,
Expand All @@ -6129,8 +6129,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
BUG_ON(ret);
BUG_ON(wc->refs[level] == 0);
if (wc->refs[level] == 1) {
btrfs_tree_unlock(eb);
path->locks[level] = 0;
btrfs_tree_unlock_rw(eb, path->locks[level]);
return 1;
}
}
Expand All @@ -6152,7 +6151,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
btrfs_header_generation(eb) == trans->transid) {
btrfs_tree_lock(eb);
btrfs_set_lock_blocking(eb);
path->locks[level] = 1;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
}
clean_tree_block(trans, root, eb);
}
Expand Down Expand Up @@ -6231,7 +6230,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
return 0;

if (path->locks[level]) {
btrfs_tree_unlock(path->nodes[level]);
btrfs_tree_unlock_rw(path->nodes[level],
path->locks[level]);
path->locks[level] = 0;
}
free_extent_buffer(path->nodes[level]);
Expand Down Expand Up @@ -6283,7 +6283,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
path->nodes[level] = btrfs_lock_root_node(root);
btrfs_set_lock_blocking(path->nodes[level]);
path->slots[level] = 0;
path->locks[level] = 1;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
memset(&wc->update_progress, 0,
sizeof(wc->update_progress));
} else {
Expand Down Expand Up @@ -6451,7 +6451,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
level = btrfs_header_level(node);
path->nodes[level] = node;
path->slots[level] = 0;
path->locks[level] = 1;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;

wc->refs[parent_level] = 1;
wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF;
Expand Down
11 changes: 9 additions & 2 deletions fs/btrfs/extent_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -3017,8 +3017,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
return NULL;
eb->start = start;
eb->len = len;
spin_lock_init(&eb->lock);
init_waitqueue_head(&eb->lock_wq);
rwlock_init(&eb->lock);
atomic_set(&eb->write_locks, 0);
atomic_set(&eb->read_locks, 0);
atomic_set(&eb->blocking_readers, 0);
atomic_set(&eb->blocking_writers, 0);
atomic_set(&eb->spinning_readers, 0);
atomic_set(&eb->spinning_writers, 0);
init_waitqueue_head(&eb->write_lock_wq);
init_waitqueue_head(&eb->read_lock_wq);

#if LEAK_DEBUG
spin_lock_irqsave(&leak_lock, flags);
Expand Down
24 changes: 18 additions & 6 deletions fs/btrfs/extent_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,14 +128,26 @@ struct extent_buffer {
struct rcu_head rcu_head;
atomic_t refs;

/* the spinlock is used to protect most operations */
spinlock_t lock;
/* count of read lock holders on the extent buffer */
atomic_t write_locks;
atomic_t read_locks;
atomic_t blocking_writers;
atomic_t blocking_readers;
atomic_t spinning_readers;
atomic_t spinning_writers;

/* protects write locks */
rwlock_t lock;

/* readers use lock_wq while they wait for the write
* lock holders to unlock
*/
wait_queue_head_t write_lock_wq;

/*
* when we keep the lock held while blocking, waiters go onto
* the wq
/* writers use read_lock_wq while they wait for readers
* to unlock
*/
wait_queue_head_t lock_wq;
wait_queue_head_t read_lock_wq;
};

static inline void extent_set_compress_type(unsigned long *bio_flags,
Expand Down
Loading

0 comments on commit bd68151

Please sign in to comment.