Skip to content

Commit

Permalink
Btrfs: leave btree locks spinning more often
Browse files Browse the repository at this point in the history
btrfs_mark_buffer_dirty would set dirty bits in the extent_io tree
for the buffers it was dirtying.  This could require a kmalloc, and it
was not atomic.  So, anyone who called btrfs_mark_buffer_dirty had to
set any btree locks they were holding to blocking first.

This commit changes dirty tracking for extent buffers to just use a flag
in the extent buffer.  Now that we have one and only one extent buffer
per page, this can be safely done without losing dirty bits along the way.

This also introduces a path->leave_spinning flag that callers of
btrfs_search_slot can use to indicate they will properly deal with a
path returned where all the locks are spinning instead of blocking.

Many of the btree search callers now expect spinning paths,
resulting in better btree concurrency overall.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
  • Loading branch information
Chris Mason committed Mar 24, 2009
1 parent 89573b9 commit b947343
Show file tree
Hide file tree
Showing 14 changed files with 172 additions and 96 deletions.
19 changes: 11 additions & 8 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1684,7 +1684,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
* we don't really know what they plan on doing with the path
* from here on, so for now just mark it as blocking
*/
btrfs_set_path_blocking(p);
if (!p->leave_spinning)
btrfs_set_path_blocking(p);
return ret;
}

Expand Down Expand Up @@ -3032,26 +3033,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
return -EAGAIN;
}

btrfs_set_path_blocking(path);
ret = split_leaf(trans, root, &orig_key, path,
sizeof(struct btrfs_item), 1);
path->keep_locks = 0;
BUG_ON(ret);

btrfs_unlock_up_safe(path, 1);
leaf = path->nodes[0];
BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));

split:
/*
* make sure any changes to the path from split_leaf leave it
* in a blocking state
*/
btrfs_set_path_blocking(path);

leaf = path->nodes[0];
BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));

split:
item = btrfs_item_nr(leaf, path->slots[0]);
orig_offset = btrfs_item_offset(leaf, item);
item_size = btrfs_item_size(leaf, item);


buf = kmalloc(item_size, GFP_NOFS);
read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
path->slots[0]), item_size);
Expand Down Expand Up @@ -3545,14 +3547,15 @@ setup_items_for_insert(struct btrfs_trans_handle *trans,
}

btrfs_set_header_nritems(leaf, nritems + nr);
btrfs_mark_buffer_dirty(leaf);

ret = 0;
if (slot == 0) {
struct btrfs_disk_key disk_key;
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
ret = fixup_low_keys(trans, root, path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
btrfs_mark_buffer_dirty(leaf);

if (btrfs_leaf_free_space(root, leaf) < 0) {
btrfs_print_leaf(root, leaf);
Expand Down Expand Up @@ -3596,7 +3599,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
total_data, total_size, nr);

out:
btrfs_unlock_up_safe(path, 1);
return ret;
}

Expand Down Expand Up @@ -3792,6 +3794,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
slot = path->slots[1];
extent_buffer_get(leaf);

btrfs_set_path_blocking(path);
wret = push_leaf_left(trans, root, path, 1, 1);
if (wret < 0 && wret != -ENOSPC)
ret = wret;
Expand Down
12 changes: 9 additions & 3 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,15 +401,16 @@ struct btrfs_path {
int locks[BTRFS_MAX_LEVEL];
int reada;
/* keep some upper locks as we walk down */
int keep_locks;
int skip_locking;
int lowest_level;

/*
* set by btrfs_split_item, tells search_slot to keep all locks
* and to force calls to keep space in the nodes
*/
int search_for_split;
unsigned int search_for_split:1;
unsigned int keep_locks:1;
unsigned int skip_locking:1;
unsigned int leave_spinning:1;
};

/*
Expand Down Expand Up @@ -779,6 +780,11 @@ struct btrfs_fs_info {
atomic_t throttle_gen;

u64 total_pinned;

/* protected by the delalloc lock, used to keep from writing
* metadata until there is a nice batch
*/
u64 dirty_metadata_bytes;
struct list_head dirty_cowonly_roots;

struct btrfs_fs_devices *fs_devices;
Expand Down
3 changes: 3 additions & 0 deletions fs/btrfs/dir-item.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root
key.objectid = dir;
btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY);
key.offset = btrfs_name_hash(name, name_len);

path = btrfs_alloc_path();
path->leave_spinning = 1;

data_size = sizeof(*dir_item) + name_len;
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
name, name_len);
Expand Down
67 changes: 54 additions & 13 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -668,14 +668,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
static int btree_writepage(struct page *page, struct writeback_control *wbc)
{
struct extent_io_tree *tree;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
struct extent_buffer *eb;
int was_dirty;

tree = &BTRFS_I(page->mapping->host)->io_tree;
if (!(current->flags & PF_MEMALLOC)) {
return extent_write_full_page(tree, page,
btree_get_extent, wbc);
}

if (current->flags & PF_MEMALLOC) {
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
redirty_page_for_writepage(wbc, page);
eb = btrfs_find_tree_block(root, page_offset(page),
PAGE_CACHE_SIZE);
WARN_ON(!eb);

was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
if (!was_dirty) {
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
spin_unlock(&root->fs_info->delalloc_lock);
}
return extent_write_full_page(tree, page, btree_get_extent, wbc);
free_extent_buffer(eb);

unlock_page(page);
return 0;
}

static int btree_writepages(struct address_space *mapping,
Expand All @@ -684,15 +701,15 @@ static int btree_writepages(struct address_space *mapping,
struct extent_io_tree *tree;
tree = &BTRFS_I(mapping->host)->io_tree;
if (wbc->sync_mode == WB_SYNC_NONE) {
struct btrfs_root *root = BTRFS_I(mapping->host)->root;
u64 num_dirty;
u64 start = 0;
unsigned long thresh = 32 * 1024 * 1024;

if (wbc->for_kupdate)
return 0;

num_dirty = count_range_bits(tree, &start, (u64)-1,
thresh, EXTENT_DIRTY);
/* this is a bit racy, but that's ok */
num_dirty = root->fs_info->dirty_metadata_bytes;
if (num_dirty < thresh)
return 0;
}
Expand Down Expand Up @@ -859,9 +876,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
root->fs_info->running_transaction->transid) {
btrfs_assert_tree_locked(buf);

/* ugh, clear_extent_buffer_dirty can be expensive */
btrfs_set_lock_blocking(buf);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
spin_lock(&root->fs_info->delalloc_lock);
if (root->fs_info->dirty_metadata_bytes >= buf->len)
root->fs_info->dirty_metadata_bytes -= buf->len;
else
WARN_ON(1);
spin_unlock(&root->fs_info->delalloc_lock);
}

/* ugh, clear_extent_buffer_dirty needs to lock the page */
btrfs_set_lock_blocking(buf);
clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
buf);
}
Expand Down Expand Up @@ -2348,8 +2373,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
u64 transid = btrfs_header_generation(buf);
struct inode *btree_inode = root->fs_info->btree_inode;

btrfs_set_lock_blocking(buf);
int was_dirty;

btrfs_assert_tree_locked(buf);
if (transid != root->fs_info->generation) {
Expand All @@ -2360,7 +2384,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
(unsigned long long)root->fs_info->generation);
WARN_ON(1);
}
set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
buf);
if (!was_dirty) {
spin_lock(&root->fs_info->delalloc_lock);
root->fs_info->dirty_metadata_bytes += buf->len;
spin_unlock(&root->fs_info->delalloc_lock);
}
}

void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
Expand Down Expand Up @@ -2400,6 +2430,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
int btree_lock_page_hook(struct page *page)
{
struct inode *inode = page->mapping->host;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_buffer *eb;
unsigned long len;
Expand All @@ -2415,6 +2446,16 @@ int btree_lock_page_hook(struct page *page)

btrfs_tree_lock(eb);
btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);

if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
spin_lock(&root->fs_info->delalloc_lock);
if (root->fs_info->dirty_metadata_bytes >= eb->len)
root->fs_info->dirty_metadata_bytes -= eb->len;
else
WARN_ON(1);
spin_unlock(&root->fs_info->delalloc_lock);
}

btrfs_tree_unlock(eb);
free_extent_buffer(eb);
out:
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/disk-io.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root,
void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr);
int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root);
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf);
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int wait_on_tree_block_writeback(struct btrfs_root *root,
Expand Down
Loading

0 comments on commit b947343

Please sign in to comment.