Skip to content

Commit

Permalink
Btrfs: Online btree defragmentation fixes
Browse files Browse the repository at this point in the history
The btree defragger wasn't making forward progress because the new key wasn't
being saved by the btrfs_search_forward function.

This also disables the automatic btree defrag, it wasn't scaling well to
huge filesystems.  The auto-defrag needs to be done differently.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
  • Loading branch information
Chris Mason committed Sep 25, 2008
1 parent 1b1e213 commit 3f157a2
Show file tree
Hide file tree
Showing 9 changed files with 190 additions and 129 deletions.
170 changes: 161 additions & 9 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,21 +63,18 @@ void btrfs_free_path(struct btrfs_path *p)
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
{
int i;
int keep = p->keep_locks;
int skip = p->skip_locking;

for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
p->slots[i] = 0;
if (!p->nodes[i])
continue;
if (p->locks[i]) {
btrfs_tree_unlock(p->nodes[i]);
p->locks[i] = 0;
}
free_extent_buffer(p->nodes[i]);
p->nodes[i] = NULL;
}
memset(p, 0, sizeof(*p));
p->keep_locks = keep;
p->skip_locking = skip;
}

struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
Expand Down Expand Up @@ -463,8 +460,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
search_start = cur->start;
last_block = cur->start;
*last_ret = search_start;
if (parent_level == 1)
btrfs_clear_buffer_defrag(cur);
btrfs_tree_unlock(cur);
free_extent_buffer(cur);
}
Expand Down Expand Up @@ -2969,8 +2964,138 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
return 1;
}

/*
* A helper function to walk down the tree starting at min_key, and looking
* for nodes or leaves that are either in cache or have a minimum
* transaction id. This is used by the btree defrag code, but could
* also be used to search for blocks that have changed since a given
* transaction id.
*
* This does not cow, but it does stuff the starting key it finds back
* into min_key, so you can call btrfs_search_slot with cow=1 on the
* key and get a writable path.
*
* This does lock as it descends, and path->keep_locks should be set
* to 1 by the caller.
*
* This honors path->lowest_level to prevent descent past a given level
* of the tree.
*
* returns zero if something useful was found, < 0 on error and 1 if there
* was nothing in the tree that matched the search criteria.
*/
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
struct btrfs_path *path, int cache_only,
u64 min_trans)
{
struct extent_buffer *cur;
struct btrfs_key found_key;
int slot;
u32 nritems;
int level;
int ret = 1;

again:
cur = btrfs_lock_root_node(root);
level = btrfs_header_level(cur);
path->nodes[level] = cur;
path->locks[level] = 1;

if (btrfs_header_generation(cur) < min_trans) {
ret = 1;
goto out;
}
while(1) {
nritems = btrfs_header_nritems(cur);
level = btrfs_header_level(cur);
bin_search(cur, min_key, level, &slot);

/* at level = 0, we're done, setup the path and exit */
if (level == 0) {
ret = 0;
path->slots[level] = slot;
btrfs_item_key_to_cpu(cur, &found_key, slot);
goto out;
}
/*
* check this node pointer against the cache_only and
* min_trans parameters. If it isn't in cache or is too
* old, skip to the next one.
*/
while(slot < nritems) {
u64 blockptr;
u64 gen;
struct extent_buffer *tmp;
blockptr = btrfs_node_blockptr(cur, slot);
gen = btrfs_node_ptr_generation(cur, slot);
if (gen < min_trans) {
slot++;
continue;
}
if (!cache_only)
break;

tmp = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));

if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
free_extent_buffer(tmp);
break;
}
if (tmp)
free_extent_buffer(tmp);
slot++;
}
/*
* we didn't find a candidate key in this node, walk forward
* and find another one
*/
if (slot >= nritems) {
ret = btrfs_find_next_key(root, path, min_key, level,
cache_only, min_trans);
if (ret == 0) {
btrfs_release_path(root, path);
goto again;
} else {
goto out;
}
}
/* save our key for returning back */
btrfs_node_key_to_cpu(cur, &found_key, slot);
path->slots[level] = slot;
if (level == path->lowest_level) {
ret = 0;
unlock_up(path, level, 1);
goto out;
}
cur = read_node_slot(root, cur, slot);

btrfs_tree_lock(cur);
path->locks[level - 1] = 1;
path->nodes[level - 1] = cur;
unlock_up(path, level, 1);
}
out:
if (ret == 0)
memcpy(min_key, &found_key, sizeof(found_key));
return ret;
}

/*
* this is similar to btrfs_next_leaf, but does not try to preserve
* and fixup the path. It looks for and returns the next key in the
* tree based on the current path and the cache_only and min_trans
* parameters.
*
* 0 is returned if another key is found, < 0 if there are any errors
* and 1 is returned if there are no higher keys in the tree
*
* path->keep_locks should be set to 1 on the search made before
* calling this function.
*/
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int lowest_level)
struct btrfs_key *key, int lowest_level,
int cache_only, u64 min_trans)
{
int level = lowest_level;
int slot;
Expand All @@ -2982,6 +3107,7 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,

slot = path->slots[level] + 1;
c = path->nodes[level];
next:
if (slot >= btrfs_header_nritems(c)) {
level++;
if (level == BTRFS_MAX_LEVEL) {
Expand All @@ -2991,8 +3117,28 @@ int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
}
if (level == 0)
btrfs_item_key_to_cpu(c, key, slot);
else
else {
u64 blockptr = btrfs_node_blockptr(c, slot);
u64 gen = btrfs_node_ptr_generation(c, slot);

if (cache_only) {
struct extent_buffer *cur;
cur = btrfs_find_tree_block(root, blockptr,
btrfs_level_size(root, level - 1));
if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
slot++;
if (cur)
free_extent_buffer(cur);
goto next;
}
free_extent_buffer(cur);
}
if (gen < min_trans) {
slot++;
goto next;
}
btrfs_node_key_to_cpu(c, key, slot);
}
return 0;
}
return 1;
Expand Down Expand Up @@ -3095,6 +3241,12 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
return 0;
}

/*
* this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
* searching until it gets past min_objectid or finds an item of 'type'
*
* returns 0 if something is found, 1 if nothing was found and < 0 on error
*/
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type)
Expand Down
7 changes: 6 additions & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,7 @@ struct btrfs_root {
u64 last_inode_alloc;
int ref_cows;
int track_dirty;
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
int defrag_running;
Expand Down Expand Up @@ -1412,7 +1413,11 @@ int btrfs_previous_item(struct btrfs_root *root,
struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *key, int lowest_level);
struct btrfs_key *key, int lowest_level,
int cache_only, u64 min_trans);
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
struct btrfs_path *path, int cache_only,
u64 min_trans);

int btrfs_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
Expand Down
61 changes: 3 additions & 58 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,6 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
btrfs_header_generation(eb));
BUG_ON(ret);
btrfs_clear_buffer_defrag(eb);
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
printk("warning: eb start incorrect %Lu buffer %Lu len %lu\n",
Expand Down Expand Up @@ -355,7 +354,6 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
}
eb = alloc_extent_buffer(tree, start, len, page, GFP_NOFS);

btrfs_clear_buffer_defrag(eb);
found_start = btrfs_header_bytenr(eb);
if (found_start != start) {
ret = -EIO;
Expand Down Expand Up @@ -736,6 +734,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
memset(&root->root_item, 0, sizeof(root->root_item));
memset(&root->defrag_progress, 0, sizeof(root->defrag_progress));
memset(&root->root_kobj, 0, sizeof(root->root_kobj));
root->defrag_trans_start = fs_info->generation;
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
root->defrag_level = 0;
Expand Down Expand Up @@ -1168,7 +1167,6 @@ static int transaction_kthread(void *arg)
goto sleep;
}
mutex_unlock(&root->fs_info->trans_mutex);
btrfs_defrag_dirty_roots(root->fs_info);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
sleep:
Expand Down Expand Up @@ -1434,12 +1432,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
tree_root,
"btrfs-transaction");
if (!fs_info->transaction_kthread)
goto fail_trans_kthread;
goto fail_cleaner;


return tree_root;

fail_trans_kthread:
fail_cleaner:
kthread_stop(fs_info->cleaner_kthread);
fail_extent_root:
free_extent_buffer(extent_root->node);
Expand Down Expand Up @@ -1662,7 +1660,6 @@ int close_ctree(struct btrfs_root *root)
kthread_stop(root->fs_info->transaction_kthread);
kthread_stop(root->fs_info->cleaner_kthread);

btrfs_defrag_dirty_roots(root->fs_info);
btrfs_clean_old_snapshots(root);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
Expand Down Expand Up @@ -1794,58 +1791,6 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
return;
}

void btrfs_set_buffer_defrag(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
buf->start + buf->len - 1, EXTENT_DEFRAG, GFP_NOFS);
}

void btrfs_set_buffer_defrag_done(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
set_extent_bits(&BTRFS_I(btree_inode)->io_tree, buf->start,
buf->start + buf->len - 1, EXTENT_DEFRAG_DONE,
GFP_NOFS);
}

int btrfs_buffer_defrag(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
buf->start, buf->start + buf->len - 1, EXTENT_DEFRAG, 0);
}

int btrfs_buffer_defrag_done(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
return test_range_bit(&BTRFS_I(btree_inode)->io_tree,
buf->start, buf->start + buf->len - 1,
EXTENT_DEFRAG_DONE, 0);
}

int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
buf->start, buf->start + buf->len - 1,
EXTENT_DEFRAG_DONE, GFP_NOFS);
}

int btrfs_clear_buffer_defrag(struct extent_buffer *buf)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
struct inode *btree_inode = root->fs_info->btree_inode;
return clear_extent_bits(&BTRFS_I(btree_inode)->io_tree,
buf->start, buf->start + buf->len - 1,
EXTENT_DEFRAG, GFP_NOFS);
}

int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
{
struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
Expand Down
6 changes: 0 additions & 6 deletions fs/btrfs/disk-io.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,6 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
int wait_on_tree_block_writeback(struct btrfs_root *root,
struct extent_buffer *buf);
void btrfs_set_buffer_defrag(struct extent_buffer *buf);
void btrfs_set_buffer_defrag_done(struct extent_buffer *buf);
int btrfs_buffer_defrag(struct extent_buffer *buf);
int btrfs_buffer_defrag_done(struct extent_buffer *buf);
int btrfs_clear_buffer_defrag(struct extent_buffer *buf);
int btrfs_clear_buffer_defrag_done(struct extent_buffer *buf);
int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid);
u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len);
void btrfs_csum_final(u32 crc, char *result);
Expand Down
2 changes: 0 additions & 2 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2095,8 +2095,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,

set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
if (!btrfs_test_opt(root, SSD))
btrfs_set_buffer_defrag(buf);
trans->blocks_used++;
return buf;
}
Expand Down
1 change: 0 additions & 1 deletion fs/btrfs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
btrfs_clean_old_snapshots(root);
btrfs_defrag_dirty_roots(root->fs_info);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
sb->s_dirt = 0;
Expand Down
Loading

0 comments on commit 3f157a2

Please sign in to comment.