Skip to content

Commit

Permalink
Btrfs: update space balancing code
Browse files Browse the repository at this point in the history
This patch updates the space balancing code to utilize the new
backref format.  Before, btrfs-vol -b would break any COW links
on data blocks or metadata.  This was slow and caused the amount
of space used to explode if a large number of snapshots were present.

The new code keeps the sharing of all data extents and
most of the tree blocks.

To maintain the sharing of data extents, the space balance code uses
a separate inode to hold data extent pointers, then updates the references
to point to the new location.

To maintain the sharing of tree blocks, the space balance code uses
reloc trees to relocate tree blocks in reference counted roots.
There is one reloc tree for each subvol, and all reloc trees share
the same root key objectid. Reloc trees are snapshots of the latest
committed roots of subvols (root->commit_root).

To relocate a tree block referenced by a subvol, there are two steps:
first COW the block through the subvol's reloc tree, then update the block
pointer in the subvol to point to the new block. Since all reloc trees share
the same root key objectid, doing special handling for tree blocks
owned by them is easy. Once a tree block has been COWed in one
reloc tree, we can use the resulting new block directly when the
same block is required to COW again through other reloc trees.
In this way, relocated tree blocks are shared between reloc trees,
so they are also shared between subvols.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
  • Loading branch information
Zheng Yan authored and Chris Mason committed Sep 26, 2008
1 parent 5b21f2e commit 1a40e23
Show file tree
Hide file tree
Showing 7 changed files with 1,848 additions and 445 deletions.
155 changes: 153 additions & 2 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
struct extent_buffer *cow;
u32 nritems;
int ret = 0;
int different_trans = 0;
int level;
int unlock_orig = 0;

Expand Down Expand Up @@ -233,20 +232,48 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(btrfs_header_generation(buf) > trans->transid);
if (btrfs_header_generation(buf) != trans->transid) {
u32 nr_extents;
different_trans = 1;
ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
if (ret)
return ret;

ret = btrfs_cache_ref(trans, root, buf, nr_extents);
WARN_ON(ret);
} else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
/*
* There are only two places that can drop reference to
* tree blocks owned by living reloc trees, one is here,
* the other place is btrfs_merge_path. In both places,
* we check reference count while tree block is locked.
* Furthermore, if reference count is one, it won't get
* increased by someone else.
*/
u32 refs;
ret = btrfs_lookup_extent_ref(trans, root, buf->start,
buf->len, &refs);
BUG_ON(ret);
if (refs == 1) {
ret = btrfs_update_ref(trans, root, buf, cow,
0, nritems);
clean_tree_block(trans, root, buf);
} else {
ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
}
BUG_ON(ret);
} else {
ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
if (ret)
return ret;
clean_tree_block(trans, root, buf);
}

if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
ret = btrfs_add_reloc_mapping(root, buf->start,
buf->len, cow->start);
BUG_ON(ret);
ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
WARN_ON(ret);
}

if (buf == root->node) {
WARN_ON(parent && parent != buf);

Expand Down Expand Up @@ -1466,6 +1493,130 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
return ret;
}

/*
 * Walk down from the root of 'root' along the path selected by
 * node_keys[lowest_level], COWing each block on the way, and try to
 * replace one child block pointer with the corresponding entry of
 * 'nodes'.
 *
 * node_keys and nodes are both indexed by tree level: at a parent of
 * level N the child pointer is compared against nodes[N - 1] and the
 * child key against node_keys[N - 1].  A zero entry in nodes[] means
 * there is no replacement block for that level.
 *
 * The walk stops as soon as a pointer has been replaced, a pointer is
 * found to already match nodes[], or the level just above lowest_level
 * (or level 1) has been handled.  At most one pointer is replaced per
 * call.
 *
 * Always returns 0; every failure of a helper is caught by BUG_ON.
 */
int btrfs_merge_path(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_key *node_keys,
u64 *nodes, int lowest_level)
{
struct extent_buffer *eb;
struct extent_buffer *parent;
struct btrfs_key key;
u64 bytenr;
u64 generation;
u32 blocksize;
int level;
int slot;
int key_match;
int ret;

/* start at the locked root node and COW it before touching it */
eb = btrfs_lock_root_node(root);
ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
BUG_ON(ret);

/* invariant: 'parent' is locked and referenced at the top of each pass */
parent = eb;
while (1) {
level = btrfs_header_level(parent);
if (level == 0 || level <= lowest_level)
break;

/*
 * find the slot to follow for the target key.
 * NOTE(review): presumably a non-zero return means "no exact
 * match", in which case the previous slot covers the key --
 * confirm against bin_search.
 */
ret = bin_search(parent, &node_keys[lowest_level], level,
&slot);
if (ret && slot > 0)
slot--;

/* the pointer already refers to the wanted block, nothing to do */
bytenr = btrfs_node_blockptr(parent, slot);
if (nodes[level - 1] == bytenr)
break;

blocksize = btrfs_level_size(root, level - 1);
generation = btrfs_node_ptr_generation(parent, slot);
/* eb is the same buffer as parent here (see the assignments to parent) */
btrfs_node_key_to_cpu(eb, &key, slot);
key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));

/*
* if node keys match and node pointer hasn't been modified
* in the running transaction, we can merge the path. for
* reloc trees, the node pointer check is skipped, this is
* because the reloc trees are fully controlled by the space
* balance code, no one else can modify them.
*/
if (!nodes[level - 1] || !key_match ||
(generation == trans->transid &&
root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) {
/*
 * cannot merge at this level: descend one level instead.
 * this label is also reached by goto from the refs == 1 case
 * below, which jumps into the body of this if statement.
 */
next_level:
if (level == 1 || level == lowest_level + 1)
break;

eb = read_tree_block(root, bytenr, blocksize,
generation);
btrfs_tree_lock(eb);

ret = btrfs_cow_block(trans, root, eb, parent, slot,
&eb, 0);
BUG_ON(ret);

/* hand-over-hand: drop the old parent, the COWed child takes over */
btrfs_tree_unlock(parent);
free_extent_buffer(parent);
parent = eb;
continue;
}

/*
 * the pointer was already modified in this transaction; given the
 * condition above this is only reached for a reloc tree root.
 */
if (generation == trans->transid) {
u32 refs;
/* eb still equals parent here, so this checks the parent's owner */
BUG_ON(btrfs_header_owner(eb) !=
BTRFS_TREE_RELOC_OBJECTID);
/*
* lock the block to keep __btrfs_cow_block from
* changing the reference count.
*/
eb = read_tree_block(root, bytenr, blocksize,
generation);
btrfs_tree_lock(eb);

ret = btrfs_lookup_extent_ref(trans, root, bytenr,
blocksize, &refs);
BUG_ON(ret);
/*
* if replace block whose reference count is one,
* we have to "drop the subtree". so skip it for
* simplicity
*/
if (refs == 1) {
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
goto next_level;
}
}

/* redirect the slot to the replacement block, stamped with this transid */
btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
btrfs_set_node_ptr_generation(parent, slot, trans->transid);
btrfs_mark_buffer_dirty(parent);

/* take a reference on the new child on behalf of 'parent' ... */
ret = btrfs_inc_extent_ref(trans, root,
nodes[level - 1],
blocksize, parent->start,
btrfs_header_owner(parent),
btrfs_header_generation(parent),
level - 1, 0);
BUG_ON(ret);
/* ... and drop the reference 'parent' held on the old child */
ret = btrfs_free_extent(trans, root, bytenr,
blocksize, parent->start,
btrfs_header_owner(parent),
btrfs_header_generation(parent),
level - 1, 0, 1);
BUG_ON(ret);

/* release the old child that was locked for the refcount check */
if (generation == trans->transid) {
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
}
break;
}
/* every exit from the loop leaves 'parent' locked and referenced */
btrfs_tree_unlock(parent);
free_extent_buffer(parent);
return 0;
}

/*
* adjust the pointers going up the tree, starting at level
* making sure the right key of each node points to 'key'.
Expand Down
26 changes: 25 additions & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -604,6 +604,7 @@ struct btrfs_fs_info {
struct mutex chunk_mutex;
struct mutex drop_mutex;
struct mutex volume_mutex;
struct mutex tree_reloc_mutex;
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
Expand Down Expand Up @@ -647,6 +648,10 @@ struct btrfs_fs_info {
struct task_struct *cleaner_kthread;
int thread_pool_size;

/* tree relocation related fields */
struct extent_io_tree reloc_mapping_tree;
struct list_head dead_reloc_roots;
struct btrfs_leaf_ref_tree reloc_ref_tree;
struct btrfs_leaf_ref_tree shared_ref_tree;

struct kobject super_kobj;
Expand Down Expand Up @@ -698,6 +703,7 @@ struct btrfs_root {
struct btrfs_leaf_ref_tree ref_tree_struct;
struct btrfs_dirty_root *dirty_root;
struct btrfs_root *log_root;
struct btrfs_root *reloc_root;

struct btrfs_root_item root_item;
struct btrfs_key root_key;
Expand Down Expand Up @@ -1517,7 +1523,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize);
int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size);
int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
Expand Down Expand Up @@ -1582,10 +1587,29 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytes_used,
u64 type, u64 chunk_objectid, u64 chunk_offset,
u64 size);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start);
int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start);
int btrfs_free_reloc_root(struct btrfs_root *root);
int btrfs_drop_dead_reloc_roots(struct btrfs_root *root);
int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
u64 num_bytes, u64 new_bytenr);
int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr,
u64 num_bytes, u64 *new_bytenr);
void btrfs_free_reloc_mappings(struct btrfs_root *root);
int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf, u64 orig_start);
int btrfs_add_dead_reloc_root(struct btrfs_root *root);
int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
/* ctree.c */
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
int type);
/*
 * walk from the root of 'root' toward lowest_level along node_keys,
 * COWing blocks on the way, and replace at most one child block pointer
 * with the matching entry of 'nodes' (both arrays are indexed by level).
 */
int btrfs_merge_path(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_key *node_keys,
u64 *nodes, int lowest_level);
int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *new_key);
Expand Down
9 changes: 9 additions & 0 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->btree_inode->i_mapping, GFP_NOFS);
fs_info->do_barriers = 1;

extent_io_tree_init(&fs_info->reloc_mapping_tree,
fs_info->btree_inode->i_mapping, GFP_NOFS);
INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree);
btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree);

BTRFS_I(fs_info->btree_inode)->root = tree_root;
Expand All @@ -1421,6 +1425,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
mutex_init(&fs_info->tree_reloc_mutex);
init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
Expand Down Expand Up @@ -1627,6 +1632,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
ret = btrfs_recover_log_trees(log_tree_root);
BUG_ON(ret);
}

ret = btrfs_cleanup_reloc_trees(tree_root);
BUG_ON(ret);

fs_info->last_trans_committed = btrfs_super_generation(disk_super);
return tree_root;

Expand Down
Loading

0 comments on commit 1a40e23

Please sign in to comment.