Skip to content

Commit

Permalink
Btrfs: Add run time btree defrag, and an ioctl to force btree defrag
Browse files Browse the repository at this point in the history
This adds two types of btree defrag, a run time form that tries to
defrag recently allocated blocks in the btree when they are still in ram,
and an ioctl that forces defrag of all btree blocks.

File data blocks are not defragged yet, but this can make a huge difference
in sequential btree reads.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
  • Loading branch information
Chris Mason authored and David Woodhouse committed Aug 7, 2007
1 parent 3c69fae commit 6702ed4
Show file tree
Hide file tree
Showing 11 changed files with 553 additions and 89 deletions.
2 changes: 1 addition & 1 deletion fs/btrfs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ifneq ($(KERNELRELEASE),)
obj-m := btrfs.o
btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
hash.o file-item.o inode-item.o inode-map.o disk-io.o \
transaction.o bit-radix.o inode.o file.o
transaction.o bit-radix.o inode.o file.o tree-defrag.o

#btrfs-y := ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \
# root-tree.o dir-item.o hash.o file-item.o inode-item.o \
Expand Down
195 changes: 153 additions & 42 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,44 +65,44 @@ void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
memset(p, 0, sizeof(*p));
}

static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
static int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct buffer_head *buf, struct buffer_head
*parent, int parent_slot, struct buffer_head
**cow_ret)
**cow_ret, u64 search_start, u64 empty_size)
{
struct buffer_head *cow;
struct btrfs_node *cow_node;
int ret;
int ret = 0;
int different_trans = 0;

WARN_ON(root->ref_cows && trans->transid != root->last_trans);
WARN_ON(!buffer_uptodate(buf));
if (trans->transaction != root->fs_info->running_transaction) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->running_transaction->transid);
WARN_ON(1);
}
if (trans->transid != root->fs_info->generation) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->generation);
WARN_ON(1);
}
if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
trans->transid) {
*cow_ret = buf;
return 0;
}
cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr);
cow = btrfs_alloc_free_block(trans, root, search_start, empty_size);
if (IS_ERR(cow))
return PTR_ERR(cow);

cow_node = btrfs_buffer_node(cow);
if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
WARN_ON(1);

memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
btrfs_set_header_generation(&cow_node->header, trans->transid);
btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
ret = btrfs_inc_ref(trans, root, buf);
if (ret)
return ret;

WARN_ON(btrfs_header_generation(btrfs_buffer_header(buf)) >
trans->transid);
if (btrfs_header_generation(btrfs_buffer_header(buf)) !=
trans->transid) {
different_trans = 1;
ret = btrfs_inc_ref(trans, root, buf);
if (ret)
return ret;
} else {
WARN_ON(!root->ref_cows);
clean_tree_block(trans, root, buf);
}

if (buf == root->node) {
root->node = cow;
get_bh(cow);
Expand All @@ -114,6 +114,8 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
bh_blocknr(cow));
btrfs_mark_buffer_dirty(parent);
WARN_ON(btrfs_header_generation(btrfs_buffer_header(parent)) !=
trans->transid);
btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
}
btrfs_block_release(root, buf);
Expand All @@ -122,6 +124,115 @@ static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
return 0;
}

int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct buffer_head *buf, struct buffer_head
*parent, int parent_slot, struct buffer_head
**cow_ret)
{
u64 search_start;
if (trans->transaction != root->fs_info->running_transaction) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->running_transaction->transid);
WARN_ON(1);
}
if (trans->transid != root->fs_info->generation) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->generation);
WARN_ON(1);
}
if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
trans->transid) {
*cow_ret = buf;
return 0;
}

search_start = bh_blocknr(buf) & ~((u64)65535);
return __btrfs_cow_block(trans, root, buf, parent,
parent_slot, cow_ret, search_start, 0);
}

static int close_blocks(u64 blocknr, u64 other)
{
if (blocknr < other && other - blocknr < 8)
return 1;
if (blocknr > other && blocknr - other < 8)
return 1;
return 0;
}

int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct buffer_head *parent,
int cache_only)
{
struct btrfs_node *parent_node;
struct buffer_head *cur_bh;
struct buffer_head *tmp_bh;
u64 blocknr;
u64 search_start = 0;
u64 other;
u32 parent_nritems;
int start_slot;
int end_slot;
int i;
int err = 0;

if (trans->transaction != root->fs_info->running_transaction) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->running_transaction->transid);
WARN_ON(1);
}
if (trans->transid != root->fs_info->generation) {
printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
root->fs_info->generation);
WARN_ON(1);
}
parent_node = btrfs_buffer_node(parent);
parent_nritems = btrfs_header_nritems(&parent_node->header);

start_slot = 0;
end_slot = parent_nritems;

if (parent_nritems == 1)
return 0;

for (i = start_slot; i < end_slot; i++) {
int close = 1;
blocknr = btrfs_node_blockptr(parent_node, i);
if (i > 0) {
other = btrfs_node_blockptr(parent_node, i - 1);
close = close_blocks(blocknr, other);
}
if (close && i < end_slot - 1) {
other = btrfs_node_blockptr(parent_node, i + 1);
close = close_blocks(blocknr, other);
}
if (close)
continue;

cur_bh = btrfs_find_tree_block(root, blocknr);
if (!cur_bh || !buffer_uptodate(cur_bh) ||
buffer_locked(cur_bh)) {
if (cache_only) {
brelse(cur_bh);
continue;
}
brelse(cur_bh);
cur_bh = read_tree_block(root, blocknr);
}
if (search_start == 0) {
search_start = bh_blocknr(cur_bh) & ~((u64)65535);
}
err = __btrfs_cow_block(trans, root, cur_bh, parent, i,
&tmp_bh, search_start,
min(8, end_slot - i));
if (err)
break;
search_start = bh_blocknr(tmp_bh);
brelse(tmp_bh);
}
return err;
}

/*
* The leaf data grows from end-to-front in the node.
* this returns the address of the start of the last item,
Expand Down Expand Up @@ -221,6 +332,7 @@ static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,

parent_slot = path->slots[level + 1];
parent_key = &parent->ptrs[parent_slot].key;

BUG_ON(memcmp(parent_key, &leaf->items[0].key,
sizeof(struct btrfs_disk_key)));
BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
Expand Down Expand Up @@ -643,7 +755,7 @@ static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
* readahead one full node of leaves
*/
static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
int slot)
int level, int slot)
{
struct btrfs_node *node;
int i;
Expand All @@ -659,10 +771,13 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
unsigned long gang[8];
struct buffer_head *bh;

if (!path->nodes[1])
if (level == 0)
return;

if (!path->nodes[level])
return;

node = btrfs_buffer_node(path->nodes[1]);
node = btrfs_buffer_node(path->nodes[level]);
search = btrfs_node_blockptr(node, slot);
bh = btrfs_find_tree_block(root, search);
if (bh) {
Expand Down Expand Up @@ -690,7 +805,7 @@ static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
for (i = 0; i < ret; i++) {
blocknr = gang[i];
clear_radix_bit(&found, blocknr);
if (nread > 64)
if (nread > 32)
continue;
if (direction > 0 && cluster_start <= blocknr &&
cluster_start + 8 > blocknr) {
Expand Down Expand Up @@ -726,19 +841,15 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
struct buffer_head *b;
struct buffer_head *cow_buf;
struct btrfs_node *c;
struct btrfs_root_item *root_item = &root->root_item;
u64 blocknr;
int slot;
int ret;
int level;
int should_reada = p->reada;
u8 lowest_level = 0;

if (btrfs_root_refs(root_item) == 0 && root->ref_cows) {
lowest_level = root_item->drop_level;
WARN_ON(ins_len || cow);
}

lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len);
WARN_ON(p->nodes[0] != NULL);
WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
again:
Expand Down Expand Up @@ -798,8 +909,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
if (level == lowest_level)
break;
blocknr = btrfs_node_blockptr(c, slot);
if (level == 1 && should_reada)
reada_for_search(root, p, slot);
if (should_reada)
reada_for_search(root, p, level, slot);
b = read_tree_block(root, btrfs_node_blockptr(c, slot));

} else {
Expand Down Expand Up @@ -960,7 +1071,7 @@ static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
BUG_ON(path->nodes[level]);
BUG_ON(path->nodes[level-1] != root->node);

t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr);
t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr, 0);
if (IS_ERR(t))
return PTR_ERR(t);
c = btrfs_buffer_node(t);
Expand Down Expand Up @@ -1070,7 +1181,7 @@ static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
}

c_nritems = btrfs_header_nritems(&c->header);
split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr);
split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr, 0);
if (IS_ERR(split_buffer))
return PTR_ERR(split_buffer);

Expand Down Expand Up @@ -1461,7 +1572,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
nritems = btrfs_header_nritems(&l->header);
mid = (nritems + 1)/ 2;

right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
if (IS_ERR(right_buffer))
return PTR_ERR(right_buffer);

Expand Down Expand Up @@ -1560,7 +1671,7 @@ static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root

if (!double_split)
return ret;
right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr, 0);
if (IS_ERR(right_buffer))
return PTR_ERR(right_buffer);

Expand Down Expand Up @@ -1988,8 +2099,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
blocknr = btrfs_node_blockptr(c_node, slot);
if (next)
btrfs_block_release(root, next);
if (level == 1 && path->reada)
reada_for_search(root, path, slot);
if (path->reada)
reada_for_search(root, path, level, slot);
next = read_tree_block(root, blocknr);
break;
}
Expand All @@ -2002,8 +2113,8 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
path->slots[level] = 0;
if (!level)
break;
if (level == 1 && path->reada)
reada_for_search(root, path, slot);
if (path->reada)
reada_for_search(root, path, level, slot);
next = read_tree_block(root,
btrfs_node_blockptr(btrfs_buffer_node(next), 0));
}
Expand Down
19 changes: 17 additions & 2 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ struct btrfs_path {
struct buffer_head *nodes[BTRFS_MAX_LEVEL];
int slots[BTRFS_MAX_LEVEL];
int reada;
int lowest_level;
};

/*
Expand Down Expand Up @@ -338,6 +339,9 @@ struct btrfs_root {
u64 highest_inode;
u64 last_inode_alloc;
int ref_cows;
struct btrfs_key defrag_progress;
int defrag_running;
int defrag_level;
};

/* the lower bits in the key flags defines the item type */
Expand Down Expand Up @@ -1031,10 +1035,11 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 hint);
struct btrfs_root *root, u64 hint,
u64 empty_size);
int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 owner,
u64 num_blocks, u64 search_start,
u64 num_blocks, u64 empty_size, u64 search_start,
u64 search_end, struct btrfs_key *ins, int data);
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct buffer_head *buf);
Expand All @@ -1051,6 +1056,10 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_read_block_groups(struct btrfs_root *root);
/* ctree.c */
int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct buffer_head *buf, struct buffer_head
*parent, int parent_slot, struct buffer_head
**cow_ret);
int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_path *path, u32 data_size);
int btrfs_truncate_item(struct btrfs_trans_handle *trans,
Expand All @@ -1060,6 +1069,9 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_path *p, int
ins_len, int cow);
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct buffer_head *parent,
int cache_only);
void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p);
struct btrfs_path *btrfs_alloc_path(void);
void btrfs_free_path(struct btrfs_path *p);
Expand Down Expand Up @@ -1171,4 +1183,7 @@ extern struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
u64 start, u64 end, u64 *hint_block);
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int cache_only);
#endif
Loading

0 comments on commit 6702ed4

Please sign in to comment.