Skip to content

Commit

Permalink
Btrfs: fix tree logs parallel sync
Browse files Browse the repository at this point in the history
To improve performance, btrfs_sync_log merges tree log sync
requests. But it wrongly merges sync requests for different
tree logs. If multiple tree logs are synced at the same time,
only one of them actually gets synced.

This patch makes the following changes to fix the bug:

Move most tree log related fields in btrfs_fs_info to
btrfs_root. This allows merging sync requests separately
for each tree log.

Don't insert the root item into the log root tree immediately
after the log tree is allocated. The root item for a log tree is
inserted when the log tree gets synced for the first time. This
allows syncing the log root tree without first syncing all
log trees.

At tree-log sync, btrfs_sync_log first syncs the log tree,
then updates the corresponding root item in the log root tree,
then syncs the log root tree, and finally updates the super block.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
  • Loading branch information
Yan Zheng authored and Chris Mason committed Jan 21, 2009
1 parent 7e66285 commit 7237f18
Show file tree
Hide file tree
Showing 6 changed files with 248 additions and 210 deletions.
13 changes: 7 additions & 6 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -695,9 +695,7 @@ struct btrfs_fs_info {
struct btrfs_transaction *running_transaction;
wait_queue_head_t transaction_throttle;
wait_queue_head_t transaction_wait;

wait_queue_head_t async_submit_wait;
wait_queue_head_t tree_log_wait;

struct btrfs_super_block super_copy;
struct btrfs_super_block super_for_commit;
Expand All @@ -724,10 +722,6 @@ struct btrfs_fs_info {
atomic_t async_submit_draining;
atomic_t nr_async_bios;
atomic_t async_delalloc_pages;
atomic_t tree_log_writers;
atomic_t tree_log_commit;
unsigned long tree_log_batch;
u64 tree_log_transid;

/*
* this is used by the balancing code to wait for all the pending
Expand Down Expand Up @@ -827,7 +821,14 @@ struct btrfs_root {
struct kobject root_kobj;
struct completion kobj_unregister;
struct mutex objectid_mutex;

struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
atomic_t log_writers;
atomic_t log_commit[2];
unsigned long log_transid;
unsigned long log_batch;

u64 objectid;
u64 last_trans;
Expand Down
79 changes: 68 additions & 11 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,14 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
spin_lock_init(&root->list_lock);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
init_waitqueue_head(&root->log_commit_wait[0]);
init_waitqueue_head(&root->log_commit_wait[1]);
atomic_set(&root->log_commit[0], 0);
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
root->log_batch = 0;
root->log_transid = 0;
extent_io_tree_init(&root->dirty_log_pages,
fs_info->btree_inode->i_mapping, GFP_NOFS);

Expand Down Expand Up @@ -933,15 +941,16 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
return 0;
}

int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
struct btrfs_root *tree_root = fs_info->tree_root;
struct extent_buffer *leaf;

root = kzalloc(sizeof(*root), GFP_NOFS);
if (!root)
return -ENOMEM;
return ERR_PTR(-ENOMEM);

__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
Expand All @@ -950,12 +959,23 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
root->root_key.objectid = BTRFS_TREE_LOG_OBJECTID;
root->root_key.type = BTRFS_ROOT_ITEM_KEY;
root->root_key.offset = BTRFS_TREE_LOG_OBJECTID;
/*
* log trees do not get reference counted because they go away
* before a real commit is actually done. They do store pointers
* to file data extents, and those reference counts still get
* updated (along with back refs to the log tree).
*/
root->ref_cows = 0;

root->node = btrfs_alloc_free_block(trans, root, root->leafsize,
0, BTRFS_TREE_LOG_OBJECTID,
trans->transid, 0, 0, 0);
leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
0, BTRFS_TREE_LOG_OBJECTID,
trans->transid, 0, 0, 0);
if (IS_ERR(leaf)) {
kfree(root);
return ERR_CAST(leaf);
}

root->node = leaf;
btrfs_set_header_nritems(root->node, 0);
btrfs_set_header_level(root->node, 0);
btrfs_set_header_bytenr(root->node, root->node->start);
Expand All @@ -967,7 +987,48 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
BTRFS_FSID_SIZE);
btrfs_mark_buffer_dirty(root->node);
btrfs_tree_unlock(root->node);
fs_info->log_root_tree = root;
return root;
}

/*
 * Allocate a new log tree with alloc_log_tree() and install it as
 * fs_info->log_root_tree.
 *
 * Returns 0 on success, or the error code carried by the ERR_PTR that
 * alloc_log_tree() returned.
 */
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info)
{
struct btrfs_root *log_root;

log_root = alloc_log_tree(trans, fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);
/*
 * A log root tree is not expected to exist yet; warn if one does,
 * since the pointer is overwritten below either way.
 */
WARN_ON(fs_info->log_root_tree);
fs_info->log_root_tree = log_root;
return 0;
}

/*
 * Allocate a log tree for 'root' and attach it as root->log_root.
 *
 * The new log root's in-memory root item is filled in here, but nothing
 * in this function inserts that item into the log root tree.
 *
 * Returns 0 on success, or the error code carried by the ERR_PTR that
 * alloc_log_tree() returned.
 */
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_root *log_root;
struct btrfs_inode_item *inode_item;

log_root = alloc_log_tree(trans, root->fs_info);
if (IS_ERR(log_root))
return PTR_ERR(log_root);

log_root->last_trans = trans->transid;
/* the key offset records the objectid of the root this log belongs to */
log_root->root_key.offset = root->root_key.objectid;

/*
 * Fill in the inode item embedded in the log root's root item; the
 * mode marks it as a directory (S_IFDIR).
 */
inode_item = &log_root->root_item.inode;
inode_item->generation = cpu_to_le64(1);
inode_item->size = cpu_to_le64(3);
inode_item->nlink = cpu_to_le32(1);
inode_item->nbytes = cpu_to_le64(root->leafsize);
inode_item->mode = cpu_to_le32(S_IFDIR | 0755);

/* point the root item at the freshly allocated node, stamped with this transaction */
btrfs_set_root_bytenr(&log_root->root_item, log_root->node->start);
btrfs_set_root_generation(&log_root->root_item, trans->transid);

/*
 * 'root' is not expected to have a log tree already; warn if it does,
 * since the pointer is overwritten below either way.
 */
WARN_ON(root->log_root);
root->log_root = log_root;
/* restart the per-root log transaction counter for the new log tree */
root->log_transid = 0;
return 0;
}

Expand Down Expand Up @@ -1530,10 +1591,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_throttle);
init_waitqueue_head(&fs_info->transaction_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
init_waitqueue_head(&fs_info->tree_log_wait);
atomic_set(&fs_info->tree_log_commit, 0);
atomic_set(&fs_info->tree_log_writers, 0);
fs_info->tree_log_transid = 0;

__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
Expand Down
2 changes: 2 additions & 0 deletions fs/btrfs/disk-io.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btree_lock_page_hook(struct page *page);
#endif
10 changes: 3 additions & 7 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -2698,13 +2698,9 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
/* if metadata always pin */
if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) {
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_block_group_cache *cache;

/* btrfs_free_reserved_extent */
cache = btrfs_lookup_block_group(root->fs_info, bytenr);
BUG_ON(!cache);
btrfs_add_free_space(cache, bytenr, num_bytes);
put_block_group(cache);
mutex_lock(&root->fs_info->pinned_mutex);
btrfs_update_pinned_extents(root, bytenr, num_bytes, 1);
mutex_unlock(&root->fs_info->pinned_mutex);
update_reserved_extents(root, bytenr, num_bytes, 0);
return 0;
}
Expand Down
4 changes: 2 additions & 2 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1214,10 +1214,10 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
}
mutex_unlock(&root->fs_info->trans_mutex);

root->fs_info->tree_log_batch++;
root->log_batch++;
filemap_fdatawrite(inode->i_mapping);
btrfs_wait_ordered_range(inode, 0, (u64)-1);
root->fs_info->tree_log_batch++;
root->log_batch++;

/*
* ok we haven't committed the transaction yet, lets do a commit
Expand Down
Loading

0 comments on commit 7237f18

Please sign in to comment.