Skip to content

Commit

Permalink
Btrfs: remove transaction from send
Browse files Browse the repository at this point in the history
Lets try this again.  We can deadlock the box if we send on a box and try to
write onto the same fs with the app that is trying to listen to the send pipe.
This is because the writer could get stuck waiting for a transaction commit
which is being blocked by the send.  So fix this by making sure looking at the
commit roots is always going to be consistent.  We do this by keeping track of
which roots need to have their commit roots swapped during commit, and then
taking the commit_root_sem and swapping them all at once.  Then make sure we
take a read lock on the commit_root_sem in cases where we search the commit root
to make sure we're always looking at a consistent view of the commit roots.
Previously we had problems with this because we would swap a fs tree commit root
and then swap the extent tree commit root independently which would cause the
backref walking code to screw up sometimes.  With this patch we no longer
deadlock and pass all the weird send/receive corner cases.  Thanks,

Reportedy-by: Hugo Mills <hugo@carfax.org.uk>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
  • Loading branch information
Josef Bacik authored and Chris Mason committed Apr 7, 2014
1 parent a26e8c9 commit 9e351cc
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 187 deletions.
33 changes: 27 additions & 6 deletions fs/btrfs/backref.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,10 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
goto out;
}

root_level = btrfs_old_root_level(root, time_seq);
if (path->search_commit_root)
root_level = btrfs_header_level(root->commit_root);
else
root_level = btrfs_old_root_level(root, time_seq);

if (root_level + 1 == level) {
srcu_read_unlock(&fs_info->subvol_srcu, index);
Expand Down Expand Up @@ -1099,9 +1102,9 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
*
* returns 0 on success, < 0 on error.
*/
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots)
static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots)
{
struct ulist *tmp;
struct ulist_node *node = NULL;
Expand Down Expand Up @@ -1137,6 +1140,20 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
return 0;
}

int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots)
{
int ret;

if (!trans)
down_read(&fs_info->commit_root_sem);
ret = __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots);
if (!trans)
up_read(&fs_info->commit_root_sem);
return ret;
}

/*
* this makes the path point to (inum INODE_ITEM ioff)
*/
Expand Down Expand Up @@ -1516,6 +1533,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
if (IS_ERR(trans))
return PTR_ERR(trans);
btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
} else {
down_read(&fs_info->commit_root_sem);
}

ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
Expand All @@ -1526,8 +1545,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,

ULIST_ITER_INIT(&ref_uiter);
while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
tree_mod_seq_elem.seq, &roots);
ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val,
tree_mod_seq_elem.seq, &roots);
if (ret)
break;
ULIST_ITER_INIT(&root_uiter);
Expand All @@ -1549,6 +1568,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
if (!search_commit_root) {
btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
btrfs_end_transaction(trans, fs_info->extent_root);
} else {
up_read(&fs_info->commit_root_sem);
}

return ret;
Expand Down
88 changes: 0 additions & 88 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -5360,7 +5360,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
{
int ret;
int cmp;
struct btrfs_trans_handle *trans = NULL;
struct btrfs_path *left_path = NULL;
struct btrfs_path *right_path = NULL;
struct btrfs_key left_key;
Expand All @@ -5378,9 +5377,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
u64 right_blockptr;
u64 left_gen;
u64 right_gen;
u64 left_start_ctransid;
u64 right_start_ctransid;
u64 ctransid;

left_path = btrfs_alloc_path();
if (!left_path) {
Expand All @@ -5404,21 +5400,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
right_path->search_commit_root = 1;
right_path->skip_locking = 1;

spin_lock(&left_root->root_item_lock);
left_start_ctransid = btrfs_root_ctransid(&left_root->root_item);
spin_unlock(&left_root->root_item_lock);

spin_lock(&right_root->root_item_lock);
right_start_ctransid = btrfs_root_ctransid(&right_root->root_item);
spin_unlock(&right_root->root_item_lock);

trans = btrfs_join_transaction(left_root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
goto out;
}

/*
* Strategy: Go to the first items of both trees. Then do
*
Expand Down Expand Up @@ -5482,67 +5463,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
advance_left = advance_right = 0;

while (1) {
/*
* We need to make sure the transaction does not get committed
* while we do anything on commit roots. This means, we need to
* join and leave transactions for every item that we process.
*/
if (trans && btrfs_should_end_transaction(trans, left_root)) {
btrfs_release_path(left_path);
btrfs_release_path(right_path);

ret = btrfs_end_transaction(trans, left_root);
trans = NULL;
if (ret < 0)
goto out;
}
/* now rejoin the transaction */
if (!trans) {
trans = btrfs_join_transaction(left_root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
trans = NULL;
goto out;
}

spin_lock(&left_root->root_item_lock);
ctransid = btrfs_root_ctransid(&left_root->root_item);
spin_unlock(&left_root->root_item_lock);
if (ctransid != left_start_ctransid)
left_start_ctransid = 0;

spin_lock(&right_root->root_item_lock);
ctransid = btrfs_root_ctransid(&right_root->root_item);
spin_unlock(&right_root->root_item_lock);
if (ctransid != right_start_ctransid)
right_start_ctransid = 0;

if (!left_start_ctransid || !right_start_ctransid) {
WARN(1, KERN_WARNING
"BTRFS: btrfs_compare_tree detected "
"a change in one of the trees while "
"iterating. This is probably a "
"bug.\n");
ret = -EIO;
goto out;
}

/*
* the commit root may have changed, so start again
* where we stopped
*/
left_path->lowest_level = left_level;
right_path->lowest_level = right_level;
ret = btrfs_search_slot(NULL, left_root,
&left_key, left_path, 0, 0);
if (ret < 0)
goto out;
ret = btrfs_search_slot(NULL, right_root,
&right_key, right_path, 0, 0);
if (ret < 0)
goto out;
}

if (advance_left && !left_end_reached) {
ret = tree_advance(left_root, left_path, &left_level,
left_root_level,
Expand Down Expand Up @@ -5672,14 +5592,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
btrfs_free_path(left_path);
btrfs_free_path(right_path);
kfree(tmp_buf);

if (trans) {
if (!ret)
ret = btrfs_end_transaction(trans, left_root);
else
btrfs_end_transaction(trans, left_root);
}

return ret;
}

Expand Down
3 changes: 1 addition & 2 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1440,7 +1440,7 @@ struct btrfs_fs_info {
*/
struct mutex ordered_extent_flush_mutex;

struct rw_semaphore extent_commit_sem;
struct rw_semaphore commit_root_sem;

struct rw_semaphore cleanup_work_sem;

Expand Down Expand Up @@ -1711,7 +1711,6 @@ struct btrfs_root {
struct btrfs_block_rsv *block_rsv;

/* free ino cache stuff */
struct mutex fs_commit_mutex;
struct btrfs_free_space_ctl *free_ino_ctl;
enum btrfs_caching_type cached;
spinlock_t cache_lock;
Expand Down
3 changes: 1 addition & 2 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1567,7 +1567,6 @@ int btrfs_init_fs_root(struct btrfs_root *root)
root->subv_writers = writers;

btrfs_init_free_ino_ctl(root);
mutex_init(&root->fs_commit_mutex);
spin_lock_init(&root->cache_lock);
init_waitqueue_head(&root->cache_wait);

Expand Down Expand Up @@ -2345,7 +2344,7 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->transaction_kthread_mutex);
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
init_rwsem(&fs_info->extent_commit_sem);
init_rwsem(&fs_info->commit_root_sem);
init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
sema_init(&fs_info->uuid_tree_rescan_sem, 1);
Expand Down
20 changes: 10 additions & 10 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,7 @@ static noinline void caching_thread(struct btrfs_work *work)
again:
mutex_lock(&caching_ctl->mutex);
/* need to make sure the commit_root doesn't disappear */
down_read(&fs_info->extent_commit_sem);
down_read(&fs_info->commit_root_sem);

next:
ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
Expand All @@ -443,10 +443,10 @@ static noinline void caching_thread(struct btrfs_work *work)
break;

if (need_resched() ||
rwsem_is_contended(&fs_info->extent_commit_sem)) {
rwsem_is_contended(&fs_info->commit_root_sem)) {
caching_ctl->progress = last;
btrfs_release_path(path);
up_read(&fs_info->extent_commit_sem);
up_read(&fs_info->commit_root_sem);
mutex_unlock(&caching_ctl->mutex);
cond_resched();
goto again;
Expand Down Expand Up @@ -513,7 +513,7 @@ static noinline void caching_thread(struct btrfs_work *work)

err:
btrfs_free_path(path);
up_read(&fs_info->extent_commit_sem);
up_read(&fs_info->commit_root_sem);

free_excluded_extents(extent_root, block_group);

Expand Down Expand Up @@ -633,10 +633,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
return 0;
}

down_write(&fs_info->extent_commit_sem);
down_write(&fs_info->commit_root_sem);
atomic_inc(&caching_ctl->count);
list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
up_write(&fs_info->extent_commit_sem);
up_write(&fs_info->commit_root_sem);

btrfs_get_block_group(cache);

Expand Down Expand Up @@ -5471,7 +5471,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_block_group_cache *cache;
struct btrfs_space_info *space_info;

down_write(&fs_info->extent_commit_sem);
down_write(&fs_info->commit_root_sem);

list_for_each_entry_safe(caching_ctl, next,
&fs_info->caching_block_groups, list) {
Expand All @@ -5490,7 +5490,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
else
fs_info->pinned_extents = &fs_info->freed_extents[0];

up_write(&fs_info->extent_commit_sem);
up_write(&fs_info->commit_root_sem);

list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
percpu_counter_set(&space_info->total_bytes_pinned, 0);
Expand Down Expand Up @@ -8256,14 +8256,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
struct btrfs_caching_control *caching_ctl;
struct rb_node *n;

down_write(&info->extent_commit_sem);
down_write(&info->commit_root_sem);
while (!list_empty(&info->caching_block_groups)) {
caching_ctl = list_entry(info->caching_block_groups.next,
struct btrfs_caching_control, list);
list_del(&caching_ctl->list);
put_caching_control(caching_ctl);
}
up_write(&info->extent_commit_sem);
up_write(&info->commit_root_sem);

spin_lock(&info->block_group_cache_lock);
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
Expand Down
14 changes: 7 additions & 7 deletions fs/btrfs/inode-map.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ static int caching_kthread(void *data)
key.type = BTRFS_INODE_ITEM_KEY;
again:
/* need to make sure the commit_root doesn't disappear */
mutex_lock(&root->fs_commit_mutex);
down_read(&fs_info->commit_root_sem);

ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0)
Expand Down Expand Up @@ -88,7 +88,7 @@ static int caching_kthread(void *data)
btrfs_item_key_to_cpu(leaf, &key, 0);
btrfs_release_path(path);
root->cache_progress = last;
mutex_unlock(&root->fs_commit_mutex);
up_read(&fs_info->commit_root_sem);
schedule_timeout(1);
goto again;
} else
Expand Down Expand Up @@ -127,7 +127,7 @@ static int caching_kthread(void *data)
btrfs_unpin_free_ino(root);
out:
wake_up(&root->cache_wait);
mutex_unlock(&root->fs_commit_mutex);
up_read(&fs_info->commit_root_sem);

btrfs_free_path(path);

Expand Down Expand Up @@ -223,11 +223,11 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
* or the caching work is done.
*/

mutex_lock(&root->fs_commit_mutex);
down_write(&root->fs_info->commit_root_sem);
spin_lock(&root->cache_lock);
if (root->cached == BTRFS_CACHE_FINISHED) {
spin_unlock(&root->cache_lock);
mutex_unlock(&root->fs_commit_mutex);
up_write(&root->fs_info->commit_root_sem);
goto again;
}
spin_unlock(&root->cache_lock);
Expand All @@ -240,7 +240,7 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
else
__btrfs_add_free_space(pinned, objectid, 1);

mutex_unlock(&root->fs_commit_mutex);
up_write(&root->fs_info->commit_root_sem);
}
}

Expand All @@ -250,7 +250,7 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
* and others will just be dropped, because the commit root we were
* searching has changed.
*
* Must be called with root->fs_commit_mutex held
* Must be called with root->fs_info->commit_root_sem held
*/
void btrfs_unpin_free_ino(struct btrfs_root *root)
{
Expand Down
Loading

0 comments on commit 9e351cc

Please sign in to comment.