Skip to content

Commit

Permalink
Btrfs: kill trans_mutex
Browse files Browse the repository at this point in the history
We use trans_mutex for lots of things, here's a basic list

1) To serialize trans_handles joining the currently running transaction
2) To make sure that no new trans handles are started while we are committing
3) To protect the dead_roots list and the transaction lists

Really the serializing trans_handles joining is not too hard, and can really get
bogged down in acquiring a reference to the transaction.  So replace the
trans_mutex with a trans_lock spinlock and use it to do the following

1) Protect fs_info->running_transaction.  All trans handles have to do is check
this, and then take a reference of the transaction and keep on going.
2) Protect the fs_info->trans_list.  This doesn't get used too much, basically
it just holds the current transactions, which will usually just be the currently
committing transaction and the currently running transaction at most.
3) Protect the dead roots list.  This is only ever processed by splicing the
list so this is relatively simple.
4) Protect the fs_info->reloc_ctl stuff.  This is very lightweight and was using
the trans_mutex before, so this is a pretty straightforward change.
5) Protect fs_info->no_trans_join.  Because we don't hold the trans_lock over
the entirety of the commit we need to have a way to block new people from
creating a new transaction while we're doing our work.  So we set no_trans_join
and in join_transaction we test to see if that is set, and if it is we do a
wait_on_commit.
6) Make the transaction use count atomic so we don't need to take locks to
modify it when we're dropping references.
7) Add a commit_lock to the transaction to make sure multiple people trying to
commit the same transaction don't race and commit at the same time.
8) Make open_ioctl_trans an atomic so we don't have to take any locks for ioctl
trans.

I have tested this with xfstests, but obviously it is a pretty hairy change so
lots of testing is greatly appreciated.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
  • Loading branch information
Josef Bacik committed May 23, 2011
1 parent 2a1eb46 commit a4abeea
Show file tree
Hide file tree
Showing 8 changed files with 177 additions and 169 deletions.
6 changes: 3 additions & 3 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,6 @@ struct btrfs_fs_info {
* is required instead of the faster short fsync log commits
*/
u64 last_trans_log_full_commit;
u64 open_ioctl_trans;
unsigned long mount_opt:20;
unsigned long compress_type:4;
u64 max_inline;
Expand All @@ -936,7 +935,6 @@ struct btrfs_fs_info {
struct super_block *sb;
struct inode *btree_inode;
struct backing_dev_info bdi;
struct mutex trans_mutex;
struct mutex tree_log_mutex;
struct mutex transaction_kthread_mutex;
struct mutex cleaner_mutex;
Expand All @@ -957,6 +955,7 @@ struct btrfs_fs_info {
struct rw_semaphore subvol_sem;
struct srcu_struct subvol_srcu;

spinlock_t trans_lock;
struct list_head trans_list;
struct list_head hashers;
struct list_head dead_roots;
Expand All @@ -969,6 +968,7 @@ struct btrfs_fs_info {
atomic_t async_submit_draining;
atomic_t nr_async_bios;
atomic_t async_delalloc_pages;
atomic_t open_ioctl_trans;

/*
* this is used by the balancing code to wait for all the pending
Expand Down Expand Up @@ -1032,6 +1032,7 @@ struct btrfs_fs_info {
int closing;
int log_root_recovering;
int enospc_unlink;
int trans_no_join;

u64 total_pinned;

Expand All @@ -1053,7 +1054,6 @@ struct btrfs_fs_info {
struct reloc_control *reloc_ctl;

spinlock_t delalloc_lock;
spinlock_t new_trans_lock;
u64 delalloc_bytes;

/* data_alloc_cluster is only used in ssd mode */
Expand Down
30 changes: 15 additions & 15 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1551,22 +1551,22 @@ static int transaction_kthread(void *arg)
vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->transaction_kthread_mutex);

spin_lock(&root->fs_info->new_trans_lock);
spin_lock(&root->fs_info->trans_lock);
cur = root->fs_info->running_transaction;
if (!cur) {
spin_unlock(&root->fs_info->new_trans_lock);
spin_unlock(&root->fs_info->trans_lock);
goto sleep;
}

now = get_seconds();
if (!cur->blocked &&
(now < cur->start_time || now - cur->start_time < 30)) {
spin_unlock(&root->fs_info->new_trans_lock);
spin_unlock(&root->fs_info->trans_lock);
delay = HZ * 5;
goto sleep;
}
transid = cur->transid;
spin_unlock(&root->fs_info->new_trans_lock);
spin_unlock(&root->fs_info->trans_lock);

trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
Expand Down Expand Up @@ -1658,7 +1658,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->ordered_operations);
INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_lock);
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
Expand Down Expand Up @@ -1687,6 +1687,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024;
fs_info->metadata_ratio = 0;
fs_info->trans_no_join = 0;

fs_info->thread_pool_size = min_t(unsigned long,
num_online_cpus() + 2, 8);
Expand Down Expand Up @@ -1735,7 +1736,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
fs_info->do_barriers = 1;


mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->ordered_operations_mutex);
mutex_init(&fs_info->tree_log_mutex);
mutex_init(&fs_info->chunk_mutex);
Expand Down Expand Up @@ -3006,10 +3006,13 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)

WARN_ON(1);

mutex_lock(&root->fs_info->trans_mutex);
mutex_lock(&root->fs_info->transaction_kthread_mutex);

spin_lock(&root->fs_info->trans_lock);
list_splice_init(&root->fs_info->trans_list, &list);
root->fs_info->trans_no_join = 1;
spin_unlock(&root->fs_info->trans_lock);

while (!list_empty(&list)) {
t = list_entry(list.next, struct btrfs_transaction, list);
if (!t)
Expand All @@ -3034,23 +3037,18 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
t->blocked = 0;
if (waitqueue_active(&root->fs_info->transaction_wait))
wake_up(&root->fs_info->transaction_wait);
mutex_unlock(&root->fs_info->trans_mutex);

mutex_lock(&root->fs_info->trans_mutex);
t->commit_done = 1;
if (waitqueue_active(&t->commit_wait))
wake_up(&t->commit_wait);
mutex_unlock(&root->fs_info->trans_mutex);

mutex_lock(&root->fs_info->trans_mutex);

btrfs_destroy_pending_snapshots(t);

btrfs_destroy_delalloc_inodes(root);

spin_lock(&root->fs_info->new_trans_lock);
spin_lock(&root->fs_info->trans_lock);
root->fs_info->running_transaction = NULL;
spin_unlock(&root->fs_info->new_trans_lock);
spin_unlock(&root->fs_info->trans_lock);

btrfs_destroy_marked_extents(root, &t->dirty_pages,
EXTENT_DIRTY);
Expand All @@ -3064,8 +3062,10 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
kmem_cache_free(btrfs_transaction_cachep, t);
}

spin_lock(&root->fs_info->trans_lock);
root->fs_info->trans_no_join = 0;
spin_unlock(&root->fs_info->trans_lock);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
mutex_unlock(&root->fs_info->trans_mutex);

return 0;
}
Expand Down
3 changes: 2 additions & 1 deletion fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -3200,7 +3200,8 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)

/* commit the current transaction and try again */
commit_trans:
if (!committed && !root->fs_info->open_ioctl_trans) {
if (!committed &&
!atomic_read(&root->fs_info->open_ioctl_trans)) {
committed = 1;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
Expand Down
4 changes: 1 addition & 3 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1222,14 +1222,12 @@ int btrfs_sync_file(struct file *file, int datasync)
* the current transaction, we can bail out now without any
* syncing
*/
mutex_lock(&root->fs_info->trans_mutex);
smp_mb();
if (BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
BTRFS_I(inode)->last_trans = 0;
mutex_unlock(&root->fs_info->trans_mutex);
goto out;
}
mutex_unlock(&root->fs_info->trans_mutex);

/*
* ok we haven't committed the transaction yet, lets do a commit
Expand Down
12 changes: 3 additions & 9 deletions fs/btrfs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -2177,9 +2177,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
if (ret)
goto out;

mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans++;
mutex_unlock(&root->fs_info->trans_mutex);
atomic_inc(&root->fs_info->open_ioctl_trans);

ret = -ENOMEM;
trans = btrfs_start_ioctl_transaction(root);
Expand All @@ -2190,9 +2188,7 @@ static long btrfs_ioctl_trans_start(struct file *file)
return 0;

out_drop:
mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans--;
mutex_unlock(&root->fs_info->trans_mutex);
atomic_dec(&root->fs_info->open_ioctl_trans);
mnt_drop_write(file->f_path.mnt);
out:
return ret;
Expand Down Expand Up @@ -2426,9 +2422,7 @@ long btrfs_ioctl_trans_end(struct file *file)

btrfs_end_transaction(trans, root);

mutex_lock(&root->fs_info->trans_mutex);
root->fs_info->open_ioctl_trans--;
mutex_unlock(&root->fs_info->trans_mutex);
atomic_dec(&root->fs_info->open_ioctl_trans);

mnt_drop_write(file->f_path.mnt);
return 0;
Expand Down
16 changes: 8 additions & 8 deletions fs/btrfs/relocation.c
Original file line number Diff line number Diff line change
Expand Up @@ -2136,10 +2136,10 @@ int prepare_to_merge(struct reloc_control *rc, int err)
u64 num_bytes = 0;
int ret;

mutex_lock(&root->fs_info->trans_mutex);
spin_lock(&root->fs_info->trans_lock);
rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
rc->merging_rsv_size += rc->nodes_relocated * 2;
mutex_unlock(&root->fs_info->trans_mutex);
spin_unlock(&root->fs_info->trans_lock);
again:
if (!err) {
num_bytes = rc->merging_rsv_size;
Expand Down Expand Up @@ -2208,9 +2208,9 @@ int merge_reloc_roots(struct reloc_control *rc)
int ret;
again:
root = rc->extent_root;
mutex_lock(&root->fs_info->trans_mutex);
spin_lock(&root->fs_info->trans_lock);
list_splice_init(&rc->reloc_roots, &reloc_roots);
mutex_unlock(&root->fs_info->trans_mutex);
spin_unlock(&root->fs_info->trans_lock);

while (!list_empty(&reloc_roots)) {
found = 1;
Expand Down Expand Up @@ -3583,17 +3583,17 @@ int find_next_extent(struct btrfs_trans_handle *trans,
static void set_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
mutex_lock(&fs_info->trans_mutex);
spin_lock(&fs_info->trans_lock);
fs_info->reloc_ctl = rc;
mutex_unlock(&fs_info->trans_mutex);
spin_unlock(&fs_info->trans_lock);
}

static void unset_reloc_control(struct reloc_control *rc)
{
struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;
mutex_lock(&fs_info->trans_mutex);
spin_lock(&fs_info->trans_lock);
fs_info->reloc_ctl = NULL;
mutex_unlock(&fs_info->trans_mutex);
spin_unlock(&fs_info->trans_lock);
}

static int check_extent_flags(u64 flags)
Expand Down
Loading

0 comments on commit a4abeea

Please sign in to comment.