Skip to content

Commit

Permalink
btrfs: zoned: serialize log transaction on zoned filesystems
Browse files Browse the repository at this point in the history
This is patch 2/3 to enable tree-log on zoned filesystems.

Since we can start more than one log transaction per subvolume
simultaneously, nodes from multiple transactions can be allocated
interleaved. Such mixed allocation results in non-sequential writes at
the time of a log transaction commit. The nodes of the global log root
tree (fs_info->log_root_tree) also have the same problem with mixed
allocation.

Serialize log transactions by waiting for a committing transaction when
someone tries to start a new transaction, to avoid the mixed allocation
problem. We must also wait for running log transactions from another
subvolume, but there is no easy way to detect which subvolume root is
running a log transaction. So, this patch forbids starting a new log
transaction when other subvolumes have already allocated the global log
root tree.

Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Signed-off-by: David Sterba <dsterba@suse.com>
  • Loading branch information
Naohiro Aota authored and David Sterba committed Feb 9, 2021
1 parent 40ab3be commit fa1a0f4
Showing 1 changed file with 33 additions and 1 deletion.
34 changes: 33 additions & 1 deletion fs/btrfs/tree-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
struct btrfs_root *log,
struct btrfs_path *path,
u64 dirid, int del_all);
/*
 * Forward declaration: start_log_trans() and join_running_log_trans() call
 * this on zoned filesystems when root->log_commit[index] is non-zero, then
 * retry.
 */
static void wait_log_commit(struct btrfs_root *root, int transid);

/*
* tree logging is a special write ahead log used to make sure that
Expand Down Expand Up @@ -140,7 +141,9 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_root *tree_root = fs_info->tree_root;
const bool zoned = btrfs_is_zoned(fs_info);
int ret = 0;
bool created = false;

/*
* First check if the log root tree was already created. If not, create
Expand All @@ -150,8 +153,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
mutex_lock(&tree_root->log_mutex);
if (!fs_info->log_root_tree) {
ret = btrfs_init_log_root_tree(trans, fs_info);
if (!ret)
if (!ret) {
set_bit(BTRFS_ROOT_HAS_LOG_TREE, &tree_root->state);
created = true;
}
}
mutex_unlock(&tree_root->log_mutex);
if (ret)
Expand All @@ -160,19 +165,38 @@ static int start_log_trans(struct btrfs_trans_handle *trans,

mutex_lock(&root->log_mutex);

again:
if (root->log_root) {
int index = (root->log_transid + 1) % 2;

if (btrfs_need_log_full_commit(trans)) {
ret = -EAGAIN;
goto out;
}

if (zoned && atomic_read(&root->log_commit[index])) {
wait_log_commit(root, root->log_transid - 1);
goto again;
}

if (!root->log_start_pid) {
clear_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
root->log_start_pid = current->pid;
} else if (root->log_start_pid != current->pid) {
set_bit(BTRFS_ROOT_MULTI_LOG_TASKS, &root->state);
}
} else {
/*
* This means fs_info->log_root_tree was already created
* for some other FS trees. Do the full commit not to mix
* nodes from multiple log transactions to do sequential
* writing.
*/
if (zoned && !created) {
ret = -EAGAIN;
goto out;
}

ret = btrfs_add_log_tree(trans, root);
if (ret)
goto out;
Expand Down Expand Up @@ -201,14 +225,22 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
*/
static int join_running_log_trans(struct btrfs_root *root)
{
const bool zoned = btrfs_is_zoned(root->fs_info);
int ret = -ENOENT;

if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &root->state))
return ret;

mutex_lock(&root->log_mutex);
again:
if (root->log_root) {
int index = (root->log_transid + 1) % 2;

ret = 0;
if (zoned && atomic_read(&root->log_commit[index])) {
wait_log_commit(root, root->log_transid - 1);
goto again;
}
atomic_inc(&root->log_writers);
}
mutex_unlock(&root->log_mutex);
Expand Down

0 comments on commit fa1a0f4

Please sign in to comment.