Skip to content

Commit

Permalink
Btrfs: fix skipped error handle when log sync failed
Browse files Browse the repository at this point in the history
It is possible that many tasks sync the log tree at the same time, but
only one task can do the sync work, the others will wait for it. But those
wait tasks didn't get the result of the log sync, and returned 0 when they
ended the wait. It caused those tasks skipped the error handle, and the
serious problem was they told the users the file sync succeeded but in
fact they failed.

This patch fixes this problem by introducing a log context structure,
we insert it into the a global list. When the sync fails, we will set
the error number of every log context in the list, then the waiting tasks
get the error number of the log context and handle the error if need.

Signed-off-by: Miao Xie <miaox@cn.fujitsu.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
  • Loading branch information
Miao Xie authored and Josef Bacik committed Mar 10, 2014
1 parent bb14a59 commit 8b050d3
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 31 deletions.
1 change: 1 addition & 0 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1718,6 +1718,7 @@ struct btrfs_root {
struct mutex log_mutex;
wait_queue_head_t log_writer_wait;
wait_queue_head_t log_commit_wait[2];
struct list_head log_ctxs[2];
atomic_t log_writers;
atomic_t log_commit[2];
atomic_t log_batch;
Expand Down
2 changes: 2 additions & 0 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1200,6 +1200,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
init_waitqueue_head(&root->log_writer_wait);
init_waitqueue_head(&root->log_commit_wait[0]);
init_waitqueue_head(&root->log_commit_wait[1]);
INIT_LIST_HEAD(&root->log_ctxs[0]);
INIT_LIST_HEAD(&root->log_ctxs[1]);
atomic_set(&root->log_commit[0], 0);
atomic_set(&root->log_commit[1], 0);
atomic_set(&root->log_writers, 0);
Expand Down
9 changes: 6 additions & 3 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1864,8 +1864,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
int ret = 0;
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
int ret = 0;
bool full_sync = 0;

trace_btrfs_sync_file(file, datasync);
Expand Down Expand Up @@ -1959,7 +1960,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
}
trans->sync = true;

ret = btrfs_log_dentry_safe(trans, root, dentry);
btrfs_init_log_ctx(&ctx);

ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
ret = 1;
Expand All @@ -1979,7 +1982,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)

if (ret != BTRFS_NO_LOG_SYNC) {
if (!ret) {
ret = btrfs_sync_log(trans, root);
ret = btrfs_sync_log(trans, root, &ctx);
if (!ret) {
ret = btrfs_end_transaction(trans, root);
goto out;
Expand Down
114 changes: 88 additions & 26 deletions fs/btrfs/tree-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,10 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans,
* syncing the tree wait for us to finish
*/
static int start_log_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
int index;
int ret;

mutex_lock(&root->log_mutex);
Expand All @@ -151,6 +153,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans,

atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
if (ctx) {
index = root->log_transid % 2;
list_add_tail(&ctx->list, &root->log_ctxs[index]);
}
mutex_unlock(&root->log_mutex);
return 0;
}
Expand All @@ -172,6 +178,10 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
root->log_start_pid = current->pid;
atomic_inc(&root->log_batch);
atomic_inc(&root->log_writers);
if (ctx) {
index = root->log_transid % 2;
list_add_tail(&ctx->list, &root->log_ctxs[index]);
}
out:
mutex_unlock(&root->log_mutex);
return ret;
Expand Down Expand Up @@ -2361,25 +2371,18 @@ static int update_log_root(struct btrfs_trans_handle *trans,
return ret;
}

static int wait_log_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int transid)
static void wait_log_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root, int transid)
{
DEFINE_WAIT(wait);
int index = transid % 2;
int ret = 0;

/*
* we only allow two pending log transactions at a time,
* so we know that if ours is more than 2 older than the
* current transaction, we're done
*/
do {
if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) ==
trans->transid) {
ret = -EAGAIN;
break;
}

prepare_to_wait(&root->log_commit_wait[index],
&wait, TASK_UNINTERRUPTIBLE);
mutex_unlock(&root->log_mutex);
Expand All @@ -2392,27 +2395,55 @@ static int wait_log_commit(struct btrfs_trans_handle *trans,
mutex_lock(&root->log_mutex);
} while (root->log_transid < transid + 2 &&
atomic_read(&root->log_commit[index]));

return ret;
}

static void wait_for_writer(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
DEFINE_WAIT(wait);
while (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) !=
trans->transid && atomic_read(&root->log_writers)) {

while (atomic_read(&root->log_writers)) {
prepare_to_wait(&root->log_writer_wait,
&wait, TASK_UNINTERRUPTIBLE);
mutex_unlock(&root->log_mutex);
if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) !=
trans->transid && atomic_read(&root->log_writers))
if (atomic_read(&root->log_writers))
schedule();
mutex_lock(&root->log_mutex);
finish_wait(&root->log_writer_wait, &wait);
}
}

static inline void btrfs_remove_log_ctx(struct btrfs_root *root,
struct btrfs_log_ctx *ctx)
{
if (!ctx)
return;

mutex_lock(&root->log_mutex);
list_del_init(&ctx->list);
mutex_unlock(&root->log_mutex);
}

/*
* Invoked in log mutex context, or be sure there is no other task which
* can access the list.
*/
static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
int index, int error)
{
struct btrfs_log_ctx *ctx;

if (!error) {
INIT_LIST_HEAD(&root->log_ctxs[index]);
return;
}

list_for_each_entry(ctx, &root->log_ctxs[index], list)
ctx->log_ret = error;

INIT_LIST_HEAD(&root->log_ctxs[index]);
}

/*
* btrfs_sync_log does sends a given tree log down to the disk and
* updates the super blocks to record it. When this call is done,
Expand All @@ -2426,7 +2457,7 @@ static void wait_for_writer(struct btrfs_trans_handle *trans,
* that has happened.
*/
int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
struct btrfs_root *root, struct btrfs_log_ctx *ctx)
{
int index1;
int index2;
Expand All @@ -2435,15 +2466,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
int log_transid = 0;
struct btrfs_log_ctx root_log_ctx;
struct blk_plug plug;

mutex_lock(&root->log_mutex);
log_transid = root->log_transid;
index1 = root->log_transid % 2;
if (atomic_read(&root->log_commit[index1])) {
ret = wait_log_commit(trans, root, root->log_transid);
wait_log_commit(trans, root, root->log_transid);
mutex_unlock(&root->log_mutex);
return ret;
return ctx->log_ret;
}
atomic_set(&root->log_commit[index1], 1);

Expand Down Expand Up @@ -2534,13 +2566,18 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
}

index2 = log_root_tree->log_transid % 2;

btrfs_init_log_ctx(&root_log_ctx);
list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);

if (atomic_read(&log_root_tree->log_commit[index2])) {
blk_finish_plug(&plug);
btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
ret = wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
btrfs_free_logged_extents(log, log_transid);
mutex_unlock(&log_root_tree->log_mutex);
ret = root_log_ctx.log_ret;
goto out;
}
atomic_set(&log_root_tree->log_commit[index2], 1);
Expand Down Expand Up @@ -2609,12 +2646,31 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&root->log_mutex);

out_wake_log_root:
/*
* We needn't get log_mutex here because we are sure all
* the other tasks are blocked.
*/
btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);

/*
* It is dangerous if log_commit is changed before we set
* ->log_ret of log ctx. Because the readers may not get
* the return value.
*/
smp_wmb();

atomic_set(&log_root_tree->log_commit[index2], 0);
smp_mb();
if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
wake_up(&log_root_tree->log_commit_wait[index2]);
out:
/* See above. */
btrfs_remove_all_log_ctxs(root, index1, ret);

/* See above. */
smp_wmb();
atomic_set(&root->log_commit[index1], 0);

smp_mb();
if (waitqueue_active(&root->log_commit_wait[index1]))
wake_up(&root->log_commit_wait[index1]);
Expand Down Expand Up @@ -4076,7 +4132,8 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
*/
static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode,
struct dentry *parent, int exists_only)
struct dentry *parent, int exists_only,
struct btrfs_log_ctx *ctx)
{
int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
struct super_block *sb;
Expand Down Expand Up @@ -4113,7 +4170,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
goto end_no_trans;
}

ret = start_log_trans(trans, root);
ret = start_log_trans(trans, root, ctx);
if (ret)
goto end_no_trans;

Expand Down Expand Up @@ -4163,6 +4220,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
root->fs_info->last_trans_log_full_commit = trans->transid;
ret = 1;
}

if (ret)
btrfs_remove_log_ctx(root, ctx);
btrfs_end_log_trans(root);
end_no_trans:
return ret;
Expand All @@ -4175,12 +4235,14 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
* data on disk.
*/
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry)
struct btrfs_root *root, struct dentry *dentry,
struct btrfs_log_ctx *ctx)
{
struct dentry *parent = dget_parent(dentry);
int ret;

ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent,
0, ctx);
dput(parent);

return ret;
Expand Down Expand Up @@ -4417,6 +4479,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
root->fs_info->last_trans_committed))
return 0;

return btrfs_log_inode_parent(trans, root, inode, parent, 1);
return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL);
}

16 changes: 14 additions & 2 deletions fs/btrfs/tree-log.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,26 @@
/* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
#define BTRFS_NO_LOG_SYNC 256

struct btrfs_log_ctx {
int log_ret;
struct list_head list;
};

static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx)
{
ctx->log_ret = 0;
INIT_LIST_HEAD(&ctx->list);
}

int btrfs_sync_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct btrfs_root *root, struct btrfs_log_ctx *ctx);
int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root);
int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_recover_log_trees(struct btrfs_root *tree_root);
int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct dentry *dentry);
struct btrfs_root *root, struct dentry *dentry,
struct btrfs_log_ctx *ctx);
int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
const char *name, int name_len,
Expand Down

0 comments on commit 8b050d3

Please sign in to comment.