Skip to content

Commit

Permalink
Btrfs: recover balance on mount
Browse files Browse the repository at this point in the history
On mount, if balance item is found, resume balance in a separate
kernel thread.

Try to be smart to continue roughly where previous balance (or convert)
was interrupted.  For chunk types that were being converted to some
profile we turn on soft convert, in case of a simple balance we turn on
usage filter and relocate only less-than-90%-full chunks of that type.
These are just heuristics but they help quite a bit, and can be improved
in future.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
  • Loading branch information
Ilya Dryomov committed Jan 16, 2012
1 parent 0940ebf commit 5964101
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 2 deletions.
4 changes: 4 additions & 0 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -2427,6 +2427,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!err)
err = btrfs_orphan_cleanup(fs_info->tree_root);
up_read(&fs_info->cleanup_work_sem);

if (!err)
err = btrfs_recover_balance(fs_info->tree_root);

if (err) {
close_ctree(tree_root);
return ERR_PTR(err);
Expand Down
135 changes: 133 additions & 2 deletions fs/btrfs/volumes.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <linux/random.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
#include <linux/kthread.h>
#include <asm/div64.h>
#include "compat.h"
#include "ctree.h"
Expand Down Expand Up @@ -2164,6 +2165,46 @@ static int del_balance_item(struct btrfs_root *root)
return ret;
}

/*
* This is a heuristic used to reduce the number of chunks balanced on
* resume after balance was interrupted.
*/
static void update_balance_args(struct btrfs_balance_control *bctl)
{
/*
* Turn on soft mode for chunk types that were being converted.
*/
if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;

/*
* Turn on usage filter if is not already used. The idea is
* that chunks that we have already balanced should be
* reasonably full. Don't do it for chunks that are being
* converted - that will keep us from relocating unconverted
* (albeit full) chunks.
*/
if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
!(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
bctl->data.usage = 90;
}
if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
!(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
bctl->sys.usage = 90;
}
if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
!(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
bctl->meta.usage = 90;
}
}

/*
* Should be called with both balance and volume mutexes held to
* serialize other volume operations (add_dev/rm_dev/resize) with
Expand Down Expand Up @@ -2626,10 +2667,18 @@ int btrfs_balance(struct btrfs_balance_control *bctl,

do_balance:
ret = insert_balance_item(fs_info->tree_root, bctl);
if (ret)
if (ret && ret != -EEXIST)
goto out;

set_balance_control(bctl);
if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
BUG_ON(ret == -EEXIST);
set_balance_control(bctl);
} else {
BUG_ON(ret != -EEXIST);
spin_lock(&fs_info->balance_lock);
update_balance_args(bctl);
spin_unlock(&fs_info->balance_lock);
}

mutex_unlock(&fs_info->balance_mutex);

Expand All @@ -2646,7 +2695,89 @@ int btrfs_balance(struct btrfs_balance_control *bctl,

return ret;
out:
if (bctl->flags & BTRFS_BALANCE_RESUME)
__cancel_balance(fs_info);
else
kfree(bctl);
return ret;
}

static int balance_kthread(void *data)
{
struct btrfs_balance_control *bctl =
(struct btrfs_balance_control *)data;
struct btrfs_fs_info *fs_info = bctl->fs_info;
int ret;

mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);

set_balance_control(bctl);

printk(KERN_INFO "btrfs: continuing balance\n");
ret = btrfs_balance(bctl, NULL);

mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
return ret;
}

int btrfs_recover_balance(struct btrfs_root *tree_root)
{
struct task_struct *tsk;
struct btrfs_balance_control *bctl;
struct btrfs_balance_item *item;
struct btrfs_disk_balance_args disk_bargs;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_key key;
int ret;

path = btrfs_alloc_path();
if (!path)
return -ENOMEM;

bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
if (!bctl) {
ret = -ENOMEM;
goto out;
}

key.objectid = BTRFS_BALANCE_OBJECTID;
key.type = BTRFS_BALANCE_ITEM_KEY;
key.offset = 0;

ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
if (ret < 0)
goto out_bctl;
if (ret > 0) { /* ret = -ENOENT; */
ret = 0;
goto out_bctl;
}

leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);

bctl->fs_info = tree_root->fs_info;
bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;

btrfs_balance_data(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
btrfs_balance_meta(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
btrfs_balance_sys(leaf, item, &disk_bargs);
btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);

tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
if (IS_ERR(tsk))
ret = PTR_ERR(tsk);
else
goto out;

out_bctl:
kfree(bctl);
out:
btrfs_free_path(path);
return ret;
}

Expand Down
2 changes: 2 additions & 0 deletions fs/btrfs/volumes.h
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ struct map_lookup {
BTRFS_BALANCE_METADATA)

#define BTRFS_BALANCE_FORCE (1ULL << 3)
#define BTRFS_BALANCE_RESUME (1ULL << 4)

/*
* Balance filters
Expand Down Expand Up @@ -271,6 +272,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
int btrfs_init_new_device(struct btrfs_root *root, char *path);
int btrfs_balance(struct btrfs_balance_control *bctl,
struct btrfs_ioctl_balance_args *bargs);
int btrfs_recover_balance(struct btrfs_root *tree_root);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
Expand Down

0 comments on commit 5964101

Please sign in to comment.