Skip to content

Commit

Permalink
Btrfs: change core code of btrfs to support the device replace operations
Browse files Browse the repository at this point in the history

This commit contains all the essential changes to the core code
of Btrfs for support of the device replace procedure.

Signed-off-by: Stefan Behrens <sbehrens@giantdisaster.de>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
  • Loading branch information
Stefan Behrens authored and Josef Bacik committed Dec 12, 2012
1 parent e93c89c commit 8dabb74
Show file tree
Hide file tree
Showing 7 changed files with 111 additions and 14 deletions.
24 changes: 23 additions & 1 deletion fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
#include "inode-map.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"

#ifdef CONFIG_X86
#include <asm/cpufeature.h>
Expand Down Expand Up @@ -2438,7 +2439,11 @@ int open_ctree(struct super_block *sb,
goto fail_tree_roots;
}

btrfs_close_extra_devices(fs_devices);
/*
* keep the device that is marked to be the target device for the
* dev_replace procedure
*/
btrfs_close_extra_devices(fs_info, fs_devices, 0);

if (!fs_devices->latest_bdev) {
printk(KERN_CRIT "btrfs: failed to read devices on %s\n",
Expand Down Expand Up @@ -2510,6 +2515,14 @@ int open_ctree(struct super_block *sb,
goto fail_block_groups;
}

ret = btrfs_init_dev_replace(fs_info);
if (ret) {
pr_err("btrfs: failed to init dev_replace: %d\n", ret);
goto fail_block_groups;
}

btrfs_close_extra_devices(fs_info, fs_devices, 1);

ret = btrfs_init_space_info(fs_info);
if (ret) {
printk(KERN_ERR "Failed to initial space info: %d\n", ret);
Expand Down Expand Up @@ -2658,6 +2671,13 @@ int open_ctree(struct super_block *sb,
return ret;
}

ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
pr_warn("btrfs: failed to resume dev_replace\n");
close_ctree(tree_root);
return ret;
}

return 0;

fail_qgroup:
Expand Down Expand Up @@ -3300,6 +3320,8 @@ int close_ctree(struct btrfs_root *root)
/* pause restriper - we want to resume on mount */
btrfs_pause_balance(fs_info);

btrfs_dev_replace_suspend_for_unmount(fs_info);

btrfs_scrub_cancel(fs_info);

/* wait for any defraggers to finish */
Expand Down
17 changes: 17 additions & 0 deletions fs/btrfs/reada.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "volumes.h"
#include "disk-io.h"
#include "transaction.h"
#include "dev-replace.h"

#undef DEBUG

Expand Down Expand Up @@ -331,6 +332,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
int nzones = 0;
int i;
unsigned long index = logical >> PAGE_CACHE_SHIFT;
int dev_replace_is_ongoing;

spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
Expand Down Expand Up @@ -392,20 +394,25 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
}

/* insert extent in reada_tree + all per-device trees, all or nothing */
btrfs_dev_replace_lock(&fs_info->dev_replace);
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
re_exist = radix_tree_lookup(&fs_info->reada_tree, index);
BUG_ON(!re_exist);
re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
prev_dev = NULL;
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
&fs_info->dev_replace);
for (i = 0; i < nzones; ++i) {
dev = bbio->stripes[i].dev;
if (dev == prev_dev) {
Expand All @@ -422,6 +429,14 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
/* cannot read ahead on missing device */
continue;
}
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
* as this device is selected for reading only as
* a last resort, skip it for read ahead.
*/
continue;
}
prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) {
Expand All @@ -434,10 +449,12 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);
goto error;
}
}
spin_unlock(&fs_info->reada_lock);
btrfs_dev_replace_unlock(&fs_info->dev_replace);

kfree(bbio);
return re;
Expand Down
7 changes: 6 additions & 1 deletion fs/btrfs/scrub.c
Original file line number Diff line number Diff line change
Expand Up @@ -2843,12 +2843,17 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EIO;
}

if (dev->scrub_device) {
btrfs_dev_replace_lock(&fs_info->dev_replace);
if (dev->scrub_device ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
btrfs_dev_replace_unlock(&fs_info->dev_replace);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
scrub_workers_put(fs_info);
return -EINPROGRESS;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
sctx = scrub_setup_ctx(dev, is_dev_replace);
if (IS_ERR(sctx)) {
mutex_unlock(&fs_info->scrub_lock);
Expand Down
13 changes: 13 additions & 0 deletions fs/btrfs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "export.h"
#include "compression.h"
#include "rcu-string.h"
#include "dev-replace.h"

#define CREATE_TRACE_POINTS
#include <trace/events/btrfs.h>
Expand Down Expand Up @@ -1225,8 +1226,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
return 0;

if (*flags & MS_RDONLY) {
/*
* this also happens on 'umount -rf' or on shutdown, when
* the filesystem is busy.
*/
sb->s_flags |= MS_RDONLY;

btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);

ret = btrfs_commit_super(root);
if (ret)
goto restore;
Expand Down Expand Up @@ -1263,6 +1271,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;

ret = btrfs_resume_dev_replace_async(fs_info);
if (ret) {
pr_warn("btrfs: failed to resume dev_replace\n");
goto restore;
}
sb->s_flags &= ~MS_RDONLY;
}

Expand Down
7 changes: 6 additions & 1 deletion fs/btrfs/transaction.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "tree-log.h"
#include "inode-map.h"
#include "volumes.h"
#include "dev-replace.h"

#define BTRFS_ROOT_TRANS_TAG 0

Expand Down Expand Up @@ -845,7 +846,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
return ret;

ret = btrfs_run_dev_stats(trans, root->fs_info);
BUG_ON(ret);
WARN_ON(ret);
ret = btrfs_run_dev_replace(trans, root->fs_info);
WARN_ON(ret);

ret = btrfs_run_qgroups(trans, root->fs_info);
BUG_ON(ret);
Expand All @@ -868,6 +871,8 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
switch_commit_root(fs_info->extent_root);
up_write(&fs_info->extent_commit_sem);

btrfs_after_dev_replace_commit(fs_info);

return 0;
}

Expand Down
54 changes: 44 additions & 10 deletions fs/btrfs/volumes.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include "check-integrity.h"
#include "rcu-string.h"
#include "math.h"
#include "dev-replace.h"

static int init_first_rw_device(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
Expand Down Expand Up @@ -505,7 +506,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
return ERR_PTR(-ENOMEM);
}

void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices, int step)
{
struct btrfs_device *device, *next;

Expand All @@ -528,6 +530,21 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
continue;
}

if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
/*
* In the first step, keep the device which has
* the correct fsid and the devid that is used
* for the dev_replace procedure.
* In the second step, the dev_replace state is
* read from the device tree and it is known
* whether the procedure is really active or
* not, which means whether this device is
* used or whether it should be removed.
*/
if (step == 0 || device->is_tgtdev_for_dev_replace) {
continue;
}
}
if (device->bdev) {
blkdev_put(device->bdev, device->mode);
device->bdev = NULL;
Expand All @@ -536,7 +553,8 @@ void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
if (device->writeable) {
list_del_init(&device->dev_alloc_list);
device->writeable = 0;
fs_devices->rw_devices--;
if (!device->is_tgtdev_for_dev_replace)
fs_devices->rw_devices--;
}
list_del_init(&device->dev_list);
fs_devices->num_devices--;
Expand Down Expand Up @@ -594,7 +612,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
if (device->bdev)
fs_devices->open_devices--;

if (device->writeable) {
if (device->writeable && !device->is_tgtdev_for_dev_replace) {
list_del_init(&device->dev_alloc_list);
fs_devices->rw_devices--;
}
Expand Down Expand Up @@ -718,7 +736,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
fs_devices->rotating = 1;

fs_devices->open_devices++;
if (device->writeable) {
if (device->writeable && !device->is_tgtdev_for_dev_replace) {
fs_devices->rw_devices++;
list_add(&device->dev_alloc_list,
&fs_devices->alloc_list);
Expand Down Expand Up @@ -1350,16 +1368,22 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
root->fs_info->avail_system_alloc_bits |
root->fs_info->avail_metadata_alloc_bits;

if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
root->fs_info->fs_devices->num_devices <= 4) {
num_devices = root->fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&root->fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&root->fs_info->dev_replace);

if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
printk(KERN_ERR "btrfs: unable to go below four devices "
"on raid10\n");
ret = -EINVAL;
goto out;
}

if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
root->fs_info->fs_devices->num_devices <= 2) {
if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
printk(KERN_ERR "btrfs: unable to go below two "
"devices on raid1\n");
ret = -EINVAL;
Expand Down Expand Up @@ -2935,6 +2959,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
u64 allowed;
int mixed = 0;
int ret;
u64 num_devices;

if (btrfs_fs_closing(fs_info) ||
atomic_read(&fs_info->balance_pause_req) ||
Expand Down Expand Up @@ -2963,10 +2988,17 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
}

num_devices = fs_info->fs_devices->num_devices;
btrfs_dev_replace_lock(&fs_info->dev_replace);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1);
num_devices--;
}
btrfs_dev_replace_unlock(&fs_info->dev_replace);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (fs_info->fs_devices->num_devices == 1)
if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP;
else if (fs_info->fs_devices->num_devices < 4)
else if (num_devices < 4)
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
else
allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
Expand Down Expand Up @@ -3591,6 +3623,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
devices_info[ndevs].total_avail = total_avail;
devices_info[ndevs].dev = device;
++ndevs;
WARN_ON(ndevs > fs_devices->rw_devices);
}

/*
Expand Down Expand Up @@ -4773,6 +4806,7 @@ static void fill_device_from_item(struct extent_buffer *leaf,
device->io_align = btrfs_device_io_align(leaf, dev_item);
device->io_width = btrfs_device_io_width(leaf, dev_item);
device->sector_size = btrfs_device_sector_size(leaf, dev_item);
WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
device->is_tgtdev_for_dev_replace = 0;

ptr = (unsigned long)btrfs_device_uuid(dev_item);
Expand Down
3 changes: 2 additions & 1 deletion fs/btrfs/volumes.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
struct btrfs_fs_devices **fs_devices_ret);
int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices);
void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
struct btrfs_fs_devices *fs_devices, int step);
int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
char *device_path,
struct btrfs_device **device);
Expand Down

0 comments on commit 8dabb74

Please sign in to comment.