Skip to content

Commit

Permalink
Btrfs: add basic restriper infrastructure
Browse files Browse the repository at this point in the history
Add basic restriper infrastructure: extended balancing ioctl and all
related ioctl data structures, add data structure for tracking
restriper's state to fs_info, etc.  The semantics of the old balancing
ioctl are fully preserved.

Explicitly disallow any volume operations when balance is in progress.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
  • Loading branch information
Ilya Dryomov committed Jan 16, 2012
1 parent 10ea00f commit c9e9f97
Show file tree
Hide file tree
Showing 6 changed files with 281 additions and 41 deletions.
6 changes: 6 additions & 0 deletions fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,7 @@ struct btrfs_block_group_cache {
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
struct btrfs_balance_control;
struct btrfs_delayed_root;
struct btrfs_fs_info {
u8 fsid[BTRFS_FSID_SIZE];
Expand Down Expand Up @@ -1159,6 +1160,11 @@ struct btrfs_fs_info {
u64 avail_metadata_alloc_bits;
u64 avail_system_alloc_bits;

/* restriper state */
spinlock_t balance_lock;
struct mutex balance_mutex;
struct btrfs_balance_control *balance_ctl;

unsigned data_chunk_allocations;
unsigned metadata_ratio;

Expand Down
4 changes: 4 additions & 0 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -2002,6 +2002,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
init_rwsem(&fs_info->scrub_super_lock);
fs_info->scrub_workers_refcnt = 0;

spin_lock_init(&fs_info->balance_lock);
mutex_init(&fs_info->balance_mutex);
fs_info->balance_ctl = NULL;

sb->s_blocksize = 4096;
sb->s_blocksize_bits = blksize_bits(4096);
sb->s_bdi = &fs_info->bdi;
Expand Down
135 changes: 119 additions & 16 deletions fs/btrfs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -1203,13 +1203,21 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (!capable(CAP_SYS_ADMIN))
return -EPERM;

mutex_lock(&root->fs_info->volume_mutex);
if (root->fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: balance in progress\n");
ret = -EINVAL;
goto out;
}

vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}

vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';

mutex_lock(&root->fs_info->volume_mutex);
sizestr = vol_args->name;
devstr = strchr(sizestr, ':');
if (devstr) {
Expand All @@ -1226,7 +1234,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
(unsigned long long)devid);
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
if (!strcmp(sizestr, "max"))
new_size = device->bdev->bd_inode->i_size;
Expand All @@ -1241,7 +1249,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
new_size = memparse(sizestr, NULL);
if (new_size == 0) {
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
}

Expand All @@ -1250,7 +1258,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
if (mod < 0) {
if (new_size > old_size) {
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
new_size = old_size - new_size;
} else if (mod > 0) {
Expand All @@ -1259,11 +1267,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,

if (new_size < 256 * 1024 * 1024) {
ret = -EINVAL;
goto out_unlock;
goto out_free;
}
if (new_size > device->bdev->bd_inode->i_size) {
ret = -EFBIG;
goto out_unlock;
goto out_free;
}

do_div(new_size, root->sectorsize);
Expand All @@ -1276,17 +1284,18 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out_unlock;
goto out_free;
}
ret = btrfs_grow_device(trans, device, new_size);
btrfs_commit_transaction(trans, root);
} else if (new_size < old_size) {
ret = btrfs_shrink_device(device, new_size);
}

out_unlock:
mutex_unlock(&root->fs_info->volume_mutex);
out_free:
kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}

Expand Down Expand Up @@ -2052,14 +2061,25 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;

mutex_lock(&root->fs_info->volume_mutex);
if (root->fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: balance in progress\n");
ret = -EINVAL;
goto out;
}

vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}

vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_init_new_device(root, vol_args->name);

kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}

Expand All @@ -2074,14 +2094,25 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
if (root->fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;

mutex_lock(&root->fs_info->volume_mutex);
if (root->fs_info->balance_ctl) {
printk(KERN_INFO "btrfs: balance in progress\n");
ret = -EINVAL;
goto out;
}

vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
if (IS_ERR(vol_args)) {
ret = PTR_ERR(vol_args);
goto out;
}

vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
ret = btrfs_rm_device(root, vol_args->name);

kfree(vol_args);
out:
mutex_unlock(&root->fs_info->volume_mutex);
return ret;
}

Expand Down Expand Up @@ -3034,6 +3065,76 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
return ret;
}

void update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs)
{
struct btrfs_balance_control *bctl = fs_info->balance_ctl;

bargs->flags = bctl->flags;

memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));
}

static long btrfs_ioctl_balance(struct btrfs_root *root, void __user *arg)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ioctl_balance_args *bargs;
struct btrfs_balance_control *bctl;
int ret;

if (!capable(CAP_SYS_ADMIN))
return -EPERM;

if (fs_info->sb->s_flags & MS_RDONLY)
return -EROFS;

mutex_lock(&fs_info->volume_mutex);
mutex_lock(&fs_info->balance_mutex);

if (arg) {
bargs = memdup_user(arg, sizeof(*bargs));
if (IS_ERR(bargs)) {
ret = PTR_ERR(bargs);
goto out;
}
} else {
bargs = NULL;
}

bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
if (!bctl) {
ret = -ENOMEM;
goto out_bargs;
}

bctl->fs_info = fs_info;
if (arg) {
memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));

bctl->flags = bargs->flags;
}

ret = btrfs_balance(bctl, bargs);
/*
* bctl is freed in __cancel_balance
*/
if (arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
}

out_bargs:
kfree(bargs);
out:
mutex_unlock(&fs_info->balance_mutex);
mutex_unlock(&fs_info->volume_mutex);
return ret;
}

long btrfs_ioctl(struct file *file, unsigned int
cmd, unsigned long arg)
{
Expand Down Expand Up @@ -3078,7 +3179,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_DEV_INFO:
return btrfs_ioctl_dev_info(root, argp);
case BTRFS_IOC_BALANCE:
return btrfs_balance(root->fs_info->dev_root);
return btrfs_ioctl_balance(root, NULL);
case BTRFS_IOC_CLONE:
return btrfs_ioctl_clone(file, arg, 0, 0, 0);
case BTRFS_IOC_CLONE_RANGE:
Expand Down Expand Up @@ -3110,6 +3211,8 @@ long btrfs_ioctl(struct file *file, unsigned int
return btrfs_ioctl_scrub_cancel(root, argp);
case BTRFS_IOC_SCRUB_PROGRESS:
return btrfs_ioctl_scrub_progress(root, argp);
case BTRFS_IOC_BALANCE_V2:
return btrfs_ioctl_balance(root, argp);
}

return -ENOTTY;
Expand Down
43 changes: 43 additions & 0 deletions fs/btrfs/ioctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,47 @@ struct btrfs_ioctl_fs_info_args {
__u64 reserved[124]; /* pad to 1k */
};

/*
* this is packed, because it should be exactly the same as its disk
* byte order counterpart (struct btrfs_disk_balance_args)
*/
struct btrfs_balance_args {
__u64 profiles;
__u64 usage;
__u64 devid;
__u64 pstart;
__u64 pend;
__u64 vstart;
__u64 vend;

__u64 target;

__u64 flags;

__u64 unused[8];
} __attribute__ ((__packed__));

/* report balance progress to userspace */
struct btrfs_balance_progress {
__u64 expected; /* estimated # of chunks that will be
* relocated to fulfill the request */
__u64 considered; /* # of chunks we have considered so far */
__u64 completed; /* # of chunks relocated so far */
};

struct btrfs_ioctl_balance_args {
__u64 flags; /* in/out */
__u64 state; /* out */

struct btrfs_balance_args data; /* in/out */
struct btrfs_balance_args meta; /* in/out */
struct btrfs_balance_args sys; /* in/out */

struct btrfs_balance_progress stat; /* out */

__u64 unused[72]; /* pad to 1k */
};

#define BTRFS_INO_LOOKUP_PATH_MAX 4080
struct btrfs_ioctl_ino_lookup_args {
__u64 treeid;
Expand Down Expand Up @@ -272,6 +313,8 @@ struct btrfs_ioctl_logical_ino_args {
struct btrfs_ioctl_dev_info_args)
#define BTRFS_IOC_FS_INFO _IOR(BTRFS_IOCTL_MAGIC, 31, \
struct btrfs_ioctl_fs_info_args)
#define BTRFS_IOC_BALANCE_V2 _IOWR(BTRFS_IOCTL_MAGIC, 32, \
struct btrfs_ioctl_balance_args)
#define BTRFS_IOC_INO_PATHS _IOWR(BTRFS_IOCTL_MAGIC, 35, \
struct btrfs_ioctl_ino_path_args)
#define BTRFS_IOC_LOGICAL_INO _IOWR(BTRFS_IOCTL_MAGIC, 36, \
Expand Down
Loading

0 comments on commit c9e9f97

Please sign in to comment.