Skip to content

Commit

Permalink
Btrfs: Seed device support
Browse files Browse the repository at this point in the history
A seed device is a special btrfs with the SEEDING super flag
set; it can only be mounted in read-only mode. Seed
devices allow people to create a new btrfs on top of them.

The new FS contains the same contents as the seed device,
but it can be mounted in read-write mode.

This patch does the following:

1) split code in btrfs_alloc_chunk into two parts. The first part makes
the newly allocated chunk usable, but does not do any operation that modifies
the chunk tree. The second part does the chunk tree modifications. This
division is for the bootstrap step of adding storage to the seed device.

2) Update device management code to handle seed device.
The basic idea is: For an FS grown from seed devices, its
seed devices are put into a list. Seed devices are
opened on demand at mounting time. If any seed device is
missing or has been changed, btrfs kernel module will
refuse to mount the FS.

3) make btrfs_find_block_group not return NULL when all
block groups are read-only.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
  • Loading branch information
Yan Zheng authored and Chris Mason committed Nov 18, 2008
1 parent c146afa commit 2b82032
Show file tree
Hide file tree
Showing 8 changed files with 946 additions and 329 deletions.
8 changes: 8 additions & 0 deletions fs/btrfs/ctree.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
btrfs_set_header_owner(cow, new_root_objectid);
btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);

write_extent_buffer(cow, root->fs_info->fsid,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);

WARN_ON(btrfs_header_generation(buf) > trans->transid);
ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
kfree(new_root);
Expand Down Expand Up @@ -274,6 +278,10 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_header_owner(cow, root->root_key.objectid);
btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);

write_extent_buffer(cow, root->fs_info->fsid,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);

WARN_ON(btrfs_header_generation(buf) > trans->transid);
if (btrfs_header_generation(buf) != trans->transid) {
u32 nr_extents;
Expand Down
18 changes: 17 additions & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,9 @@ struct btrfs_dev_item {
/* type and info about this device */
__le64 type;

/* expected generation for this device */
__le64 generation;

/* grouping information for allocation decisions */
__le32 dev_group;

Expand All @@ -188,6 +191,9 @@ struct btrfs_dev_item {

/* btrfs generated uuid for this device */
u8 uuid[BTRFS_UUID_SIZE];

/* uuid of FS who owns this device */
u8 fsid[BTRFS_UUID_SIZE];
} __attribute__ ((__packed__));

struct btrfs_stripe {
Expand Down Expand Up @@ -263,6 +269,7 @@ struct btrfs_header {
sizeof(struct btrfs_item) - \
sizeof(struct btrfs_file_extent_item))

#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)

/*
* this is a very generous portion of the super block, giving us
Expand All @@ -278,7 +285,7 @@ struct btrfs_header {
struct btrfs_super_block {
u8 csum[BTRFS_CSUM_SIZE];
/* the first 4 fields must match struct btrfs_header */
u8 fsid[16]; /* FS specific uuid */
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
__le64 bytenr; /* this block number */
__le64 flags;

Expand Down Expand Up @@ -941,6 +948,7 @@ BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64);

BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
Expand All @@ -960,12 +968,19 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
seek_speed, 8);
BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
bandwidth, 8);
BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
generation, 64);

/*
 * Return the location of the uuid field of the dev item @d, computed as the
 * address of @d plus the field's offset inside struct btrfs_dev_item.
 * @d itself is never dereferenced here.
 */
static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
{
	char *item_base = (char *)d;

	return item_base + offsetof(struct btrfs_dev_item, uuid);
}

/*
 * Return the location of the fsid field of the dev item @d, computed as the
 * address of @d plus the field's offset inside struct btrfs_dev_item.
 * @d itself is never dereferenced here.
 */
static inline char *btrfs_device_fsid(struct btrfs_dev_item *d)
{
	char *item_base = (char *)d;

	return item_base + offsetof(struct btrfs_dev_item, fsid);
}

BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
Expand Down Expand Up @@ -1661,6 +1676,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
struct extent_buffer *buf, u64 orig_start);
int btrfs_add_dead_reloc_root(struct btrfs_root *root);
int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
/* ctree.c */
int btrfs_previous_item(struct btrfs_root *root,
struct btrfs_path *path, u64 min_objectid,
Expand Down
56 changes: 35 additions & 21 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,25 @@ int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
return 0;
}

/*
 * Check whether the fsid recorded in a tree block header matches this
 * filesystem or any fs_devices entry reachable through its ->seed links.
 *
 * @root: root whose fs_info->fs_devices chain is searched
 * @eb:   extent buffer holding the tree block to check
 *
 * Returns 0 when an entry in the chain has the same fsid as the block
 * header, 1 when no entry matches.
 */
static int check_tree_block_fsid(struct btrfs_root *root,
				 struct extent_buffer *eb)
{
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	/*
	 * Sized with BTRFS_FSID_SIZE, the same constant used for the read and
	 * the compare below, so the buffer can never be smaller than the
	 * number of bytes copied into it.
	 */
	u8 fsid[BTRFS_FSID_SIZE];

	read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb),
			   BTRFS_FSID_SIZE);

	/* walk this filesystem first, then each seed in turn */
	for (; fs_devices; fs_devices = fs_devices->seed) {
		if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE))
			return 0;
	}
	return 1;
}

int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
{
Expand Down Expand Up @@ -382,9 +401,7 @@ int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
ret = -EIO;
goto err;
}
if (memcmp_extent_buffer(eb, root->fs_info->fsid,
(unsigned long)btrfs_header_fsid(eb),
BTRFS_FSID_SIZE)) {
if (check_tree_block_fsid(root, eb)) {
printk("bad fsid on block %Lu\n", eb->start);
ret = -EIO;
goto err;
Expand Down Expand Up @@ -1558,9 +1575,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!btrfs_super_root(disk_super))
goto fail_sb_buffer;

err = btrfs_parse_options(tree_root, options);
if (err)
ret = btrfs_parse_options(tree_root, options);
if (ret) {
err = ret;
goto fail_sb_buffer;
}

/*
* we need to start all the end_io workers up front because the
Expand Down Expand Up @@ -1610,18 +1629,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_start_workers(&fs_info->endio_write_workers,
fs_info->thread_pool_size);

err = -EINVAL;
if (btrfs_super_num_devices(disk_super) > fs_devices->open_devices) {
printk("Btrfs: wanted %llu devices, but found %llu\n",
(unsigned long long)btrfs_super_num_devices(disk_super),
(unsigned long long)fs_devices->open_devices);
if (btrfs_test_opt(tree_root, DEGRADED))
printk("continuing in degraded mode\n");
else {
goto fail_sb_buffer;
}
}

fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
4 * 1024 * 1024 / PAGE_CACHE_SIZE);
Expand Down Expand Up @@ -1672,7 +1679,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_read_chunk_tree(chunk_root);
mutex_unlock(&fs_info->chunk_mutex);
BUG_ON(ret);
if (ret) {
printk("btrfs: failed to read chunk tree on %s\n", sb->s_id);
goto fail_chunk_root;
}

btrfs_close_extra_devices(fs_devices);

Expand All @@ -1684,7 +1694,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
btrfs_super_root(disk_super),
blocksize, generation);
if (!tree_root->node)
goto fail_sb_buffer;
goto fail_chunk_root;


ret = find_and_setup_root(tree_root, fs_info,
Expand Down Expand Up @@ -1753,6 +1763,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
free_extent_buffer(extent_root->node);
fail_tree_root:
free_extent_buffer(tree_root->node);
fail_chunk_root:
free_extent_buffer(chunk_root->node);
fail_sys_array:
fail_sb_buffer:
btrfs_stop_workers(&fs_info->fixup_workers);
Expand Down Expand Up @@ -1823,9 +1835,10 @@ int write_all_supers(struct btrfs_root *root)
total_errors++;
continue;
}
if (!dev->in_fs_metadata)
if (!dev->in_fs_metadata || !dev->writeable)
continue;

btrfs_set_stack_device_generation(dev_item, 0);
btrfs_set_stack_device_type(dev_item, dev->type);
btrfs_set_stack_device_id(dev_item, dev->devid);
btrfs_set_stack_device_total_bytes(dev_item, dev->total_bytes);
Expand All @@ -1834,6 +1847,7 @@ int write_all_supers(struct btrfs_root *root)
btrfs_set_stack_device_io_width(dev_item, dev->io_width);
btrfs_set_stack_device_sector_size(dev_item, dev->sector_size);
memcpy(dev_item->uuid, dev->uuid, BTRFS_UUID_SIZE);
memcpy(dev_item->fsid, dev->fs_devices->fsid, BTRFS_UUID_SIZE);
flags = btrfs_super_flags(sb);
btrfs_set_super_flags(sb, flags | BTRFS_HEADER_FLAG_WRITTEN);

Expand Down Expand Up @@ -1881,7 +1895,7 @@ int write_all_supers(struct btrfs_root *root)
dev = list_entry(cur, struct btrfs_device, dev_list);
if (!dev->bdev)
continue;
if (!dev->in_fs_metadata)
if (!dev->in_fs_metadata || !dev->writeable)
continue;

BUG_ON(!dev->pending_io);
Expand Down
31 changes: 14 additions & 17 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ __btrfs_find_block_group(struct btrfs_root *root,
if (search_start) {
struct btrfs_block_group_cache *shint;
shint = btrfs_lookup_first_block_group(info, search_start);
if (shint && block_group_bits(shint, data) && !shint->ro) {
if (shint && block_group_bits(shint, data)) {
spin_lock(&shint->lock);
used = btrfs_block_group_used(&shint->item);
if (used + shint->pinned + shint->reserved <
Expand All @@ -366,7 +366,7 @@ __btrfs_find_block_group(struct btrfs_root *root,
spin_unlock(&shint->lock);
}
}
if (hint && !hint->ro && block_group_bits(hint, data)) {
if (hint && block_group_bits(hint, data)) {
spin_lock(&hint->lock);
used = btrfs_block_group_used(&hint->item);
if (used + hint->pinned + hint->reserved <
Expand All @@ -392,7 +392,7 @@ __btrfs_find_block_group(struct btrfs_root *root,
last = cache->key.objectid + cache->key.offset;
used = btrfs_block_group_used(&cache->item);

if (!cache->ro && block_group_bits(cache, data)) {
if (block_group_bits(cache, data)) {
free_check = div_factor(cache->key.offset, factor);
if (used + cache->pinned + cache->reserved <
free_check) {
Expand Down Expand Up @@ -1843,9 +1843,9 @@ static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
spin_unlock(&cache->space_info->lock);
}

static u64 reduce_alloc_profile(struct btrfs_root *root, u64 flags)
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
u64 num_devices = root->fs_info->fs_devices->num_devices;
u64 num_devices = root->fs_info->fs_devices->rw_devices;

if (num_devices == 1)
flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
Expand Down Expand Up @@ -1877,13 +1877,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
{
struct btrfs_space_info *space_info;
u64 thresh;
u64 start;
u64 num_bytes;
int ret = 0;

mutex_lock(&extent_root->fs_info->chunk_mutex);

flags = reduce_alloc_profile(extent_root, flags);
flags = btrfs_reduce_alloc_profile(extent_root, flags);

space_info = __find_space_info(extent_root->fs_info, flags);
if (!space_info) {
Expand Down Expand Up @@ -1913,16 +1911,11 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
}
spin_unlock(&space_info->lock);

ret = btrfs_alloc_chunk(trans, extent_root, &start, &num_bytes, flags);
ret = btrfs_alloc_chunk(trans, extent_root, flags);
if (ret) {
printk("space info full %Lu\n", flags);
space_info->full = 1;
goto out;
}

ret = btrfs_make_block_group(trans, extent_root, 0, flags,
BTRFS_FIRST_CHUNK_TREE_OBJECTID, start, num_bytes);
BUG_ON(ret);
out:
mutex_unlock(&extent_root->fs_info->chunk_mutex);
return ret;
Expand Down Expand Up @@ -3040,7 +3033,7 @@ static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans,
data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
}
again:
data = reduce_alloc_profile(root, data);
data = btrfs_reduce_alloc_profile(root, data);
/*
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations
Expand Down Expand Up @@ -5136,7 +5129,8 @@ static int noinline relocate_one_path(struct btrfs_trans_handle *trans,
else
btrfs_node_key_to_cpu(eb, &keys[level], 0);
}
if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
if (nodes[0] &&
ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
eb = path->nodes[0];
ret = replace_extents_in_leaf(trans, reloc_root, eb,
group, reloc_inode);
Expand Down Expand Up @@ -5377,7 +5371,7 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;

num_devices = root->fs_info->fs_devices->num_devices;
num_devices = root->fs_info->fs_devices->rw_devices;
if (num_devices == 1) {
stripped |= BTRFS_BLOCK_GROUP_DUP;
stripped = flags & ~stripped;
Expand Down Expand Up @@ -5801,6 +5795,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
BUG_ON(ret);

set_avail_alloc_bits(root->fs_info, cache->flags);
if (btrfs_chunk_readonly(root, cache->key.objectid))
set_block_group_readonly(cache);
}
ret = 0;
error:
Expand Down Expand Up @@ -5889,6 +5885,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
block_group->space_info->total_bytes -= block_group->key.offset;
block_group->space_info->bytes_readonly -= block_group->key.offset;
spin_unlock(&block_group->space_info->lock);
block_group->space_info->full = 0;

/*
memset(shrink_block_group, 0, sizeof(*shrink_block_group));
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
devid = simple_strtoull(devstr, &end, 10);
printk(KERN_INFO "resizing devid %llu\n", devid);
}
device = btrfs_find_device(root, devid, NULL);
device = btrfs_find_device(root, devid, NULL, NULL);
if (!device) {
printk(KERN_INFO "resizer unable to find device %llu\n", devid);
ret = -EINVAL;
Expand Down
9 changes: 9 additions & 0 deletions fs/btrfs/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,11 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
if (error)
goto error_free_subvol_name;

if (!(flags & MS_RDONLY) && fs_devices->rw_devices == 0) {
error = -EACCES;
goto error_close_devices;
}

bdev = fs_devices->latest_bdev;
s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
if (IS_ERR(s))
Expand All @@ -444,6 +449,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
goto error_close_devices;
}

btrfs_close_devices(fs_devices);
} else {
char b[BDEVNAME_SIZE];

Expand Down Expand Up @@ -512,6 +518,9 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
ret = btrfs_commit_super(root);
WARN_ON(ret);
} else {
if (root->fs_info->fs_devices->rw_devices == 0)
return -EACCES;

if (btrfs_super_log_root(&root->fs_info->super_copy) != 0)
return -EINVAL;

Expand Down
Loading

0 comments on commit 2b82032

Please sign in to comment.