Skip to content

Commit

Permalink
Btrfs: fix nodatasum handling in balancing code
Browse files Browse the repository at this point in the history
Checksums on data can be disabled by mount option, so it's
possible some data extents don't have checksums or have
invalid checksums. This causes trouble for data relocation.
This patch contains the following changes to make data relocation
work:

1) Make the nodatasum/nodatacow mount options affect only new
files. Checksums and COW on data are controlled only by the
inode flags.

2) Check for the existence of checksums in the nodatacow checker.
If checksums exist, force COW of the data extent. This ensures that
the checksum for a given block is either valid or does not exist.

3) Update the data relocation code to properly handle the case
of missing checksums.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
  • Loading branch information
Yan Zheng authored and Chris Mason committed Dec 12, 2008
1 parent e4404d6 commit 17d217f
Show file tree
Hide file tree
Showing 7 changed files with 226 additions and 35 deletions.
9 changes: 3 additions & 6 deletions fs/btrfs/compression.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,7 @@ static int check_compressed_csum(struct inode *inode,
u32 csum;
u32 *cb_sum = &cb->sums;

if (btrfs_test_opt(root, NODATASUM) ||
btrfs_test_flag(inode, NODATASUM))
if (btrfs_test_flag(inode, NODATASUM))
return 0;

for (i = 0; i < cb->nr_pages; i++) {
Expand Down Expand Up @@ -671,8 +670,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
*/
atomic_inc(&cb->pending_bios);

if (!btrfs_test_opt(root, NODATASUM) &&
!btrfs_test_flag(inode, NODATASUM)) {
if (!btrfs_test_flag(inode, NODATASUM)) {
btrfs_lookup_bio_sums(root, inode, comp_bio,
sums);
}
Expand All @@ -699,8 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
BUG_ON(ret);

if (!btrfs_test_opt(root, NODATASUM) &&
!btrfs_test_flag(inode, NODATASUM)) {
if (!btrfs_test_flag(inode, NODATASUM)) {
btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
}

Expand Down
5 changes: 4 additions & 1 deletion fs/btrfs/ctree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1702,7 +1702,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root,
int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *leaf);
int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr);
struct btrfs_root *root, u64 objectid, u64 bytenr);
int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
Expand Down Expand Up @@ -1789,6 +1789,7 @@ int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
struct extent_buffer *buf, u64 orig_start);
int btrfs_add_dead_reloc_root(struct btrfs_root *root);
int btrfs_cleanup_reloc_trees(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags);
/* ctree.c */
int btrfs_previous_item(struct btrfs_root *root,
Expand Down Expand Up @@ -1994,6 +1995,8 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
u64 isize);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start,
u64 end, struct list_head *list);
/* inode.c */

/* RHEL and EL kernels have a patch that renames PG_checked to FsMisc */
Expand Down
50 changes: 46 additions & 4 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,7 @@ int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
}

int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr)
struct btrfs_root *root, u64 objectid, u64 bytenr)
{
struct btrfs_root *extent_root = root->fs_info->extent_root;
struct btrfs_path *path;
Expand Down Expand Up @@ -1418,8 +1418,9 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
ref_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_extent_ref);
ref_root = btrfs_ref_root(leaf, ref_item);
if (ref_root != root->root_key.objectid &&
ref_root != BTRFS_TREE_LOG_OBJECTID) {
if ((ref_root != root->root_key.objectid &&
ref_root != BTRFS_TREE_LOG_OBJECTID) ||
objectid != btrfs_ref_objectid(leaf, ref_item)) {
ret = 1;
goto out;
}
Expand Down Expand Up @@ -5367,7 +5368,6 @@ static int noinline relocate_one_extent(struct btrfs_root *extent_root,
if (ret)
goto out;
}
btrfs_record_root_in_trans(found_root);
ret = replace_one_extent(trans, found_root,
path, extent_key,
&first_key, ref_path,
Expand Down Expand Up @@ -5534,6 +5534,7 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
} else {
BUG_ON(1);
}
BTRFS_I(inode)->index_cnt = group->key.objectid;

err = btrfs_orphan_add(trans, inode);
out:
Expand All @@ -5546,6 +5547,47 @@ static struct inode noinline *create_reloc_inode(struct btrfs_fs_info *fs_info,
return inode;
}

/*
 * Clone the checksums of a data extent that is being relocated and attach
 * them to the ordered extent that describes its new location.
 *
 * @inode:    inode owning the ordered extent; BTRFS_I(inode)->index_cnt is
 *            added to @file_pos to form the disk byte number whose checksums
 *            are looked up
 * @file_pos: file offset at which the ordered extent must start
 * @len:      length the ordered extent must have
 *
 * Every checksum found for [disk_bytenr, disk_bytenr + len - 1] is rebased
 * so that it refers to ordered->start and is then queued on the ordered
 * extent with btrfs_add_ordered_sum().
 *
 * Returns 0 on success, or the negative error code reported by
 * btrfs_lookup_csums_range().  The lookup result used to be discarded and 0
 * returned unconditionally, which hid lookup failures from the caller.
 */
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
{
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct list_head list;
	size_t offset;
	int ret;
	u64 disk_bytenr;

	INIT_LIST_HEAD(&list);

	/* The ordered extent must match the requested range exactly. */
	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
	BUG_ON(ordered->file_offset != file_pos || ordered->len != len);

	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
	ret = btrfs_lookup_csums_range(root, disk_bytenr,
				       disk_bytenr + len - 1, &list);

	/*
	 * Drain the list even when the lookup failed: entries collected
	 * before the failure are handed to the ordered extent instead of
	 * being leaked.  The error itself is reported through the return
	 * value below.
	 */
	while (!list_empty(&list)) {
		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
		list_del_init(&sums->list);

		sector_sum = sums->sums;
		sums->bytenr = ordered->start;

		/* Shift each per-sector entry from the old to the new location. */
		offset = 0;
		while (offset < sums->len) {
			sector_sum->bytenr += ordered->start - disk_bytenr;
			sector_sum++;
			offset += root->sectorsize;
		}

		btrfs_add_ordered_sum(inode, ordered, sums);
	}

	btrfs_put_ordered_extent(ordered);
	return ret;
}

int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
{
struct btrfs_trans_handle *trans;
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/extent_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#define EXTENT_ORDERED (1 << 9)
#define EXTENT_ORDERED_METADATA (1 << 10)
#define EXTENT_BOUNDARY (1 << 11)
#define EXTENT_NODATASUM (1 << 12)
#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)

/* flags for bio submission */
Expand Down
114 changes: 111 additions & 3 deletions fs/btrfs/file-item.c
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
return ret;
}


int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u32 *dst)
{
Expand Down Expand Up @@ -185,9 +186,16 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
if (ret == -ENOENT || ret == -EFBIG)
ret = 0;
sum = 0;
printk("no csum found for inode %lu start "
"%llu\n", inode->i_ino,
(unsigned long long)offset);
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_extent_bits(io_tree, offset,
offset + bvec->bv_len - 1,
EXTENT_NODATASUM, GFP_NOFS);
} else {
printk("no csum found for inode %lu "
"start %llu\n", inode->i_ino,
(unsigned long long)offset);
}
item = NULL;
btrfs_release_path(root, path);
goto found;
Expand Down Expand Up @@ -228,6 +236,106 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
return 0;
}

/*
 * Collect the checksums recorded in the csum tree for the byte range
 * [start, end] (both inclusive).
 *
 * @root:  any root of the filesystem; the csum tree, super block copy and
 *         sector size are reached through it
 * @start: first byte number of the range
 * @end:   last byte number of the range
 * @list:  list that receives the results
 *
 * For every csum item that overlaps the range, one btrfs_ordered_sum is
 * allocated, filled with one btrfs_sector_sum per sector of the overlap and
 * appended to the tail of @list.
 *
 * Returns 0 on success (including when no checksum was found) or the
 * negative error from btrfs_search_slot() / btrfs_next_leaf().  Entries
 * appended before a failure are left on @list.
 *
 * NOTE(review): the copy loop assumes the clamped length is a multiple of
 * root->sectorsize; otherwise the unsigned length would wrap around.
 * Confirm that callers always pass sector-aligned ranges.
 */
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
			     struct list_head *list)
{
	struct btrfs_root *csum_root = root->fs_info->csum_root;
	unsigned int sector_bits = root->fs_info->sb->s_blocksize_bits;
	u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
	struct btrfs_key key;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_csum_item *item;
	unsigned long offset;
	size_t item_size;
	size_t len;
	u64 csum_end;
	int ret;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
	key.type = BTRFS_EXTENT_CSUM_KEY;
	key.offset = start;

	ret = btrfs_search_slot(NULL, csum_root, &key, path, 0, 0);
	if (ret < 0)
		goto fail;

	/*
	 * No exact match: the preceding csum item may still cover @start,
	 * in which case the walk has to begin there.
	 */
	if (ret > 0 && path->slots[0] > 0) {
		int prev = path->slots[0] - 1;

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, prev);
		if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
		    key.type == BTRFS_EXTENT_CSUM_KEY) {
			offset = (start - key.offset) >> sector_bits;
			if (offset * csum_size <
			    btrfs_item_size_nr(leaf, prev))
				path->slots[0] = prev;
		}
	}

	/* Walk csum items in key order until the range is exhausted. */
	for (; start <= end; path->slots[0]++) {
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(csum_root, path);
			if (ret < 0)
				goto fail;
			if (ret > 0)
				break;	/* no more leaves */
			leaf = path->nodes[0];
		}

		/* Stop at the first item that is not a csum inside the range. */
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
		    key.type != BTRFS_EXTENT_CSUM_KEY ||
		    key.offset > end)
			break;

		/* Skip any gap that has no checksums. */
		if (start < key.offset)
			start = key.offset;

		/* One past the last byte covered by this item. */
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		csum_end = key.offset +
			   (item_size / csum_size) * root->sectorsize;

		/* Portion of this item that falls inside [start, end]. */
		len = min(csum_end, end + 1) - start;
		sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
		BUG_ON(!sums);

		sums->bytenr = start;
		sums->len = len;
		sector_sum = sums->sums;

		/* Byte offset of the first wanted checksum within the item. */
		offset = (start - key.offset) >> sector_bits;
		offset *= csum_size;

		item = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_csum_item);

		/* Copy one checksum per sector, advancing @start as we go. */
		for (; len > 0; len -= root->sectorsize) {
			read_extent_buffer(leaf, &sector_sum->sum,
					   ((unsigned long)item) + offset,
					   csum_size);
			sector_sum->bytenr = start;

			start += root->sectorsize;
			offset += csum_size;
			sector_sum++;
		}

		list_add_tail(&sums->list, list);
	}
	ret = 0;
fail:
	btrfs_free_path(path);
	return ret;
}

int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio *bio, u64 file_start, int contig)
{
Expand Down
8 changes: 0 additions & 8 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1059,14 +1059,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
first_index = pos >> PAGE_CACHE_SHIFT;
last_index = (pos + count) >> PAGE_CACHE_SHIFT;

/*
* if this is a nodatasum mount, force summing off for the inode
* all the time. That way a later mount with summing on won't
* get confused
*/
if (btrfs_test_opt(root, NODATASUM))
btrfs_set_flag(inode, NODATASUM);

/*
* there are lots of better ways to do this, but this code
* makes sure the first and last page in the file range are
Expand Down
Loading

0 comments on commit 17d217f

Please sign in to comment.