Skip to content

Commit

Permalink
btrfs: use the file extent tree infrastructure
Browse files Browse the repository at this point in the history
We want to use this everywhere we modify the file extent items
permanently.  These include:

  1) Inserting new file extents for writes and prealloc extents.
  2) Truncating inode items.
  3) btrfs_cont_expand().
  4) Insert inline extents.
  5) Insert new extents from log replay.
  6) Insert a new extent for clone, as it could be past i_size.
  7) Hole punching

For hole punching in particular it might seem it's not necessary because
anybody extending would use btrfs_cont_expand, however there is a corner
that still can give us trouble.  Start with an empty file and

fallocate KEEP_SIZE 1M-2M

We now have a 0 length file, and a hole file extent from 0-1M, and a
prealloc extent from 1M-2M.  Now

punch 1M-1.5M

Because this is past i_size we have

[HOLE EXTENT][ NOTHING ][PREALLOC]
[0        1M][1M   1.5M][1.5M  2M]

with an i_size of 0.  Now if we pwrite 0-1.5M we'll increas our i_size
to 1.5M, but our disk_i_size is still 0 until the ordered extent
completes.

However if we now immediately truncate 2M on the file we'll just call
btrfs_cont_expand(inode, 1.5M, 2M), since our old i_size is 1.5M.  If we
commit the transaction here and crash we'll expose the gap.

To fix this we need to clear the file extent mapping for the range that
we punched but didn't insert a corresponding file extent for.  This will
mean the truncate will only get an disk_i_size set to 1M if we crash
before the finish ordered io happens.

I've written an xfstest to reproduce the problem and validate this fix.

Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
  • Loading branch information
Josef Bacik authored and David Sterba committed Mar 23, 2020
1 parent 41a2ee7 commit 9ddc959
Show file tree
Hide file tree
Showing 4 changed files with 94 additions and 1 deletion.
3 changes: 3 additions & 0 deletions fs/btrfs/delayed-inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1760,6 +1760,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,

int btrfs_fill_inode(struct inode *inode, u32 *rdev)
{
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct btrfs_delayed_node *delayed_node;
struct btrfs_inode_item *inode_item;

Expand All @@ -1779,6 +1780,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
round_up(i_size_read(inode), fs_info->sectorsize));
inode->i_mode = btrfs_stack_inode_mode(inode_item);
set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
Expand Down
32 changes: 32 additions & 0 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -2486,6 +2486,11 @@ static int btrfs_insert_clone_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);

ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
clone_info->file_offset, clone_len);
if (ret)
return ret;

/* If it's a hole, nothing more needs to be done. */
if (clone_info->disk_offset == 0)
return 0;
Expand Down Expand Up @@ -2596,6 +2601,24 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
btrfs_abort_transaction(trans, ret);
break;
}
} else if (!clone_info && cur_offset < drop_end) {
/*
* We are past the i_size here, but since we didn't
* insert holes we need to clear the mapped area so we
* know to not set disk_i_size in this area until a new
* file extent is inserted here.
*/
ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
cur_offset, drop_end - cur_offset);
if (ret) {
/*
* We couldn't clear our area, so we could
* presumably adjust up and corrupt the fs, so
* we need to abort.
*/
btrfs_abort_transaction(trans, ret);
break;
}
}

if (clone_info && drop_end > clone_info->file_offset) {
Expand Down Expand Up @@ -2686,6 +2709,15 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
btrfs_abort_transaction(trans, ret);
goto out_trans;
}
} else if (!clone_info && cur_offset < drop_end) {
/* See the comment in the loop above for the reasoning here. */
ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
cur_offset, drop_end - cur_offset);
if (ret) {
btrfs_abort_transaction(trans, ret);
goto out_trans;
}

}
if (clone_info) {
ret = btrfs_insert_clone_extent(trans, inode, path, clone_info,
Expand Down
55 changes: 54 additions & 1 deletion fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,15 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);

/*
* We align size to sectorsize for inline extents just for simplicity
* sake.
*/
size = ALIGN(size, root->fs_info->sectorsize);
ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size);
if (ret)
goto fail;

/*
* we're an inline extent, so nobody can
* extend the file past i_size without locking
Expand Down Expand Up @@ -2446,6 +2455,11 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
ins.offset = disk_num_bytes;
ins.type = BTRFS_EXTENT_ITEM_KEY;

ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), file_pos,
ram_bytes);
if (ret)
goto out;

/*
* Release the reserved range from inode dirty range map, as it is
* already moved into delayed_ref_head
Expand Down Expand Up @@ -4160,6 +4174,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
}

while (1) {
u64 clear_start = 0, clear_len = 0;

fi = NULL;
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
Expand Down Expand Up @@ -4210,13 +4226,16 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,

if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
u64 num_dec;

clear_start = found_key.offset;
extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
if (!del_item) {
u64 orig_num_bytes =
btrfs_file_extent_num_bytes(leaf, fi);
extent_num_bytes = ALIGN(new_size -
found_key.offset,
fs_info->sectorsize);
clear_start = ALIGN(new_size, fs_info->sectorsize);
btrfs_set_file_extent_num_bytes(leaf, fi,
extent_num_bytes);
num_dec = (orig_num_bytes -
Expand All @@ -4242,6 +4261,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
inode_sub_bytes(inode, num_dec);
}
}
clear_len = num_dec;
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
/*
* we can't truncate inline items that have had
Expand All @@ -4263,12 +4283,33 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
*/
ret = NEED_TRUNCATE_BLOCK;
break;
} else {
/*
* Inline extents are special, we just treat
* them as a full sector worth in the file
* extent tree just for simplicity sake.
*/
clear_len = fs_info->sectorsize;
}

if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
inode_sub_bytes(inode, item_end + 1 - new_size);
}
delete:
/*
* We use btrfs_truncate_inode_items() to clean up log trees for
* multiple fsyncs, and in this case we don't want to clear the
* file extent range because it's just the log.
*/
if (root == BTRFS_I(inode)->root) {
ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
clear_start, clear_len);
if (ret) {
btrfs_abort_transaction(trans, ret);
break;
}
}

if (del_item)
last_size = found_key.offset;
else
Expand Down Expand Up @@ -4591,14 +4632,21 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
}
last_byte = min(extent_map_end(em), block_end);
last_byte = ALIGN(last_byte, fs_info->sectorsize);
hole_size = last_byte - cur_offset;

if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
struct extent_map *hole_em;
hole_size = last_byte - cur_offset;

err = maybe_insert_hole(root, inode, cur_offset,
hole_size);
if (err)
break;

err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
cur_offset, hole_size);
if (err)
break;

btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
cur_offset + hole_size - 1, 0);
hole_em = alloc_extent_map();
Expand Down Expand Up @@ -4630,6 +4678,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
hole_size - 1, 0);
}
free_extent_map(hole_em);
} else {
err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
cur_offset, hole_size);
if (err)
break;
}
next:
free_extent_map(em);
Expand Down
5 changes: 5 additions & 0 deletions fs/btrfs/tree-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -830,6 +830,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
goto out;
}

ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start,
extent_end - start);
if (ret)
goto out;

inode_add_bytes(inode, nbytes);
update_inode:
ret = btrfs_update_inode(trans, root, inode);
Expand Down

0 comments on commit 9ddc959

Please sign in to comment.