Skip to content

Commit

Permalink
Btrfs: Fix bookend extent race v2
Browse files Browse the repository at this point in the history
When dropping the middle part of an extent, btrfs_drop_extents first
truncates the extent, then inserts a bookend extent.

Since truncation and insertion can't be done atomically, there is a small
window during which the bookend extent isn't in the tree. This causes problems
for functions that search the tree for file extent items. The fix is to
lock the range of the bookend extent before truncation.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
  • Loading branch information
Yan Zheng authored and Chris Mason committed Oct 30, 2008
1 parent 9036c10 commit 6643558
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 32 deletions.
28 changes: 13 additions & 15 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -3379,19 +3379,21 @@ static int noinline relocate_data_extent(struct inode *reloc_inode,
struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
struct extent_map *em;
u64 start = extent_key->objectid - offset;
u64 end = start + extent_key->offset - 1;

em = alloc_extent_map(GFP_NOFS);
BUG_ON(!em || IS_ERR(em));

em->start = extent_key->objectid - offset;
em->start = start;
em->len = extent_key->offset;
em->block_len = extent_key->offset;
em->block_start = extent_key->objectid;
em->bdev = root->fs_info->fs_devices->latest_bdev;
set_bit(EXTENT_FLAG_PINNED, &em->flags);

/* setup extent map to cheat btrfs_readpage */
mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex);
lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
while (1) {
int ret;
spin_lock(&em_tree->lock);
Expand All @@ -3401,13 +3403,11 @@ static int noinline relocate_data_extent(struct inode *reloc_inode,
free_extent_map(em);
break;
}
btrfs_drop_extent_cache(reloc_inode, em->start,
em->start + em->len - 1, 0);
btrfs_drop_extent_cache(reloc_inode, start, end, 0);
}
mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex);
unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);

return relocate_inode_pages(reloc_inode, extent_key->objectid - offset,
extent_key->offset);
return relocate_inode_pages(reloc_inode, start, extent_key->offset);
}

struct btrfs_ref_path {
Expand Down Expand Up @@ -3831,7 +3831,6 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
* the file extent item was modified by someone
* before the extent got locked.
*/
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
lock_end, GFP_NOFS);
extent_locked = 0;
Expand Down Expand Up @@ -3896,8 +3895,12 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
lock_start = key.offset;
lock_end = lock_start + num_bytes - 1;
} else {
BUG_ON(lock_start != key.offset);
BUG_ON(lock_end - lock_start + 1 < num_bytes);
if (lock_start > key.offset ||
lock_end + 1 < key.offset + num_bytes) {
unlock_extent(&BTRFS_I(inode)->io_tree,
lock_start, lock_end, GFP_NOFS);
extent_locked = 0;
}
}

if (!inode) {
Expand Down Expand Up @@ -3951,7 +3954,6 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
if (ordered)
btrfs_put_ordered_extent(ordered);

mutex_lock(&BTRFS_I(inode)->extent_mutex);
extent_locked = 1;
continue;
}
Expand Down Expand Up @@ -4073,7 +4075,6 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
}

if (extent_locked) {
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
lock_end, GFP_NOFS);
extent_locked = 0;
Expand All @@ -4091,7 +4092,6 @@ static int noinline replace_one_extent(struct btrfs_trans_handle *trans,
if (inode) {
mutex_unlock(&inode->i_mutex);
if (extent_locked) {
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
lock_end, GFP_NOFS);
}
Expand Down Expand Up @@ -4180,10 +4180,8 @@ static int noinline invalidate_extent_cache(struct btrfs_root *root,

lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
key.offset + num_bytes - 1, GFP_NOFS);
mutex_lock(&BTRFS_I(inode)->extent_mutex);
btrfs_drop_extent_cache(inode, key.offset,
key.offset + num_bytes - 1, 1);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
key.offset + num_bytes - 1, GFP_NOFS);
cond_resched();
Expand Down
6 changes: 5 additions & 1 deletion fs/btrfs/extent_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -946,8 +946,12 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,

err = set_extent_bit(tree, start, end, EXTENT_LOCKED, 1,
&failed_start, mask);
if (err == -EEXIST)
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
EXTENT_LOCKED, 1, 0, mask);
return 0;
}
return 1;
}
EXPORT_SYMBOL(try_lock_extent);
Expand Down
31 changes: 25 additions & 6 deletions fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,7 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
u64 start, u64 end, u64 inline_limit, u64 *hint_byte)
{
u64 extent_end = 0;
u64 locked_end = end;
u64 search_start = start;
u64 leaf_start;
u64 ram_bytes = 0;
Expand Down Expand Up @@ -479,12 +480,6 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
goto next_slot;
}

if (found_inline) {
u64 mask = root->sectorsize - 1;
search_start = (extent_end + mask) & ~mask;
} else
search_start = extent_end;

if (end <= extent_end && start >= key.offset && found_inline)
*hint_byte = EXTENT_MAP_INLINE;

Expand All @@ -501,6 +496,26 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (found_inline && start <= key.offset)
keep = 1;
}

if (bookend && found_extent && locked_end < extent_end) {
ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
locked_end, extent_end - 1, GFP_NOFS);
if (!ret) {
btrfs_release_path(root, path);
lock_extent(&BTRFS_I(inode)->io_tree,
locked_end, extent_end - 1, GFP_NOFS);
locked_end = extent_end;
continue;
}
locked_end = extent_end;
}

if (found_inline) {
u64 mask = root->sectorsize - 1;
search_start = (extent_end + mask) & ~mask;
} else
search_start = extent_end;

/* truncate existing extent */
if (start > key.offset) {
u64 new_num;
Expand Down Expand Up @@ -638,6 +653,10 @@ int noinline btrfs_drop_extents(struct btrfs_trans_handle *trans,
}
out:
btrfs_free_path(path);
if (locked_end > end) {
unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
GFP_NOFS);
}
btrfs_check_file(root, inode);
return ret;
}
Expand Down
10 changes: 0 additions & 10 deletions fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,6 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans,
return 1;
}

mutex_lock(&BTRFS_I(inode)->extent_mutex);
ret = btrfs_drop_extents(trans, root, inode, start,
aligned_end, aligned_end, &hint_byte);
BUG_ON(ret);
Expand All @@ -258,7 +257,6 @@ static int cow_file_range_inline(struct btrfs_trans_handle *trans,
compressed_pages);
BUG_ON(ret);
btrfs_drop_extent_cache(inode, start, aligned_end, 0);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);
return 0;
}

Expand Down Expand Up @@ -437,9 +435,7 @@ static int cow_file_range(struct inode *inode, struct page *locked_page,
BUG_ON(disk_num_bytes >
btrfs_super_total_bytes(&root->fs_info->super_copy));

mutex_lock(&BTRFS_I(inode)->extent_mutex);
btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);

while(disk_num_bytes > 0) {
unsigned long min_bytes;
Expand Down Expand Up @@ -477,8 +473,6 @@ static int cow_file_range(struct inode *inode, struct page *locked_page,
em->block_start = ins.objectid;
em->block_len = ins.offset;
em->bdev = root->fs_info->fs_devices->latest_bdev;

mutex_lock(&BTRFS_I(inode)->extent_mutex);
set_bit(EXTENT_FLAG_PINNED, &em->flags);

if (will_compress)
Expand All @@ -495,7 +489,6 @@ static int cow_file_range(struct inode *inode, struct page *locked_page,
btrfs_drop_extent_cache(inode, start,
start + ram_size - 1, 0);
}
mutex_unlock(&BTRFS_I(inode)->extent_mutex);

cur_alloc_size = ins.offset;
ret = btrfs_add_ordered_extent(inode, start, ins.objectid,
Expand Down Expand Up @@ -1016,8 +1009,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)

INIT_LIST_HEAD(&list);

mutex_lock(&BTRFS_I(inode)->extent_mutex);

ret = btrfs_drop_extents(trans, root, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
Expand Down Expand Up @@ -1059,7 +1050,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
btrfs_drop_extent_cache(inode, ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len - 1, 0);
mutex_unlock(&BTRFS_I(inode)->extent_mutex);

ins.objectid = ordered_extent->start;
ins.offset = ordered_extent->disk_len;
Expand Down

0 comments on commit 6643558

Please sign in to comment.