Skip to content

Commit

Permalink
Btrfs: Use the extent map cache to find the logical disk block during…
Browse files Browse the repository at this point in the history
… data retries

The data read retry code needs to find the logical disk block before it
can resubmit new bios.  But, finding this block isn't allowed to take
the fs_mutex because that will deadlock with a number of different callers.

This changes the retry code to use the extent map cache instead, but
that requires the extent map cache to have the extent we're looking for.
This is a problem because btrfs_drop_extent_cache just drops the entire
extent instead of the little tiny part it is invalidating.

The bulk of the code in this patch changes btrfs_drop_extent_cache to
invalidate only a portion of the extent cache, and changes btrfs_get_extent
to deal with the results.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
  • Loading branch information
Chris Mason committed Sep 25, 2008
1 parent 7b859fe commit 3b95151
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 15 deletions.
1 change: 1 addition & 0 deletions fs/btrfs/extent-tree.c
Original file line number Diff line number Diff line change
Expand Up @@ -1747,6 +1747,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
search_start, search_end, hint_byte, ins,
trans->alloc_exclude_start,
trans->alloc_exclude_nr, data);

if (ret == -ENOSPC && num_bytes > min_alloc_size) {
num_bytes = num_bytes >> 1;
num_bytes = max(num_bytes, min_alloc_size);
Expand Down
12 changes: 8 additions & 4 deletions fs/btrfs/extent_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1025,7 +1025,8 @@ u64 find_lock_delalloc_range(struct extent_io_tree *tree,
search_again:
node = tree_search(tree, cur_start);
if (!node) {
*end = (u64)-1;
if (!found)
*end = (u64)-1;
goto out;
}

Expand Down Expand Up @@ -1540,6 +1541,8 @@ static int end_bio_extent_readpage(struct bio *bio,
start, end, state);
if (ret == 0) {
state = NULL;
uptodate =
test_bit(BIO_UPTODATE, &bio->bi_flags);
continue;
}
}
Expand All @@ -1555,10 +1558,11 @@ static int end_bio_extent_readpage(struct bio *bio,
!(state->state & EXTENT_LOCKED))
state = NULL;
}
if (!state && uptodate) {
if (!state) {
spin_unlock_irqrestore(&tree->lock, flags);
set_extent_uptodate(tree, start, end,
GFP_ATOMIC);
if (uptodate)
set_extent_uptodate(tree, start, end,
GFP_ATOMIC);
unlock_extent(tree, start, end, GFP_ATOMIC);
goto next_io;
}
Expand Down
47 changes: 46 additions & 1 deletion fs/btrfs/file.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,26 +356,71 @@ static int noinline dirty_and_release_pages(struct btrfs_trans_handle *trans,
/*
 * Drop the cached extent mappings of 'inode' that lookup_extent_mapping()
 * reports for the byte range [start, end] (end is inclusive).
 *
 * A mapping that extends outside the range is not thrown away wholesale:
 * for mappings whose block_start is below EXTENT_MAP_LAST_BYTE, the part
 * in front of 'start' and/or the part behind 'end' is re-inserted into
 * the tree as a new, smaller mapping.
 *
 * Passing end == (u64)-1 means "through the end of the file": the length
 * is forced to (u64)-1 and no tail piece is ever re-inserted.
 *
 * Always returns 0.
 *
 * NOTE(review): the results of alloc_extent_map() are dereferenced below
 * without a NULL check -- confirm whether it can fail with GFP_NOFS.
 */
int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end)
{
struct extent_map *em;
struct extent_map *split = NULL;
struct extent_map *split2 = NULL;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
u64 len = end - start + 1;
int ret;
/* non-zero while the range has a real end, so a tail piece may exist */
int testend = 1;

/*
 * NOTE(review): the condition below appears twice in a row, which makes
 * the outer test redundant; this looks like the removed and the added
 * line of the diff shown together -- confirm against the real file.
 */
if (end == (u64)-1)
if (end == (u64)-1) {
len = (u64)-1;
testend = 0;
}
while(1) {
/*
 * Make sure two spare maps exist before taking the spinlock; one
 * iteration can consume both (a front piece and a tail piece).
 */
if (!split)
split = alloc_extent_map(GFP_NOFS);
if (!split2)
split2 = alloc_extent_map(GFP_NOFS);

spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (!em) {
/* nothing left in the range, we are done */
spin_unlock(&em_tree->lock);
break;
}
remove_extent_mapping(em_tree, em);

/* keep the piece of the mapping that lies in front of 'start' */
if (em->block_start < EXTENT_MAP_LAST_BYTE &&
em->start < start) {
split->start = em->start;
split->len = start - em->start;
split->block_start = em->block_start;
split->bdev = em->bdev;
split->flags = em->flags;
ret = add_extent_mapping(em_tree, split);
BUG_ON(ret);
/* presumably drops our reference; the tree keeps its own */
free_extent_map(split);
/* hand the second spare over in case a tail piece is needed too */
split = split2;
split2 = NULL;
}
/* keep the piece of the mapping that lies behind the dropped range */
if (em->block_start < EXTENT_MAP_LAST_BYTE &&
testend && em->start + em->len > start + len) {
/* distance from the start of the old mapping to the tail piece */
u64 diff = start + len - em->start;

split->start = start + len;
split->len = em->start + em->len - (start + len);
split->bdev = em->bdev;
split->flags = em->flags;

/* the tail piece begins 'diff' bytes into the old mapping's blocks */
split->block_start = em->block_start + diff;

ret = add_extent_mapping(em_tree, split);
BUG_ON(ret);
free_extent_map(split);
split = NULL;
}
spin_unlock(&em_tree->lock);

/* once for us */
free_extent_map(em);
/* once for the tree*/
free_extent_map(em);
}
/* release whichever spare maps were never inserted */
if (split)
free_extent_map(split);
if (split2)
free_extent_map(split2);
return 0;
}

Expand Down
120 changes: 110 additions & 10 deletions fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
if (alloc_hint == EXTENT_MAP_INLINE)
goto out;

BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));

while(num_bytes > 0) {
cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
Expand All @@ -140,6 +142,11 @@ static int cow_file_range(struct inode *inode, u64 start, u64 end)
ins.offset);
inode->i_blocks += ins.offset >> 9;
btrfs_check_file(root, inode);
if (num_bytes < cur_alloc_size) {
printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
cur_alloc_size);
break;
}
num_bytes -= cur_alloc_size;
alloc_hint = ins.objectid + ins.offset;
start += cur_alloc_size;
Expand Down Expand Up @@ -427,23 +434,28 @@ int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
struct extent_map *em;
struct inode *inode = page->mapping->host;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct bio *bio;
int num_copies;
int ret;
u64 logical;

ret = get_state_private(failure_tree, start, &private);
if (ret) {
size_t pg_offset = start - page_offset(page);
failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
if (!failrec)
return -ENOMEM;
failrec->start = start;
failrec->len = end - start + 1;
failrec->last_mirror = 0;

em = btrfs_get_extent(inode, NULL, pg_offset, start,
failrec->len, 0);
spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, failrec->len);
if (em->start > start || em->start + em->len < start) {
free_extent_map(em);
em = NULL;
}
spin_unlock(&em_tree->lock);

if (!em || IS_ERR(em)) {
kfree(failrec);
Expand Down Expand Up @@ -559,6 +571,8 @@ int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
flush_dcache_page(page);
kunmap_atomic(kaddr, KM_IRQ0);
local_irq_restore(flags);
if (private == 0)
return 0;
return -EIO;
}

Expand Down Expand Up @@ -908,8 +922,9 @@ static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
int pending_del_nr = 0;
int pending_del_slot = 0;
int extent_type = -1;
u64 mask = root->sectorsize - 1;

btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
path = btrfs_alloc_path();
path->reada = -1;
BUG_ON(!path);
Expand Down Expand Up @@ -1212,7 +1227,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
hole_start, 0, 0,
hole_size);
btrfs_drop_extent_cache(inode, hole_start,
hole_size - 1);
(u64)-1);
btrfs_check_file(root, inode);
}
btrfs_end_transaction(trans, root);
Expand Down Expand Up @@ -2083,6 +2098,68 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
return err;
}

/*
 * Merge 'em' with 'existing', a mapping that is already in 'em_tree',
 * so that a single mapping covers the union of both ranges.
 *
 * The merge is only performed when the two mappings are compatible:
 *  - if 'existing' maps real disk blocks (block_start below
 *    EXTENT_MAP_LAST_BYTE), 'em' must map real blocks as well, and the
 *    block numbers must line up once the difference between the two
 *    file offsets is taken into account;
 *  - otherwise both must carry the same special block_start value;
 *  - the two file ranges must overlap or touch.
 *
 * On success 'existing' is removed from the tree, the tree's reference
 * on it is dropped, and the widened 'em' is inserted in its place.  The
 * visible caller keeps and later drops its own reference on 'existing'
 * and calls this with em_tree->lock held.
 *
 * Returns the result of add_extent_mapping() when a merge is attempted,
 * or -EIO (after logging both mappings) when they cannot be merged; in
 * the -EIO case neither mapping nor the tree is modified.
 */
static int merge_extent_mapping(struct extent_map_tree *em_tree,
				struct extent_map *existing,
				struct extent_map *em)
{
	u64 start_diff;
	u64 new_end;
	int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;

	if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
		goto invalid;

	if (!real_blocks && em->block_start != existing->block_start)
		goto invalid;

	new_end = max(existing->start + existing->len, em->start + em->len);

	if (existing->start >= em->start) {
		/* 'em' starts first (or at the same offset) */
		if (em->start + em->len < existing->start)
			goto invalid;

		start_diff = existing->start - em->start;
		if (real_blocks &&
		    em->block_start + start_diff != existing->block_start)
			goto invalid;

		/* start and block_start of 'em' are already right */
		em->len = new_end - em->start;
	} else {
		/* 'existing' starts first */
		if (existing->start + existing->len < em->start)
			goto invalid;

		start_diff = em->start - existing->start;
		if (real_blocks &&
		    existing->block_start + start_diff != em->block_start)
			goto invalid;

		/* pull the front of 'em' back to where 'existing' begins */
		em->block_start = existing->block_start;
		em->start = existing->start;
		em->len = new_end - existing->start;
	}

	/*
	 * 'em' now describes the union; swap it in for 'existing'.
	 * 'existing' must not be touched after the free below.
	 */
	remove_extent_mapping(em_tree, existing);
	/* free for the tree */
	free_extent_map(existing);
	return add_extent_mapping(em_tree, em);

invalid:
	printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
	       existing->start, existing->len, existing->block_start,
	       em->start, em->len, em->block_start);
	return -EIO;
}

struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
size_t pg_offset, u64 start, u64 len,
int create)
Expand Down Expand Up @@ -2267,12 +2344,35 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
err = 0;
spin_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em);

/* it is possible that someone inserted the extent into the tree
* while we had the lock dropped. It is also possible that
* an overlapping map exists in the tree
*/
if (ret == -EEXIST) {
free_extent_map(em);
em = lookup_extent_mapping(em_tree, start, len);
if (!em) {
err = -EIO;
printk("failing to insert %Lu %Lu\n", start, len);
struct extent_map *existing;
existing = lookup_extent_mapping(em_tree, start, len);
if (!existing) {
existing = lookup_extent_mapping(em_tree, em->start,
em->len);
if (existing) {
err = merge_extent_mapping(em_tree, existing,
em);
free_extent_map(existing);
if (err) {
free_extent_map(em);
em = NULL;
}
} else {
err = -EIO;
printk("failing to insert %Lu %Lu\n",
start, len);
free_extent_map(em);
em = NULL;
}
} else {
free_extent_map(em);
em = existing;
}
}
spin_unlock(&em_tree->lock);
Expand Down
3 changes: 3 additions & 0 deletions fs/btrfs/volumes.c
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,9 @@ int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
spin_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, logical, *length);
spin_unlock(&em_tree->lock);
if (!em) {
printk("unable to find logical %Lu\n", logical);
}
BUG_ON(!em);

BUG_ON(em->start > logical || em->start + em->len < logical);
Expand Down

0 comments on commit 3b95151

Please sign in to comment.