Skip to content

Commit

Permalink
Btrfs: deal with read errors on extent buffers differently
Browse files Browse the repository at this point in the history
Since we need to read and write extent buffers in their entirety, we can't use
the normal bio_readpage_error stuff, because it only works on a per-page basis.  So
instead, if we see an IO error in endio we just mark the eb as
having an IO error, and then in btree_read_extent_buffer_pages we manually
try the other mirrors and overwrite the bad mirror if we find a good copy.
This works with larger than page size blocks.  Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
  • Loading branch information
Josef Bacik authored and Chris Mason committed Mar 27, 2012
1 parent f3f266a commit ea46679
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 27 deletions.
43 changes: 29 additions & 14 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,9 +360,11 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
u64 start, u64 parent_transid)
{
struct extent_io_tree *io_tree;
int failed = 0;
int ret;
int num_copies = 0;
int mirror_num = 0;
int failed_mirror = 0;

clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
Expand All @@ -371,26 +373,39 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
WAIT_COMPLETE,
btree_get_extent, mirror_num);
if (!ret && !verify_parent_transid(io_tree, eb, parent_transid))
return ret;
break;

/*
* This buffer's crc is fine, but its contents are corrupted, so
* there is no reason to read the other copies, they won't be
* any less wrong.
*/
if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags))
return ret;
break;

if (!failed_mirror) {
failed = 1;
printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror);
failed_mirror = eb->failed_mirror;
}

num_copies = btrfs_num_copies(&root->fs_info->mapping_tree,
eb->start, eb->len);
if (num_copies == 1)
return ret;
break;

mirror_num++;
if (mirror_num == failed_mirror)
mirror_num++;

if (mirror_num > num_copies)
return ret;
break;
}
return -EIO;

if (failed && !ret)
repair_eb_io_failure(root, eb, failed_mirror);

return ret;
}

/*
Expand Down Expand Up @@ -575,6 +590,11 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
if (!reads_done)
goto err;

if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) {
ret = -EIO;
goto err;
}

found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
printk_ratelimited(KERN_INFO "btrfs bad tree block start "
Expand Down Expand Up @@ -626,21 +646,16 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
return ret;
}

static int btree_io_failed_hook(struct bio *failed_bio,
struct page *page, u64 start, u64 end,
int mirror_num, struct extent_state *state)
static int btree_io_failed_hook(struct page *page, int failed_mirror)
{
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;

eb = (struct extent_buffer *)page->private;
if (page != eb->pages[0])
return -EIO;

if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
eb->failed_mirror = failed_mirror;
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(root, eb, eb->start, -EIO);
}
return -EIO; /* we fixed nothing */
}

Expand Down
42 changes: 32 additions & 10 deletions fs/btrfs/extent_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1915,6 +1915,26 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
return 0;
}

/*
 * Repair every page of an extent buffer on the given mirror.
 *
 * Walks the pages backing @eb in order and calls repair_io_failure() once
 * per page, passing a PAGE_CACHE_SIZE-long range whose start and logical
 * address both advance by PAGE_CACHE_SIZE on each step, beginning at
 * eb->start.
 *
 * @root:       root used to reach fs_info->mapping_tree
 * @eb:         extent buffer whose pages are handed to repair_io_failure()
 * @mirror_num: mirror number forwarded unchanged to repair_io_failure()
 *
 * Returns 0 if every per-page call returned 0 (or if the buffer spans no
 * pages), otherwise the first non-zero value returned by
 * repair_io_failure(); the remaining pages are not attempted in that case.
 */
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
			 int mirror_num)
{
	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
	u64 start = eb->start;
	unsigned long i, num_pages = num_extent_pages(eb->start, eb->len);
	/*
	 * Must start at 0: if the loop body never runs, ret would otherwise
	 * be returned with an indeterminate value.
	 */
	int ret = 0;

	for (i = 0; i < num_pages; i++) {
		struct page *p = extent_buffer_page(eb, i);

		ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE,
					start, p, mirror_num);
		if (ret)
			break;	/* stop at the first page that fails */
		start += PAGE_CACHE_SIZE;
	}

	return ret;
}

/*
* each time an IO finishes, we do a fast check in the IO failure tree
* to see if we need to process or clean up an io_failure_record
Expand Down Expand Up @@ -2261,6 +2281,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
u64 start;
u64 end;
int whole_page;
int failed_mirror;
int ret;

if (err)
Expand Down Expand Up @@ -2307,9 +2328,16 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
else
clean_io_failure(start, page);
}
if (!uptodate) {
int failed_mirror;

if (!uptodate)
failed_mirror = (int)(unsigned long)bio->bi_bdev;

if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
ret = tree->ops->readpage_io_failed_hook(page, failed_mirror);
if (!ret && !err &&
test_bit(BIO_UPTODATE, &bio->bi_flags))
uptodate = 1;
} else if (!uptodate) {
/*
* The generic bio_readpage_error handles errors the
* following way: If possible, new read requests are
Expand All @@ -2323,21 +2351,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
ret = bio_readpage_error(bio, page, start, end,
failed_mirror, NULL);
if (ret == 0) {
error_handled:
uptodate =
test_bit(BIO_UPTODATE, &bio->bi_flags);
if (err)
uptodate = 0;
uncache_state(&cached);
continue;
}
if (tree->ops && tree->ops->readpage_io_failed_hook) {
ret = tree->ops->readpage_io_failed_hook(
bio, page, start, end,
failed_mirror, state);
if (ret == 0)
goto error_handled;
}
}

if (uptodate && tree->track_uptodate) {
Expand Down Expand Up @@ -4396,6 +4416,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
goto unlock_exit;
}

clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
eb->failed_mirror = 0;
atomic_set(&eb->io_pages, num_reads);
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
Expand Down
8 changes: 5 additions & 3 deletions fs/btrfs/extent_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#define EXTENT_PAGE_PRIVATE_FIRST_PAGE 3

struct extent_state;
struct btrfs_root;

typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
Expand All @@ -73,9 +74,7 @@ struct extent_io_ops {
size_t size, struct bio *bio,
unsigned long bio_flags);
int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
int (*readpage_io_failed_hook)(struct bio *bio, struct page *page,
u64 start, u64 end, int failed_mirror,
struct extent_state *state);
int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
int (*writepage_io_failed_hook)(struct bio *bio, struct page *page,
u64 start, u64 end,
struct extent_state *state);
Expand Down Expand Up @@ -136,6 +135,7 @@ struct extent_buffer {
spinlock_t refs_lock;
atomic_t refs;
atomic_t io_pages;
int failed_mirror;
struct list_head leak_list;
struct rcu_head rcu_head;
pid_t lock_owner;
Expand Down Expand Up @@ -327,4 +327,6 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start,
u64 length, u64 logical, struct page *page,
int mirror_num);
int end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
int mirror_num);
#endif

0 comments on commit ea46679

Please sign in to comment.