Skip to content

Commit

Permalink
Btrfs: use a btrfs bioset instead of abusing bio internals
Browse files Browse the repository at this point in the history
Btrfs has been pointer tagging bi_private and using bi_bdev
to store the stripe index and mirror number of failed IOs.

As bios bubble back up through the call chain, we use these
to decide if and how to retry our IOs.  They are also used
to count IO failures on a per device basis.

Recently a bio tracepoint was added, which led to crashes because
we were abusing bi_bdev.

This commit adds a btrfs bioset, and creates explicit fields
for the mirror number and stripe index.  The plan is to
extend this structure for all of the fields currently in
struct btrfs_bio, which will mean one less kmalloc in
our IO path.

Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Reported-by: Tejun Heo <tj@kernel.org>
  • Loading branch information
Chris Mason committed May 18, 2013
1 parent 667e7d9 commit 9be3395
Show file tree
Hide file tree
Showing 9 changed files with 120 additions and 72 deletions.
2 changes: 1 addition & 1 deletion fs/btrfs/check-integrity.c
Original file line number Diff line number Diff line change
Expand Up @@ -1700,7 +1700,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
unsigned int j;
DECLARE_COMPLETION_ONSTACK(complete);

bio = bio_alloc(GFP_NOFS, num_pages - i);
bio = btrfs_io_bio_alloc(GFP_NOFS, num_pages - i);
if (!bio) {
printk(KERN_INFO
"btrfsic: bio_alloc() for %u pages failed!\n",
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -3128,7 +3128,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
* caller
*/
device->flush_bio = NULL;
bio = bio_alloc(GFP_NOFS, 0);
bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
return -ENOMEM;

Expand Down
49 changes: 41 additions & 8 deletions fs/btrfs/extent_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

static struct kmem_cache *extent_state_cache;
static struct kmem_cache *extent_buffer_cache;
static struct bio_set *btrfs_bioset;

#ifdef CONFIG_BTRFS_DEBUG
static LIST_HEAD(buffers);
Expand Down Expand Up @@ -125,10 +126,20 @@ int __init extent_io_init(void)
SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
if (!extent_buffer_cache)
goto free_state_cache;

btrfs_bioset = bioset_create(BIO_POOL_SIZE,
offsetof(struct btrfs_io_bio, bio));
if (!btrfs_bioset)
goto free_buffer_cache;
return 0;

free_buffer_cache:
kmem_cache_destroy(extent_buffer_cache);
extent_buffer_cache = NULL;

free_state_cache:
kmem_cache_destroy(extent_state_cache);
extent_state_cache = NULL;
return -ENOMEM;
}

Expand All @@ -145,6 +156,8 @@ void extent_io_exit(void)
kmem_cache_destroy(extent_state_cache);
if (extent_buffer_cache)
kmem_cache_destroy(extent_buffer_cache);
if (btrfs_bioset)
bioset_free(btrfs_bioset);
}

void extent_io_tree_init(struct extent_io_tree *tree,
Expand Down Expand Up @@ -2046,7 +2059,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
if (btrfs_is_parity_mirror(map_tree, logical, length, mirror_num))
return 0;

bio = bio_alloc(GFP_NOFS, 1);
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_private = &compl;
Expand Down Expand Up @@ -2336,7 +2349,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
return -EIO;
}

bio = bio_alloc(GFP_NOFS, 1);
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
free_io_failure(inode, failrec, 0);
return -EIO;
Expand Down Expand Up @@ -2457,10 +2470,11 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
struct page *page = bvec->bv_page;
struct extent_state *cached = NULL;
struct extent_state *state;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);

pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
"mirror=%ld\n", (u64)bio->bi_sector, err,
(long int)bio->bi_bdev);
"mirror=%lu\n", (u64)bio->bi_sector, err,
io_bio->mirror_num);
tree = &BTRFS_I(page->mapping->host)->io_tree;

start = page_offset(page) + bvec->bv_offset;
Expand All @@ -2485,7 +2499,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
}
spin_unlock(&tree->lock);

mirror = (int)(unsigned long)bio->bi_bdev;
mirror = io_bio->mirror_num;
if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
ret = tree->ops->readpage_end_io_hook(page, start, end,
state, mirror);
Expand Down Expand Up @@ -2550,17 +2564,23 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
bio_put(bio);
}

/*
* this allocates from the btrfs_bioset. We're returning a bio right now
* but you can call btrfs_io_bio for the appropriate container_of magic
*/
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags)
{
struct bio *bio;

bio = bio_alloc(gfp_flags, nr_vecs);
bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);

if (bio == NULL && (current->flags & PF_MEMALLOC)) {
while (!bio && (nr_vecs /= 2))
bio = bio_alloc(gfp_flags, nr_vecs);
while (!bio && (nr_vecs /= 2)) {
bio = bio_alloc_bioset(gfp_flags,
nr_vecs, btrfs_bioset);
}
}

if (bio) {
Expand All @@ -2571,6 +2591,19 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
return bio;
}

/*
 * Clone @bio with the copy allocated from btrfs_bioset.  @gfp_mask is
 * handed to bio_clone_bioset() unchanged and that call's result is
 * returned as is, so callers must check it before use.
 */
struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
{
	struct bio *clone;

	clone = bio_clone_bioset(bio, gfp_mask, btrfs_bioset);
	return clone;
}


/*
 * Allocate a bio with room for @nr_iovecs vecs out of btrfs_bioset.
 * Both arguments go straight through to bio_alloc_bioset() and its
 * result is returned untouched, so callers must check it before use.
 */
struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
{
	struct bio *new_bio;

	new_bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
	return new_bio;
}


static int __must_check submit_one_bio(int rw, struct bio *bio,
int mirror_num, unsigned long bio_flags)
{
Expand Down
2 changes: 2 additions & 0 deletions fs/btrfs/extent_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,8 @@ int extent_clear_unlock_delalloc(struct inode *inode,
struct bio *
btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
gfp_t gfp_flags);
struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs);
struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask);

struct btrfs_fs_info;

Expand Down
64 changes: 43 additions & 21 deletions fs/btrfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -6927,7 +6927,11 @@ struct btrfs_dio_private {
/* IO errors */
int errors;

/* orig_bio is our btrfs_io_bio */
struct bio *orig_bio;

/* dio_bio came from fs/direct-io.c */
struct bio *dio_bio;
};

static void btrfs_endio_direct_read(struct bio *bio, int err)
Expand All @@ -6937,6 +6941,7 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
struct bio_vec *bvec = bio->bi_io_vec;
struct inode *inode = dip->inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct bio *dio_bio;
u64 start;

start = dip->logical_offset;
Expand Down Expand Up @@ -6976,14 +6981,15 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)

unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
dip->logical_offset + dip->bytes - 1);
bio->bi_private = dip->private;
dio_bio = dip->dio_bio;

kfree(dip);

/* If we had a csum failure make sure to clear the uptodate flag */
if (err)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
dio_end_io(bio, err);
clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
dio_end_io(dio_bio, err);
bio_put(bio);
}

static void btrfs_endio_direct_write(struct bio *bio, int err)
Expand All @@ -6994,6 +7000,7 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
struct btrfs_ordered_extent *ordered = NULL;
u64 ordered_offset = dip->logical_offset;
u64 ordered_bytes = dip->bytes;
struct bio *dio_bio;
int ret;

if (err)
Expand Down Expand Up @@ -7021,14 +7028,15 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
goto again;
}
out_done:
bio->bi_private = dip->private;
dio_bio = dip->dio_bio;

kfree(dip);

/* If we had an error make sure to clear the uptodate flag */
if (err)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
dio_end_io(bio, err);
clear_bit(BIO_UPTODATE, &dio_bio->bi_flags);
dio_end_io(dio_bio, err);
bio_put(bio);
}

static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
Expand Down Expand Up @@ -7064,10 +7072,10 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
if (!atomic_dec_and_test(&dip->pending_bios))
goto out;

if (dip->errors)
if (dip->errors) {
bio_io_error(dip->orig_bio);
else {
set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
} else {
set_bit(BIO_UPTODATE, &dip->dio_bio->bi_flags);
bio_endio(dip->orig_bio, 0);
}
out:
Expand Down Expand Up @@ -7242,48 +7250,62 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
return 0;
}

static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
loff_t file_offset)
static void btrfs_submit_direct(int rw, struct bio *dio_bio,
struct inode *inode, loff_t file_offset)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_dio_private *dip;
struct bio_vec *bvec = bio->bi_io_vec;
struct bio_vec *bvec = dio_bio->bi_io_vec;
struct bio *io_bio;
int skip_sum;
int write = rw & REQ_WRITE;
int ret = 0;

skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;

io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);

if (!io_bio) {
ret = -ENOMEM;
goto free_ordered;
}

dip = kmalloc(sizeof(*dip), GFP_NOFS);
if (!dip) {
ret = -ENOMEM;
goto free_ordered;
goto free_io_bio;
}

dip->private = bio->bi_private;
dip->private = dio_bio->bi_private;
io_bio->bi_private = dio_bio->bi_private;
dip->inode = inode;
dip->logical_offset = file_offset;

dip->bytes = 0;
do {
dip->bytes += bvec->bv_len;
bvec++;
} while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1));
} while (bvec <= (dio_bio->bi_io_vec + dio_bio->bi_vcnt - 1));

dip->disk_bytenr = (u64)bio->bi_sector << 9;
bio->bi_private = dip;
dip->disk_bytenr = (u64)dio_bio->bi_sector << 9;
io_bio->bi_private = dip;
dip->errors = 0;
dip->orig_bio = bio;
dip->orig_bio = io_bio;
dip->dio_bio = dio_bio;
atomic_set(&dip->pending_bios, 0);

if (write)
bio->bi_end_io = btrfs_endio_direct_write;
io_bio->bi_end_io = btrfs_endio_direct_write;
else
bio->bi_end_io = btrfs_endio_direct_read;
io_bio->bi_end_io = btrfs_endio_direct_read;

ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
if (!ret)
return;

free_io_bio:
bio_put(io_bio);

free_ordered:
/*
* If this is a write, we need to clean up the reserved space and kill
Expand All @@ -7299,7 +7321,7 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
btrfs_put_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
}
bio_endio(bio, ret);
bio_endio(dio_bio, ret);
}

static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb,
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/raid56.c
Original file line number Diff line number Diff line change
Expand Up @@ -1050,7 +1050,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
}

/* put a new bio on the list */
bio = bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
bio = btrfs_io_bio_alloc(GFP_NOFS, bio_max_len >> PAGE_SHIFT?:1);
if (!bio)
return -ENOMEM;

Expand Down
10 changes: 5 additions & 5 deletions fs/btrfs/scrub.c
Original file line number Diff line number Diff line change
Expand Up @@ -1296,7 +1296,7 @@ static void scrub_recheck_block(struct btrfs_fs_info *fs_info,
}

WARN_ON(!page->page);
bio = bio_alloc(GFP_NOFS, 1);
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
page->io_error = 1;
sblock->no_io_error_seen = 0;
Expand Down Expand Up @@ -1431,7 +1431,7 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad,
return -EIO;
}

bio = bio_alloc(GFP_NOFS, 1);
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio)
return -EIO;
bio->bi_bdev = page_bad->dev->bdev;
Expand Down Expand Up @@ -1522,7 +1522,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
sbio->dev = wr_ctx->tgtdev;
bio = sbio->bio;
if (!bio) {
bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
if (!bio) {
mutex_unlock(&wr_ctx->wr_lock);
return -ENOMEM;
Expand Down Expand Up @@ -1930,7 +1930,7 @@ static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx,
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
if (!bio)
return -ENOMEM;
sbio->bio = bio;
Expand Down Expand Up @@ -3307,7 +3307,7 @@ static int write_page_nocow(struct scrub_ctx *sctx,
"btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n");
return -EIO;
}
bio = bio_alloc(GFP_NOFS, 1);
bio = btrfs_io_bio_alloc(GFP_NOFS, 1);
if (!bio) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
Expand Down
Loading

0 comments on commit 9be3395

Please sign in to comment.