Skip to content

Commit

Permalink
btrfs: use bios instead of buffer_heads for super block writeout
Browse files Browse the repository at this point in the history
Similar to the superblock read path, change the write path to use bios
and pages instead of buffer_heads. This allows us to bypass the
buffer_head code when writing the superblock to disk.

This is based on a patch originally authored by Nikolay Borisov.

Co-developed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
  • Loading branch information
Johannes Thumshirn authored and David Sterba committed Mar 23, 2020
1 parent 8f32380 commit 314b6dd
Showing 1 changed file with 73 additions and 54 deletions.
127 changes: 73 additions & 54 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <linux/slab.h>
Expand Down Expand Up @@ -3395,25 +3394,34 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
}
ALLOW_ERROR_INJECTION(open_ctree, ERRNO);

static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
static void btrfs_end_super_write(struct bio *bio)
{
if (uptodate) {
set_buffer_uptodate(bh);
} else {
struct btrfs_device *device = (struct btrfs_device *)
bh->b_private;

btrfs_warn_rl_in_rcu(device->fs_info,
"lost page write due to IO error on %s",
rcu_str_deref(device->name));
/* note, we don't set_buffer_write_io_error because we have
* our own ways of dealing with the IO errors
*/
clear_buffer_uptodate(bh);
btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
struct btrfs_device *device = bio->bi_private;
struct bio_vec *bvec;
struct bvec_iter_all iter_all;
struct page *page;

bio_for_each_segment_all(bvec, bio, iter_all) {
page = bvec->bv_page;

if (bio->bi_status) {
btrfs_warn_rl_in_rcu(device->fs_info,
"lost page write due to IO error on %s (%d)",
rcu_str_deref(device->name),
blk_status_to_errno(bio->bi_status));
ClearPageUptodate(page);
SetPageError(page);
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_WRITE_ERRS);
} else {
SetPageUptodate(page);
}

put_page(page);
unlock_page(page);
}
unlock_buffer(bh);
put_bh(bh);

bio_put(bio);
}

struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
Expand Down Expand Up @@ -3473,32 +3481,34 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)

/*
* Write superblock @sb to the @device. Do not wait for completion, all the
* buffer heads we write are pinned.
* pages we use for writing are locked.
*
* Write @max_mirrors copies of the superblock, where 0 means default that fit
* the expected device size at commit time. Note that max_mirrors must be
* same for write and wait phases.
*
* Return number of errors when buffer head is not found or submission fails.
* Return number of errors when page is not found or submission fails.
*/
static int write_dev_supers(struct btrfs_device *device,
struct btrfs_super_block *sb, int max_mirrors)
{
struct btrfs_fs_info *fs_info = device->fs_info;
struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct buffer_head *bh;
int i;
int ret;
int errors = 0;
u64 bytenr;
int op_flags;

if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;

shash->tfm = fs_info->csum_shash;

for (i = 0; i < max_mirrors; i++) {
struct page *page;
struct bio *bio;
struct btrfs_super_block *disk_super;

bytenr = btrfs_sb_offset(i);
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes)
Expand All @@ -3511,37 +3521,45 @@ static int write_dev_supers(struct btrfs_device *device,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
crypto_shash_final(shash, sb->csum);

/* One reference for us, and we leave it for the caller */
bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
BTRFS_SUPER_INFO_SIZE);
if (!bh) {
page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
GFP_NOFS);
if (!page) {
btrfs_err(device->fs_info,
"couldn't get super buffer head for bytenr %llu",
"couldn't get super block page for bytenr %llu",
bytenr);
errors++;
continue;
}

memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
/* Bump the refcount for wait_dev_supers() */
get_page(page);

/* one reference for submit_bh */
get_bh(bh);
disk_super = page_address(page);
memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);

set_buffer_uptodate(bh);
lock_buffer(bh);
bh->b_end_io = btrfs_end_buffer_write_sync;
bh->b_private = device;
/*
* Directly use bios here instead of relying on the page cache
* to do I/O, so we don't lose the ability to do integrity
* checking.
*/
bio = bio_alloc(GFP_NOFS, 1);
bio_set_dev(bio, device->bdev);
bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
bio->bi_private = device;
bio->bi_end_io = btrfs_end_super_write;
__bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
offset_in_page(bytenr));

/*
* we fua the first super. The others we allow
* to go down lazy.
* We FUA only the first super block. The others we allow to
* go down lazy and there's a short window where the on-disk
* copies might still contain the older version.
*/
op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO;
if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
op_flags |= REQ_FUA;
ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
if (ret)
errors++;
bio->bi_opf |= REQ_FUA;

btrfsic_submit_bio(bio);
}
return errors < i ? 0 : -1;
}
Expand All @@ -3550,12 +3568,11 @@ static int write_dev_supers(struct btrfs_device *device,
* Wait for write completion of superblocks done by write_dev_supers,
* @max_mirrors same for write and wait phases.
*
* Return number of errors when buffer head is not found or not marked up to
* Return number of errors when page is not found or not marked up to
* date.
*/
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{
struct buffer_head *bh;
int i;
int errors = 0;
bool primary_failed = false;
Expand All @@ -3565,32 +3582,34 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;

for (i = 0; i < max_mirrors; i++) {
struct page *page;

bytenr = btrfs_sb_offset(i);
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes)
break;

bh = __find_get_block(device->bdev,
bytenr / BTRFS_BDEV_BLOCKSIZE,
BTRFS_SUPER_INFO_SIZE);
if (!bh) {
page = find_get_page(device->bdev->bd_inode->i_mapping,
bytenr >> PAGE_SHIFT);
if (!page) {
errors++;
if (i == 0)
primary_failed = true;
continue;
}
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
/* Page is submitted locked and unlocked once the IO completes */
wait_on_page_locked(page);
if (PageError(page)) {
errors++;
if (i == 0)
primary_failed = true;
}

/* drop our reference */
brelse(bh);
/* Drop our reference */
put_page(page);

/* drop the reference from the writing run */
brelse(bh);
/* Drop the reference from the writing run */
put_page(page);
}

/* log error, force error return */
Expand Down

0 comments on commit 314b6dd

Please sign in to comment.