Skip to content

Commit

Permalink
btrfs: count super block write errors in device instead of tracking f…
Browse files Browse the repository at this point in the history
…olio error state

Currently the error status of a super block write is tracked in the
page/folio Error status bit. To check it later we have to hold a reference
on the folio for the whole duration of the write and the wait.

Instead, count the number of super block write errors in the btrfs_device.
That means we don't need the folio to stay around until it's waited for,
and can avoid the extra calls to folio_get/folio_put.

Also remove a mention of PageError in a comment as it's the last mention
of the page Error state.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
  • Loading branch information
Matthew Wilcox (Oracle) authored and David Sterba committed May 7, 2024
1 parent 617fb10 commit bc00965
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 28 deletions.
46 changes: 19 additions & 27 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -3634,11 +3634,15 @@ static void btrfs_end_super_write(struct bio *bio)
"lost super block write due to IO error on %s (%d)",
btrfs_dev_name(device),
blk_status_to_errno(bio->bi_status));
folio_set_error(fi.folio);
btrfs_dev_stat_inc_and_print(device,
BTRFS_DEV_STAT_WRITE_ERRS);
/* Ensure failure if the primary sb fails. */
if (bio->bi_opf & REQ_FUA)
atomic_add(BTRFS_SUPER_PRIMARY_WRITE_ERROR,
&device->sb_write_errors);
else
atomic_inc(&device->sb_write_errors);
}

folio_unlock(fi.folio);
folio_put(fi.folio);
}
Expand Down Expand Up @@ -3742,10 +3746,11 @@ static int write_dev_supers(struct btrfs_device *device,
struct address_space *mapping = device->bdev->bd_inode->i_mapping;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int errors = 0;
int ret;
u64 bytenr, bytenr_orig;

atomic_set(&device->sb_write_errors, 0);

if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;

Expand All @@ -3765,7 +3770,7 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_err(device->fs_info,
"couldn't get super block location for mirror %d",
i);
errors++;
atomic_inc(&device->sb_write_errors);
continue;
}
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
Expand All @@ -3785,14 +3790,11 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_err(device->fs_info,
"couldn't get super block page for bytenr %llu",
bytenr);
errors++;
atomic_inc(&device->sb_write_errors);
continue;
}
ASSERT(folio_order(folio) == 0);

/* Bump the refcount for wait_dev_supers() */
folio_get(folio);

offset = offset_in_folio(folio, bytenr);
disk_super = folio_address(folio) + offset;
memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
Expand Down Expand Up @@ -3820,16 +3822,17 @@ static int write_dev_supers(struct btrfs_device *device,
submit_bio(bio);

if (btrfs_advance_sb_log(device, i))
errors++;
atomic_inc(&device->sb_write_errors);
}
return errors < i ? 0 : -1;
return atomic_read(&device->sb_write_errors) < i ? 0 : -1;
}

/*
* Wait for write completion of superblocks done by write_dev_supers,
* @max_mirrors same for write and wait phases.
*
* Return number of errors when folio is not found or not marked up to date.
* Return -1 if primary super block write failed or when there were no super block
* copies written. Otherwise 0.
*/
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
{
Expand Down Expand Up @@ -3860,30 +3863,19 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)

folio = filemap_get_folio(device->bdev->bd_inode->i_mapping,
bytenr >> PAGE_SHIFT);
if (IS_ERR(folio)) {
errors++;
if (i == 0)
primary_failed = true;
/* If the folio has been removed, then we know it completed. */
if (IS_ERR(folio))
continue;
}
ASSERT(folio_order(folio) == 0);

/* Folio will be unlocked once the write completes. */
folio_wait_locked(folio);
if (folio_test_error(folio)) {
errors++;
if (i == 0)
primary_failed = true;
}

/* Drop our reference */
folio_put(folio);

/* Drop the reference from the writing run */
folio_put(folio);
}

/* log error, force error return */
errors += atomic_read(&device->sb_write_errors);
if (errors >= BTRFS_SUPER_PRIMARY_WRITE_ERROR)
primary_failed = true;
if (primary_failed) {
btrfs_err(device->fs_info, "error writing primary super block to device %llu",
device->devid);
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/extent_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1602,7 +1602,7 @@ static void set_btree_ioerr(struct extent_buffer *eb)
* can be no longer dirty nor marked anymore for writeback (if a
* subsequent modification to the extent buffer didn't happen before the
* transaction commit), which makes filemap_fdata[write|wait]_range not
* able to find the pages tagged with SetPageError at transaction
* able to find the pages which contain errors at transaction
* commit time. So if this happens we must abort the transaction,
* otherwise we commit a super block with btree roots that point to
* btree nodes/leafs whose content on disk is invalid - either garbage
Expand Down
9 changes: 9 additions & 0 deletions fs/btrfs/volumes.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ enum btrfs_raid_types {
#define BTRFS_DEV_STATE_FLUSH_SENT (4)
#define BTRFS_DEV_STATE_NO_READA (5)

/*
* Special value encoding failure to write primary super block.
*
* btrfs_end_super_write() adds this value to btrfs_device::sb_write_errors
* when a failed super block bio has REQ_FUA set, while other failures
* increment the counter by one. wait_dev_supers() treats a counter value
* greater than or equal to this constant as a primary super block failure.
*/
#define BTRFS_SUPER_PRIMARY_WRITE_ERROR (INT_MAX / 2)

struct btrfs_fs_devices;

struct btrfs_device {
Expand Down Expand Up @@ -142,6 +145,12 @@ struct btrfs_device {
/* type and info about this device */
u64 type;

/*
* Counter of super block write errors, values greater than or equal to
* BTRFS_SUPER_PRIMARY_WRITE_ERROR encode primary super block write failure.
*/
atomic_t sb_write_errors;

/* minimal io size for this device */
u32 sector_size;

Expand Down

0 comments on commit bc00965

Please sign in to comment.