Skip to content

Commit

Permalink
btrfs: implement log-structured superblock for ZONED mode
Browse files Browse the repository at this point in the history
Superblock (and its copies) is the only data structure in btrfs which
has a fixed location on a device. Since we cannot overwrite in a
sequential write required zone, we cannot place superblock in the zone.
One easy solution is limiting superblock and copies to be placed only in
conventional zones.  However, this method has two downsides: one is
reduced number of superblock copies. The location of the second copy of
superblock is 256GB, which is in a sequential write required zone on
typical devices in the market today.  So, the number of superblock and
copies is limited to be two.  Second downside is that we cannot support
devices which have no conventional zones at all.

To solve these two problems, we employ superblock log writing. It uses
two adjacent zones as a circular buffer to write updated superblocks.
Once the first zone is filled up, start writing into the second one.
Then, when both zones are filled up and before starting to write to the
first zone again, it reset the first zone.

We can determine the position of the latest superblock by reading write
pointer information from a device. One corner case is when both zones
are full. For this situation, we read out the last superblock of each
zone, and compare them to determine which zone is older.

The following zones are reserved as the circular buffer on ZONED btrfs.

- The primary superblock: zones 0 and 1
- The first copy: zones 16 and 17
- The second copy: zones 1024 or zone at 256GB which is minimum, and
  next to it

If these reserved zones are conventional, superblock is written fixed at
the start of the zone without logging.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
  • Loading branch information
Naohiro Aota authored and David Sterba committed Dec 9, 2020
1 parent a589dde commit 1265925
Show file tree
Hide file tree
Showing 6 changed files with 429 additions and 12 deletions.
9 changes: 9 additions & 0 deletions fs/btrfs/block-group.c
Original file line number Diff line number Diff line change
Expand Up @@ -1679,6 +1679,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
static int exclude_super_stripes(struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = cache->fs_info;
const bool zoned = btrfs_is_zoned(fs_info);
u64 bytenr;
u64 *logical;
int stripe_len;
Expand All @@ -1700,6 +1701,14 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
if (ret)
return ret;

/* Shouldn't have super stripes in sequential zones */
if (zoned && nr) {
btrfs_err(fs_info,
"zoned: block group %llu must not contain super block",
cache->start);
return -EUCLEAN;
}

while (nr--) {
u64 len = min_t(u64, stripe_len,
cache->start + cache->length - logical[nr]);
Expand Down
42 changes: 35 additions & 7 deletions fs/btrfs/disk-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -3488,10 +3488,17 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
{
struct btrfs_super_block *super;
struct page *page;
u64 bytenr;
u64 bytenr, bytenr_orig;
struct address_space *mapping = bdev->bd_inode->i_mapping;
int ret;

bytenr_orig = btrfs_sb_offset(copy_num);
ret = btrfs_sb_log_location_bdev(bdev, copy_num, READ, &bytenr);
if (ret == -ENOENT)
return ERR_PTR(-EINVAL);
else if (ret)
return ERR_PTR(ret);

bytenr = btrfs_sb_offset(copy_num);
if (bytenr + BTRFS_SUPER_INFO_SIZE >= i_size_read(bdev->bd_inode))
return ERR_PTR(-EINVAL);

Expand All @@ -3505,7 +3512,7 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
return ERR_PTR(-ENODATA);
}

if (btrfs_super_bytenr(super) != bytenr) {
if (btrfs_super_bytenr(super) != bytenr_orig) {
btrfs_release_disk_super(super);
return ERR_PTR(-EINVAL);
}
Expand Down Expand Up @@ -3560,7 +3567,8 @@ static int write_dev_supers(struct btrfs_device *device,
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
int i;
int errors = 0;
u64 bytenr;
int ret;
u64 bytenr, bytenr_orig;

if (max_mirrors == 0)
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
Expand All @@ -3572,12 +3580,22 @@ static int write_dev_supers(struct btrfs_device *device,
struct bio *bio;
struct btrfs_super_block *disk_super;

bytenr = btrfs_sb_offset(i);
bytenr_orig = btrfs_sb_offset(i);
ret = btrfs_sb_log_location(device, i, WRITE, &bytenr);
if (ret == -ENOENT) {
continue;
} else if (ret < 0) {
btrfs_err(device->fs_info,
"couldn't get super block location for mirror %d",
i);
errors++;
continue;
}
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes)
break;

btrfs_set_super_bytenr(sb, bytenr);
btrfs_set_super_bytenr(sb, bytenr_orig);

crypto_shash_digest(shash, (const char *)sb + BTRFS_CSUM_SIZE,
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE,
Expand Down Expand Up @@ -3622,6 +3640,7 @@ static int write_dev_supers(struct btrfs_device *device,
bio->bi_opf |= REQ_FUA;

btrfsic_submit_bio(bio);
btrfs_advance_sb_log(device, i);
}
return errors < i ? 0 : -1;
}
Expand All @@ -3638,6 +3657,7 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
int i;
int errors = 0;
bool primary_failed = false;
int ret;
u64 bytenr;

if (max_mirrors == 0)
Expand All @@ -3646,7 +3666,15 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
for (i = 0; i < max_mirrors; i++) {
struct page *page;

bytenr = btrfs_sb_offset(i);
ret = btrfs_sb_log_location(device, i, READ, &bytenr);
if (ret == -ENOENT) {
break;
} else if (ret < 0) {
errors++;
if (i == 0)
primary_failed = true;
continue;
}
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
device->commit_total_bytes)
break;
Expand Down
3 changes: 3 additions & 0 deletions fs/btrfs/scrub.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "rcu-string.h"
#include "raid56.h"
#include "block-group.h"
#include "zoned.h"

/*
* This is only the first step towards a full-features scrub. It reads all
Expand Down Expand Up @@ -3732,6 +3733,8 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
if (bytenr + BTRFS_SUPER_INFO_SIZE >
scrub_dev->commit_total_bytes)
break;
if (!btrfs_check_super_location(scrub_dev, bytenr))
continue;

ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr,
scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i,
Expand Down
20 changes: 15 additions & 5 deletions fs/btrfs/volumes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1276,7 +1276,7 @@ void btrfs_release_disk_super(struct btrfs_super_block *super)
}

static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
u64 bytenr)
u64 bytenr, u64 bytenr_orig)
{
struct btrfs_super_block *disk_super;
struct page *page;
Expand Down Expand Up @@ -1307,7 +1307,7 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
/* align our pointer to the offset of the super block */
disk_super = p + offset_in_page(bytenr);

if (btrfs_super_bytenr(disk_super) != bytenr ||
if (btrfs_super_bytenr(disk_super) != bytenr_orig ||
btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
btrfs_release_disk_super(p);
return ERR_PTR(-EINVAL);
Expand Down Expand Up @@ -1342,7 +1342,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
bool new_device_added = false;
struct btrfs_device *device = NULL;
struct block_device *bdev;
u64 bytenr;
u64 bytenr, bytenr_orig;
int ret;

lockdep_assert_held(&uuid_mutex);

Expand All @@ -1352,14 +1353,18 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
* So, we need to add a special mount option to scan for
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
*/
bytenr = btrfs_sb_offset(0);
flags |= FMODE_EXCL;

bdev = blkdev_get_by_path(path, flags, holder);
if (IS_ERR(bdev))
return ERR_CAST(bdev);

disk_super = btrfs_read_disk_super(bdev, bytenr);
bytenr_orig = btrfs_sb_offset(0);
ret = btrfs_sb_log_location_bdev(bdev, 0, READ, &bytenr);
if (ret)
return ERR_PTR(ret);

disk_super = btrfs_read_disk_super(bdev, bytenr, bytenr_orig);
if (IS_ERR(disk_super)) {
device = ERR_CAST(disk_super);
goto error_bdev_put;
Expand Down Expand Up @@ -2023,6 +2028,11 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
if (IS_ERR(disk_super))
continue;

if (bdev_is_zoned(bdev)) {
btrfs_reset_sb_log_zones(bdev, copy_num);
continue;
}

memset(&disk_super->magic, 0, sizeof(disk_super->magic));

page = virt_to_page(disk_super);
Expand Down
Loading

0 comments on commit 1265925

Please sign in to comment.