btrfs: only write the sectors in the vertical stripe which has data stripes

commit bd8f7e6 upstream.

If we have only an 8K partial write at the beginning of a full RAID56
stripe, we will write the following contents:

                    0  8K           32K             64K
Disk 1	(data):     |XX|            |               |
Disk 2  (data):     |               |               |
Disk 3  (parity):   |XXXXXXXXXXXXXXX|XXXXXXXXXXXXXXX|

|X| means the sector will be written back to disk.

Note that although we won't write any sectors from disk 2, we will
still write the full 64KiB of parity to disk.

This behavior is fine for now, but not for the future (especially for
RAID56J, as we waste quite some space to journal the unused parity
stripes).

So here we will also utilize btrfs_raid_bio::dbitmap: any time we
queue a higher level bio into an rbio, we update rbio::dbitmap to
indicate which vertical stripes need to be written back.

And at finish_rmw(), we also check the dbitmap to see whether we need
to write any sector of a given vertical stripe.

So after the patch, the above example will only lead to the following
writeback pattern:

                    0  8K           32K             64K
Disk 1	(data):     |XX|            |               |
Disk 2  (data):     |               |               |
Disk 3  (parity):   |XX|            |               |
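
For reference, the dbitmap bit for a logical address boils down to
(logical - full_stripe_start) >> sectorsize_bits, modulo stripe_npages,
as done in the new rbio_add_bio() helper in the diff below. A minimal
standalone sketch of that calculation (assuming a 4KiB sectorsize and a
64KiB stripe length; the constants and the main() driver are
illustrative only, not part of the patch):

#include <stdio.h>
#include <stdint.h>

#define SECTORSIZE_BITS	12				/* 4KiB sectors */
#define STRIPE_LEN	(64 * 1024)			/* 64KiB per data stripe */
#define STRIPE_NPAGES	(STRIPE_LEN >> SECTORSIZE_BITS)	/* 16 bits in dbitmap */

int main(void)
{
	/* The 8K write at the very start of the full stripe, as in the example above. */
	uint64_t full_stripe_start = 0;
	uint64_t orig_logical = 0;
	uint32_t orig_len = 8 * 1024;
	uint64_t cur;

	for (cur = orig_logical; cur < orig_logical + orig_len;
	     cur += (1 << SECTORSIZE_BITS)) {
		int bit = (uint32_t)((cur - full_stripe_start) >>
				     SECTORSIZE_BITS) % STRIPE_NPAGES;

		/* Prints bits 0 and 1; the other 14 vertical stripes stay clean. */
		printf("set dbitmap bit %d\n", bit);
	}
	return 0;
}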

Acked-by: David Sterba <dsterba@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Qu Wenruo authored and Greg Kroah-Hartman committed Aug 21, 2022
1 parent c0931e4 commit 2bec2cb
Showing 1 changed file with 51 additions and 4 deletions.
fs/btrfs/raid56.c
@@ -323,6 +323,9 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
{
bio_list_merge(&dest->bio_list, &victim->bio_list);
dest->bio_list_bytes += victim->bio_list_bytes;
/* Also inherit the bitmaps from @victim. */
bitmap_or(dest->dbitmap, victim->dbitmap, dest->dbitmap,
dest->stripe_npages);
dest->generic_bio_cnt += victim->generic_bio_cnt;
bio_list_init(&victim->bio_list);
}
@@ -864,6 +867,12 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)

if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);
/*
* Clear the data bitmap, as the rbio may be cached for later usage.
* Do this before unlock_stripe() so there will be no new bio
* for this rbio.
*/
bitmap_clear(rbio->dbitmap, 0, rbio->stripe_npages);

/*
* At this moment, rbio->bio_list is empty, however since rbio does not
@@ -1195,6 +1204,9 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
else
BUG();

/* We should have at least one data sector. */
ASSERT(bitmap_weight(rbio->dbitmap, rbio->stripe_npages));

/* at this point we either have a full stripe,
* or we've read the full stripe from the drive.
* recalculate the parity and write the new results.
@@ -1266,6 +1278,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
for (stripe = 0; stripe < rbio->real_stripes; stripe++) {
for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;

/* This vertical stripe has no data, skip it. */
if (!test_bit(pagenr, rbio->dbitmap))
continue;

if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
if (!page)
@@ -1290,6 +1307,11 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)

for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) {
struct page *page;

/* This vertical stripe has no data, skip it. */
if (!test_bit(pagenr, rbio->dbitmap))
continue;

if (stripe < rbio->nr_data) {
page = page_in_rbio(rbio, stripe, pagenr, 1);
if (!page)
@@ -1713,6 +1735,33 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
run_plug(plug);
}

/* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
{
const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
const u64 full_stripe_start = rbio->bioc->raid_map[0];
const u32 orig_len = orig_bio->bi_iter.bi_size;
const u32 sectorsize = fs_info->sectorsize;
u64 cur_logical;

ASSERT(orig_logical >= full_stripe_start &&
orig_logical + orig_len <= full_stripe_start +
rbio->nr_data * rbio->stripe_len);

bio_list_add(&rbio->bio_list, orig_bio);
rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;

/* Update the dbitmap. */
for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
cur_logical += sectorsize) {
int bit = ((u32)(cur_logical - full_stripe_start) >>
fs_info->sectorsize_bits) % rbio->stripe_npages;

set_bit(bit, rbio->dbitmap);
}
}

/*
* our main entry point for writes from the rest of the FS.
*/
@@ -1730,9 +1779,8 @@ int raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc,
btrfs_put_bioc(bioc);
return PTR_ERR(rbio);
}
bio_list_add(&rbio->bio_list, bio);
rbio->bio_list_bytes = bio->bi_iter.bi_size;
rbio->operation = BTRFS_RBIO_WRITE;
rbio_add_bio(rbio, bio);

btrfs_bio_counter_inc_noblocked(fs_info);
rbio->generic_bio_cnt = 1;
@@ -2134,8 +2182,7 @@ int raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
}

rbio->operation = BTRFS_RBIO_READ_REBUILD;
bio_list_add(&rbio->bio_list, bio);
rbio->bio_list_bytes = bio->bi_iter.bi_size;
rbio_add_bio(rbio, bio);

rbio->faila = find_logical_bio_stripe(rbio, bio);
if (rbio->faila == -1) {