diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 876bc7e3d736e..3b39b87558d59 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -121,6 +121,7 @@ struct scrub_stripe { atomic_t pending_io; wait_queue_head_t io_wait; + wait_queue_head_t repair_wait; /* * Indicate the states of the stripe. Bits are defined in @@ -156,6 +157,8 @@ struct scrub_stripe { * group. */ u8 *csums; + + struct work_struct work; }; struct scrub_recover { @@ -381,6 +384,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe stripe->state = 0; init_waitqueue_head(&stripe->io_wait); + init_waitqueue_head(&stripe->repair_wait); atomic_set(&stripe->pending_io, 0); ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages); @@ -403,7 +407,7 @@ int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe return -ENOMEM; } -void wait_scrub_stripe_io(struct scrub_stripe *stripe) +static void wait_scrub_stripe_io(struct scrub_stripe *stripe) { wait_event(stripe->io_wait, atomic_read(&stripe->pending_io) == 0); } @@ -2327,7 +2331,7 @@ static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr) } /* Verify specified sectors of a stripe. */ -void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap) +static void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; const u32 sectors_per_tree = fs_info->nodesize >> fs_info->sectorsize_bits; @@ -2340,6 +2344,203 @@ void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap) } } +static int calc_sector_number(struct scrub_stripe *stripe, struct bio_vec *first_bvec) +{ + int i; + + for (i = 0; i < stripe->nr_sectors; i++) { + if (scrub_stripe_get_page(stripe, i) == first_bvec->bv_page && + scrub_stripe_get_page_offset(stripe, i) == first_bvec->bv_offset) + break; + } + ASSERT(i < stripe->nr_sectors); + return i; +} + +/* + * Repair read is different to the regular read: + * + * - Only reads the failed sectors + * - May have extra blocksize limits + */ +static void scrub_repair_read_endio(struct btrfs_bio *bbio) +{ + struct scrub_stripe *stripe = bbio->private; + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; + struct bio_vec *bvec; + int sector_nr = calc_sector_number(stripe, bio_first_bvec_all(&bbio->bio)); + u32 bio_size = 0; + int i; + + ASSERT(sector_nr < stripe->nr_sectors); + + bio_for_each_bvec_all(bvec, &bbio->bio, i) + bio_size += bvec->bv_len; + + if (bbio->bio.bi_status) { + bitmap_set(&stripe->io_error_bitmap, sector_nr, + bio_size >> fs_info->sectorsize_bits); + bitmap_set(&stripe->error_bitmap, sector_nr, + bio_size >> fs_info->sectorsize_bits); + } else { + bitmap_clear(&stripe->io_error_bitmap, sector_nr, + bio_size >> fs_info->sectorsize_bits); + } + bio_put(&bbio->bio); + if (atomic_dec_and_test(&stripe->pending_io)) + wake_up(&stripe->io_wait); +} + +static int calc_next_mirror(int mirror, int num_copies) +{ + ASSERT(mirror <= num_copies); + return (mirror + 1 > num_copies) ? 1 : mirror + 1; +} + +static void scrub_stripe_submit_repair_read(struct scrub_stripe *stripe, + int mirror, int blocksize, bool wait) +{ + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; + struct btrfs_bio *bbio = NULL; + const unsigned long old_error_bitmap = stripe->error_bitmap; + int i; + + ASSERT(stripe->mirror_num >= 1); + ASSERT(atomic_read(&stripe->pending_io) == 0); + + for_each_set_bit(i, &old_error_bitmap, stripe->nr_sectors) { + struct page *page; + int pgoff; + int ret; + + page = scrub_stripe_get_page(stripe, i); + pgoff = scrub_stripe_get_page_offset(stripe, i); + + /* The current sector cannot be merged, submit the bio. */ + if (bbio && ((i > 0 && !test_bit(i - 1, &stripe->error_bitmap)) || + bbio->bio.bi_iter.bi_size >= blocksize)) { + ASSERT(bbio->bio.bi_iter.bi_size); + atomic_inc(&stripe->pending_io); + btrfs_submit_bio(bbio, mirror); + if (wait) + wait_scrub_stripe_io(stripe); + bbio = NULL; + } + + if (!bbio) { + bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, + fs_info, scrub_repair_read_endio, stripe); + bbio->bio.bi_iter.bi_sector = (stripe->logical + + (i << fs_info->sectorsize_bits)) >> SECTOR_SHIFT; + } + + ret = bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); + ASSERT(ret == fs_info->sectorsize); + } + if (bbio) { + ASSERT(bbio->bio.bi_iter.bi_size); + atomic_inc(&stripe->pending_io); + btrfs_submit_bio(bbio, mirror); + if (wait) + wait_scrub_stripe_io(stripe); + } +} + +/* + * The main entrance for all read related scrub work, including: + * + * - Wait for the initial read to finish + * - Verify and locate any bad sectors + * - Go through the remaining mirrors and try to read as large blocksize as + * possible + * - Go through all mirrors (including the failed mirror) sector-by-sector + * + * Writeback does not happen here, it needs extra synchronization. + */ +static void scrub_stripe_read_repair_worker(struct work_struct *work) +{ + struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work); + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; + int num_copies = btrfs_num_copies(fs_info, stripe->bg->start, + stripe->bg->length); + int mirror; + int i; + + ASSERT(stripe->mirror_num > 0); + + wait_scrub_stripe_io(stripe); + scrub_verify_one_stripe(stripe, stripe->extent_sector_bitmap); + /* Save the initial failed bitmap for later repair and report usage. */ + stripe->init_error_bitmap = stripe->error_bitmap; + + if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors)) + goto out; + + /* + * Try all remaining mirrors. + * + * Here we still try to read as large block as possible, as this is + * faster and we have extra safety nets to rely on. + */ + for (mirror = calc_next_mirror(stripe->mirror_num, num_copies); + mirror != stripe->mirror_num; + mirror = calc_next_mirror(mirror, num_copies)) { + const unsigned long old_error_bitmap = stripe->error_bitmap; + + scrub_stripe_submit_repair_read(stripe, mirror, + BTRFS_STRIPE_LEN, false); + wait_scrub_stripe_io(stripe); + scrub_verify_one_stripe(stripe, old_error_bitmap); + if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) + goto out; + } + + /* + * Last safety net, try re-checking all mirrors, including the failed + * one, sector-by-sector. + * + * As if one sector failed the drive's internal csum, the whole read + * containing the offending sector would be marked as error. + * Thus here we do sector-by-sector read. + * + * This can be slow, thus we only try it as the last resort. + */ + + for (i = 0, mirror = stripe->mirror_num; + i < num_copies; + i++, mirror = calc_next_mirror(mirror, num_copies)) { + const unsigned long old_error_bitmap = stripe->error_bitmap; + + scrub_stripe_submit_repair_read(stripe, mirror, + fs_info->sectorsize, true); + wait_scrub_stripe_io(stripe); + scrub_verify_one_stripe(stripe, old_error_bitmap); + if (bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) + goto out; + } +out: + set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state); + wake_up(&stripe->repair_wait); +} + +void scrub_read_endio(struct btrfs_bio *bbio) +{ + struct scrub_stripe *stripe = bbio->private; + + if (bbio->bio.bi_status) { + bitmap_set(&stripe->io_error_bitmap, 0, stripe->nr_sectors); + bitmap_set(&stripe->error_bitmap, 0, stripe->nr_sectors); + } else { + bitmap_clear(&stripe->io_error_bitmap, 0, stripe->nr_sectors); + } + bio_put(&bbio->bio); + if (atomic_dec_and_test(&stripe->pending_io)) { + wake_up(&stripe->io_wait); + INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker); + queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work); + } +} + static int scrub_checksum_tree_block(struct scrub_block *sblock) { struct scrub_ctx *sctx = sblock->sctx; diff --git a/fs/btrfs/scrub.h b/fs/btrfs/scrub.h index 45ff7e1498069..bcc9d398fe07f 100644 --- a/fs/btrfs/scrub.h +++ b/fs/btrfs/scrub.h @@ -19,11 +19,10 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, */ struct scrub_stripe; int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe); -void wait_scrub_stripe_io(struct scrub_stripe *stripe); int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, struct btrfs_device *dev, u64 physical, int mirror_num, u64 logical_start, u32 logical_len, struct scrub_stripe *stripe); -void scrub_verify_one_stripe(struct scrub_stripe *stripe, unsigned long bitmap); +void scrub_read_endio(struct btrfs_bio *bbio); #endif