Skip to content

Commit

Permalink
Merge branch 'md-6.14-bitmap' into md-6.14
Browse files Browse the repository at this point in the history
Move bitmap_{start, end}write calls to md layer. These changes help
address hangs in bitmap_startwrite([1],[2]).

[1] https://lore.kernel.org/all/CAJpMwyjmHQLvm6zg1cmQErttNNQPDAAXPKM3xgTjMhbfts986Q@mail.gmail.com/
[2] https://lore.kernel.org/all/ADF7D720-5764-4AF3-B68E-1845988737AA@flyingcircus.io/

* md-6.14-bitmap:
  md/md-bitmap: move bitmap_{start, end}write to md upper layer
  md/raid5: implement pers->bitmap_sector()
  md: add a new callback pers->bitmap_sector()
  md/md-bitmap: remove the last parameter for bimtap_ops->endwrite()
  md/md-bitmap: factor behind write counters out from bitmap_{start/end}write()
  • Loading branch information
Song Liu committed Jan 13, 2025
2 parents 4fa9161 + cd5fc65 commit c9b39e5
Show file tree
Hide file tree
Showing 11 changed files with 149 additions and 147 deletions.
74 changes: 45 additions & 29 deletions drivers/md/md-bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -1671,24 +1671,13 @@ __acquires(bitmap->lock)
}

static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
unsigned long sectors, bool behind)
unsigned long sectors)
{
struct bitmap *bitmap = mddev->bitmap;

if (!bitmap)
return 0;

if (behind) {
int bw;
atomic_inc(&bitmap->behind_writes);
bw = atomic_read(&bitmap->behind_writes);
if (bw > bitmap->behind_writes_used)
bitmap->behind_writes_used = bw;

pr_debug("inc write-behind count %d/%lu\n",
bw, bitmap->mddev->bitmap_info.max_write_behind);
}

while (sectors) {
sector_t blocks;
bitmap_counter_t *bmc;
Expand Down Expand Up @@ -1737,21 +1726,13 @@ static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
}

static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
unsigned long sectors, bool success, bool behind)
unsigned long sectors)
{
struct bitmap *bitmap = mddev->bitmap;

if (!bitmap)
return;

if (behind) {
if (atomic_dec_and_test(&bitmap->behind_writes))
wake_up(&bitmap->behind_wait);
pr_debug("dec write-behind count %d/%lu\n",
atomic_read(&bitmap->behind_writes),
bitmap->mddev->bitmap_info.max_write_behind);
}

while (sectors) {
sector_t blocks;
unsigned long flags;
Expand All @@ -1764,15 +1745,16 @@ static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
return;
}

if (success && !bitmap->mddev->degraded &&
bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
}

if (!success && !NEEDED(*bmc))
if (!bitmap->mddev->degraded) {
if (bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
sysfs_notify_dirent_safe(
bitmap->sysfs_can_clear);
}
} else if (!NEEDED(*bmc)) {
*bmc |= NEEDED_MASK;
}

if (COUNTER(*bmc) == COUNTER_MAX)
wake_up(&bitmap->overflow_wait);
Expand Down Expand Up @@ -2062,6 +2044,37 @@ static void md_bitmap_free(void *data)
kfree(bitmap);
}

static void bitmap_start_behind_write(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
int bw;

if (!bitmap)
return;

atomic_inc(&bitmap->behind_writes);
bw = atomic_read(&bitmap->behind_writes);
if (bw > bitmap->behind_writes_used)
bitmap->behind_writes_used = bw;

pr_debug("inc write-behind count %d/%lu\n",
bw, bitmap->mddev->bitmap_info.max_write_behind);
}

static void bitmap_end_behind_write(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;

if (!bitmap)
return;

if (atomic_dec_and_test(&bitmap->behind_writes))
wake_up(&bitmap->behind_wait);
pr_debug("dec write-behind count %d/%lu\n",
atomic_read(&bitmap->behind_writes),
bitmap->mddev->bitmap_info.max_write_behind);
}

static void bitmap_wait_behind_writes(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
Expand Down Expand Up @@ -2981,6 +2994,9 @@ static struct bitmap_operations bitmap_ops = {
.dirty_bits = bitmap_dirty_bits,
.unplug = bitmap_unplug,
.daemon_work = bitmap_daemon_work,

.start_behind_write = bitmap_start_behind_write,
.end_behind_write = bitmap_end_behind_write,
.wait_behind_writes = bitmap_wait_behind_writes,

.startwrite = bitmap_startwrite,
Expand Down
7 changes: 5 additions & 2 deletions drivers/md/md-bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,15 @@ struct bitmap_operations {
unsigned long e);
void (*unplug)(struct mddev *mddev, bool sync);
void (*daemon_work)(struct mddev *mddev);

void (*start_behind_write)(struct mddev *mddev);
void (*end_behind_write)(struct mddev *mddev);
void (*wait_behind_writes)(struct mddev *mddev);

int (*startwrite)(struct mddev *mddev, sector_t offset,
unsigned long sectors, bool behind);
unsigned long sectors);
void (*endwrite)(struct mddev *mddev, sector_t offset,
unsigned long sectors, bool success, bool behind);
unsigned long sectors);
bool (*start_sync)(struct mddev *mddev, sector_t offset,
sector_t *blocks, bool degraded);
void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks);
Expand Down
29 changes: 29 additions & 0 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -8745,12 +8745,32 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);

static void md_bitmap_start(struct mddev *mddev,
struct md_io_clone *md_io_clone)
{
if (mddev->pers->bitmap_sector)
mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
&md_io_clone->sectors);

mddev->bitmap_ops->startwrite(mddev, md_io_clone->offset,
md_io_clone->sectors);
}

static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
{
mddev->bitmap_ops->endwrite(mddev, md_io_clone->offset,
md_io_clone->sectors);
}

static void md_end_clone_io(struct bio *bio)
{
struct md_io_clone *md_io_clone = bio->bi_private;
struct bio *orig_bio = md_io_clone->orig_bio;
struct mddev *mddev = md_io_clone->mddev;

if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
md_bitmap_end(mddev, md_io_clone);

if (bio->bi_status && !orig_bio->bi_status)
orig_bio->bi_status = bio->bi_status;

Expand All @@ -8775,6 +8795,12 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio)
if (blk_queue_io_stat(bdev->bd_disk->queue))
md_io_clone->start_time = bio_start_io_acct(*bio);

if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
md_io_clone->offset = (*bio)->bi_iter.bi_sector;
md_io_clone->sectors = bio_sectors(*bio);
md_bitmap_start(mddev, md_io_clone);
}

clone->bi_end_io = md_end_clone_io;
clone->bi_private = md_io_clone;
*bio = clone;
Expand All @@ -8793,6 +8819,9 @@ void md_free_cloned_bio(struct bio *bio)
struct bio *orig_bio = md_io_clone->orig_bio;
struct mddev *mddev = md_io_clone->mddev;

if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
md_bitmap_end(mddev, md_io_clone);

if (bio->bi_status && !orig_bio->bi_status)
orig_bio->bi_status = bio->bi_status;

Expand Down
5 changes: 5 additions & 0 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,9 @@ struct md_personality
void *(*takeover) (struct mddev *mddev);
/* Changes the consistency policy of an active array. */
int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
/* convert io ranges from array to bitmap */
void (*bitmap_sector)(struct mddev *mddev, sector_t *offset,
unsigned long *sectors);
};

struct md_sysfs_entry {
Expand Down Expand Up @@ -828,6 +831,8 @@ struct md_io_clone {
struct mddev *mddev;
struct bio *orig_bio;
unsigned long start_time;
sector_t offset;
unsigned long sectors;
struct bio bio_clone;
};

Expand Down
34 changes: 6 additions & 28 deletions drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -420,10 +420,8 @@ static void close_write(struct r1bio *r1_bio)
r1_bio->behind_master_bio = NULL;
}

/* clear the bitmap if all writes complete successfully */
mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors,
!test_bit(R1BIO_Degraded, &r1_bio->state),
test_bit(R1BIO_BehindIO, &r1_bio->state));
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
mddev->bitmap_ops->end_behind_write(mddev);
md_write_end(mddev);
}

Expand Down Expand Up @@ -480,8 +478,6 @@ static void raid1_end_write_request(struct bio *bio)
if (!test_bit(Faulty, &rdev->flags))
set_bit(R1BIO_WriteError, &r1_bio->state);
else {
/* Fail the request */
set_bit(R1BIO_Degraded, &r1_bio->state);
/* Finished with this branch */
r1_bio->bios[mirror] = NULL;
to_put = bio;
Expand Down Expand Up @@ -1535,11 +1531,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
write_behind = true;

r1_bio->bios[i] = NULL;
if (!rdev || test_bit(Faulty, &rdev->flags)) {
if (i < conf->raid_disks)
set_bit(R1BIO_Degraded, &r1_bio->state);
if (!rdev || test_bit(Faulty, &rdev->flags))
continue;
}

atomic_inc(&rdev->nr_pending);
if (test_bit(WriteErrorSeen, &rdev->flags)) {
Expand All @@ -1558,16 +1551,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
*/
max_sectors = bad_sectors;
rdev_dec_pending(rdev, mddev);
/* We don't set R1BIO_Degraded as that
* only applies if the disk is
* missing, so it might be re-added,
* and we want to know to recover this
* chunk.
* In this case the device is here,
* and the fact that this chunk is not
* in-sync is recorded in the bad
* block log
*/
continue;
}
if (is_bad) {
Expand Down Expand Up @@ -1645,9 +1628,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
stats.behind_writes < max_write_behind)
alloc_behind_master_bio(r1_bio, bio);

mddev->bitmap_ops->startwrite(
mddev, r1_bio->sector, r1_bio->sectors,
test_bit(R1BIO_BehindIO, &r1_bio->state));
if (test_bit(R1BIO_BehindIO, &r1_bio->state))
mddev->bitmap_ops->start_behind_write(mddev);
first_clone = 0;
}

Expand Down Expand Up @@ -2614,12 +2596,10 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
* errors.
*/
fail = true;
if (!narrow_write_error(r1_bio, m)) {
if (!narrow_write_error(r1_bio, m))
md_error(conf->mddev,
conf->mirrors[m].rdev);
/* an I/O failed, we can't clear the bitmap */
set_bit(R1BIO_Degraded, &r1_bio->state);
}
rdev_dec_pending(conf->mirrors[m].rdev,
conf->mddev);
}
Expand Down Expand Up @@ -2710,8 +2690,6 @@ static void raid1d(struct md_thread *thread)
list_del(&r1_bio->retry_list);
idx = sector_to_idx(r1_bio->sector);
atomic_dec(&conf->nr_queued[idx]);
if (mddev->degraded)
set_bit(R1BIO_Degraded, &r1_bio->state);
if (test_bit(R1BIO_WriteError, &r1_bio->state))
close_write(r1_bio);
raid_end_bio_io(r1_bio);
Expand Down
1 change: 0 additions & 1 deletion drivers/md/raid1.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,6 @@ struct r1bio {
enum r1bio_state {
R1BIO_Uptodate,
R1BIO_IsSync,
R1BIO_Degraded,
R1BIO_BehindIO,
/* Set ReadError on bios that experience a readerror so that
* raid1d knows what to do with them.
Expand Down
26 changes: 2 additions & 24 deletions drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -428,10 +428,6 @@ static void close_write(struct r10bio *r10_bio)
{
struct mddev *mddev = r10_bio->mddev;

/* clear the bitmap if all writes complete successfully */
mddev->bitmap_ops->endwrite(mddev, r10_bio->sector, r10_bio->sectors,
!test_bit(R10BIO_Degraded, &r10_bio->state),
false);
md_write_end(mddev);
}

Expand Down Expand Up @@ -501,7 +497,6 @@ static void raid10_end_write_request(struct bio *bio)
set_bit(R10BIO_WriteError, &r10_bio->state);
else {
/* Fail the request */
set_bit(R10BIO_Degraded, &r10_bio->state);
r10_bio->devs[slot].bio = NULL;
to_put = bio;
dec_rdev = 1;
Expand Down Expand Up @@ -1438,10 +1433,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
r10_bio->devs[i].bio = NULL;
r10_bio->devs[i].repl_bio = NULL;

if (!rdev && !rrdev) {
set_bit(R10BIO_Degraded, &r10_bio->state);
if (!rdev && !rrdev)
continue;
}
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
sector_t dev_sector = r10_bio->devs[i].addr;
Expand All @@ -1458,14 +1451,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
* to other devices yet
*/
max_sectors = bad_sectors;
/* We don't set R10BIO_Degraded as that
* only applies if the disk is missing,
* so it might be re-added, and we want to
* know to recover this chunk.
* In this case the device is here, and the
* fact that this chunk is not in-sync is
* recorded in the bad block log.
*/
continue;
}
if (is_bad) {
Expand Down Expand Up @@ -1519,8 +1504,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
md_account_bio(mddev, &bio);
r10_bio->master_bio = bio;
atomic_set(&r10_bio->remaining, 1);
mddev->bitmap_ops->startwrite(mddev, r10_bio->sector, r10_bio->sectors,
false);

for (i = 0; i < conf->copies; i++) {
if (r10_bio->devs[i].bio)
Expand Down Expand Up @@ -2966,11 +2949,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev_dec_pending(rdev, conf->mddev);
} else if (bio != NULL && bio->bi_status) {
fail = true;
if (!narrow_write_error(r10_bio, m)) {
if (!narrow_write_error(r10_bio, m))
md_error(conf->mddev, rdev);
set_bit(R10BIO_Degraded,
&r10_bio->state);
}
rdev_dec_pending(rdev, conf->mddev);
}
bio = r10_bio->devs[m].repl_bio;
Expand Down Expand Up @@ -3029,8 +3009,6 @@ static void raid10d(struct md_thread *thread)
r10_bio = list_first_entry(&tmp, struct r10bio,
retry_list);
list_del(&r10_bio->retry_list);
if (mddev->degraded)
set_bit(R10BIO_Degraded, &r10_bio->state);

if (test_bit(R10BIO_WriteError,
&r10_bio->state))
Expand Down
1 change: 0 additions & 1 deletion drivers/md/raid10.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ enum r10bio_state {
R10BIO_IsSync,
R10BIO_IsRecover,
R10BIO_IsReshape,
R10BIO_Degraded,
/* Set ReadError on bios that experience a read error
* so that raid10d knows what to do with them.
*/
Expand Down
Loading

0 comments on commit c9b39e5

Please sign in to comment.