Skip to content

Commit

Permalink
md: When assembling the array, consult the superblock of the freshest de…
Browse files Browse the repository at this point in the history
…vice

Upon assembling the array, both the kernel and mdadm allow devices to have an event
counter difference of 1 and still consider them up-to-date.
However, a device whose event count is behind by 1 may in fact not be up-to-date,
and an array resync with such a device may cause data corruption.
To avoid this, consult the superblock of the freshest device about the status
of a device whose event counter is behind by 1.

Signed-off-by: Alex Lyakas <alex.lyakas@zadara.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/1702470271-16073-1-git-send-email-alex.lyakas@zadara.com
  • Loading branch information
Alex Lyakas authored and Song Liu committed Dec 15, 2023
1 parent af140f8 commit dc1cc22
Showing 1 changed file with 44 additions and 10 deletions.
54 changes: 44 additions & 10 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -1206,6 +1206,7 @@ struct super_type {
struct md_rdev *refdev,
int minor_version);
int (*validate_super)(struct mddev *mddev,
struct md_rdev *freshest,
struct md_rdev *rdev);
void (*sync_super)(struct mddev *mddev,
struct md_rdev *rdev);
Expand Down Expand Up @@ -1343,8 +1344,9 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor

/*
* validate_super for 0.90.0
* note: we are not using "freshest" for 0.9 superblock
*/
static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
static int super_90_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
{
mdp_disk_t *desc;
mdp_super_t *sb = page_address(rdev->sb_page);
Expand Down Expand Up @@ -1856,7 +1858,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
return ret;
}

static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struct md_rdev *rdev)
{
struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
__u64 ev1 = le64_to_cpu(sb->events);
Expand Down Expand Up @@ -1952,13 +1954,15 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
}
} else if (mddev->pers == NULL) {
/* Insist of good event counter while assembling, except for
* spares (which don't need an event count) */
++ev1;
* spares (which don't need an event count).
* Similar to mdadm, we allow event counter difference of 1
* from the freshest device.
*/
if (rdev->desc_nr >= 0 &&
rdev->desc_nr < le32_to_cpu(sb->max_dev) &&
(le16_to_cpu(sb->dev_roles[rdev->desc_nr]) < MD_DISK_ROLE_MAX ||
le16_to_cpu(sb->dev_roles[rdev->desc_nr]) == MD_DISK_ROLE_JOURNAL))
if (ev1 < mddev->events)
if (ev1 + 1 < mddev->events)
return -EINVAL;
} else if (mddev->bitmap) {
/* If adding to array with a bitmap, then we can accept an
Expand All @@ -1979,8 +1983,38 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
role = MD_DISK_ROLE_SPARE;
rdev->desc_nr = -1;
} else
} else if (mddev->pers == NULL && freshest && ev1 < mddev->events) {
/*
* If we are assembling, and our event counter is smaller than the
* highest event counter, we cannot trust our superblock about the role.
* It could happen that our rdev was marked as Faulty, and all other
* superblocks were updated with +1 event counter.
* Then, before the next superblock update, which typically happens when
* remove_and_add_spares() removes the device from the array, there was
* a crash or reboot.
* If we allow current rdev without consulting the freshest superblock,
* we could cause data corruption.
* Note that in this case our event counter is smaller by 1 than the
* highest, otherwise, this rdev would not be allowed into array;
* both kernel and mdadm allow event counter difference of 1.
*/
struct mdp_superblock_1 *freshest_sb = page_address(freshest->sb_page);
u32 freshest_max_dev = le32_to_cpu(freshest_sb->max_dev);

if (rdev->desc_nr >= freshest_max_dev) {
/* this is unexpected, better not proceed */
pr_warn("md: %s: rdev[%pg]: desc_nr(%d) >= freshest(%pg)->sb->max_dev(%u)\n",
mdname(mddev), rdev->bdev, rdev->desc_nr,
freshest->bdev, freshest_max_dev);
return -EUCLEAN;
}

role = le16_to_cpu(freshest_sb->dev_roles[rdev->desc_nr]);
pr_debug("md: %s: rdev[%pg]: role=%d(0x%x) according to freshest %pg\n",
mdname(mddev), rdev->bdev, role, role, freshest->bdev);
} else {
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
}
switch(role) {
case MD_DISK_ROLE_SPARE: /* spare */
break;
Expand Down Expand Up @@ -2887,7 +2921,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
* and should be added immediately.
*/
super_types[mddev->major_version].
validate_super(mddev, rdev);
validate_super(mddev, NULL/*freshest*/, rdev);
err = mddev->pers->hot_add_disk(mddev, rdev);
if (err) {
md_kick_rdev_from_array(rdev);
Expand Down Expand Up @@ -3824,7 +3858,7 @@ static int analyze_sbs(struct mddev *mddev)
}

super_types[mddev->major_version].
validate_super(mddev, freshest);
validate_super(mddev, NULL/*freshest*/, freshest);

i = 0;
rdev_for_each_safe(rdev, tmp, mddev) {
Expand All @@ -3839,7 +3873,7 @@ static int analyze_sbs(struct mddev *mddev)
}
if (rdev != freshest) {
if (super_types[mddev->major_version].
validate_super(mddev, rdev)) {
validate_super(mddev, freshest, rdev)) {
pr_warn("md: kicking non-fresh %pg from array!\n",
rdev->bdev);
md_kick_rdev_from_array(rdev);
Expand Down Expand Up @@ -6847,7 +6881,7 @@ int md_add_new_disk(struct mddev *mddev, struct mdu_disk_info_s *info)
rdev->saved_raid_disk = rdev->raid_disk;
} else
super_types[mddev->major_version].
validate_super(mddev, rdev);
validate_super(mddev, NULL/*freshest*/, rdev);
if ((info->state & (1<<MD_DISK_SYNC)) &&
rdev->raid_disk != info->raid_disk) {
/* This was a hot-add request, but events doesn't
Expand Down

0 comments on commit dc1cc22

Please sign in to comment.