Skip to content

Commit

Permalink
md: fix problem when adding device to read-only array with bitmap.
Browse files Browse the repository at this point in the history
If an array is started degraded, and then the missing device
is found, it can be re-added and a minimal bitmap-based recovery
will bring it fully up-to-date.

If the array is read-only, a recovery would not be allowed.
But also if the array is read-only and the missing device was
present very recently, then there could be no need for any
recovery at all, so we simply include the device in the read-only
array without any recovery.

However... if the missing device was removed a little longer ago
it could be missing some updates, but if a bitmap is present it will
be conditionally accepted pending a bitmap-based update.  We don't
currently detect this case properly and will include that old
device into the read-only array with no recovery even though it really
needs a recovery.

This patch keeps track of whether a bitmap-based recovery is really
needed or not in the new Bitmap_sync rdev flag.  If that is set,
then the device will not be added to a read-only array.

Cc: Andrei Warkentin <andreiw@vmware.com>
Fixes: d70ed2e
Cc: stable@vger.kernel.org (3.2+)
Signed-off-by: NeilBrown <neilb@suse.de>
  • Loading branch information
NeilBrown committed Jan 14, 2014
1 parent e8b8491 commit 8313b8e
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
18 changes: 15 additions & 3 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -1087,6 +1087,7 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
rdev->raid_disk = -1;
clear_bit(Faulty, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);

if (mddev->raid_disks == 0) {
Expand Down Expand Up @@ -1165,6 +1166,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
*/
if (ev1 < mddev->bitmap->events_cleared)
return 0;
if (ev1 < mddev->events)
set_bit(Bitmap_sync, &rdev->flags);
} else {
if (ev1 < mddev->events)
/* just a hot-add of a new device, leave raid_disk at -1 */
Expand Down Expand Up @@ -1573,6 +1576,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
rdev->raid_disk = -1;
clear_bit(Faulty, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
clear_bit(WriteMostly, &rdev->flags);

if (mddev->raid_disks == 0) {
Expand Down Expand Up @@ -1655,6 +1659,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
*/
if (ev1 < mddev->bitmap->events_cleared)
return 0;
if (ev1 < mddev->events)
set_bit(Bitmap_sync, &rdev->flags);
} else {
if (ev1 < mddev->events)
/* just a hot-add of a new device, leave raid_disk at -1 */
Expand Down Expand Up @@ -2798,6 +2804,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
else
rdev->saved_raid_disk = -1;
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
err = rdev->mddev->pers->
hot_add_disk(rdev->mddev, rdev);
if (err) {
Expand Down Expand Up @@ -5770,6 +5777,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
info->raid_disk < mddev->raid_disks) {
rdev->raid_disk = info->raid_disk;
set_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
} else
rdev->raid_disk = -1;
} else
Expand Down Expand Up @@ -7716,7 +7724,8 @@ static int remove_and_add_spares(struct mddev *mddev,
if (test_bit(Faulty, &rdev->flags))
continue;
if (mddev->ro &&
rdev->saved_raid_disk < 0)
! (rdev->saved_raid_disk >= 0 &&
!test_bit(Bitmap_sync, &rdev->flags)))
continue;

rdev->recovery_offset = 0;
Expand Down Expand Up @@ -7797,9 +7806,12 @@ void md_check_recovery(struct mddev *mddev)
* As we only add devices that are already in-sync,
* we can activate the spares immediately.
*/
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
remove_and_add_spares(mddev, NULL);
mddev->pers->spare_active(mddev);
/* There is no thread, but we need to call
* ->spare_active and clear saved_raid_disk
*/
md_reap_sync_thread(mddev);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
goto unlock;
}

Expand Down
3 changes: 3 additions & 0 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ struct md_rdev {
enum flag_bits {
Faulty, /* device is known to have a fault */
In_sync, /* device is in_sync with rest of array */
Bitmap_sync, /* ..actually, not quite In_sync. Need a
* bitmap-based recovery to get fully in sync
*/
Unmerged, /* device is being added to array and should
* be considered for bvec_merge_fn but not
* yet for actual IO
Expand Down

0 comments on commit 8313b8e

Please sign in to comment.