From cc13b1d1500656a20e41960668f3392dda9fa6e2 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 May 2014 13:34:37 +1000 Subject: [PATCH 1/2] md/raid10: call wait_barrier() for each request submitted. wait_barrier() includes a counter, so we must call it precisely once (unless balanced by allow_barrier()) for each request submitted. Since commit 20d0189b1012a37d2533a87fb451f7852f2418d1 ("block: Introduce new bio_split()") in 3.14-rc1, we no longer call wait_barrier() for the extra requests generated when we need to split a bio. When this happens the counter goes negative, so any resync/recovery will never start and "mdadm --stop" will hang. Reported-by: Chris Murphy Fixes: 20d0189b1012a37d2533a87fb451f7852f2418d1 Cc: stable@vger.kernel.org (3.14+) Cc: Kent Overstreet Signed-off-by: NeilBrown --- drivers/md/raid10.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 33fc408e5eac..cb882aae9e20 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1172,6 +1172,13 @@ static void __make_request(struct mddev *mddev, struct bio *bio) int max_sectors; int sectors; + /* + * Register the new request and wait if the reconstruction + * thread has put up a bar for new requests. + * Continue immediately if no resync is active currently. + */ + wait_barrier(conf); + sectors = bio_sectors(bio); while (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && bio->bi_iter.bi_sector < conf->reshape_progress && @@ -1552,12 +1559,6 @@ static void make_request(struct mddev *mddev, struct bio *bio) md_write_start(mddev, bio); - /* - * Register the new request and wait if the reconstruction - * thread has put up a bar for new requests. - * Continue immediately if no resync is active currently. - */ - wait_barrier(conf); do { From 0f62fb220aa4ebabe8547d3a9ce4a16d3c045f21 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 6 May 2014 09:36:08 +1000 Subject: [PATCH 2/2] md: avoid possible spinning md thread at shutdown. 
If an md array with externally managed metadata (e.g. DDF or IMSM) is in use, then we should not set safemode==2 at shutdown because: 1/ this is ineffective: user-space needs to be involved in any 'safemode' handling, 2/ The safemode management code doesn't cope with safemode==2 on external metadata and md_check_recovery() enters an infinite loop. Even at shutdown, an infinite-looping process can be problematic, so this could cause shutdown to hang. Cc: stable@vger.kernel.org (any kernel) Signed-off-by: NeilBrown --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 8fda38d23e38..237b7e0ddc7a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -8516,7 +8516,8 @@ static int md_notify_reboot(struct notifier_block *this, if (mddev_trylock(mddev)) { if (mddev->pers) __md_stop_writes(mddev); - mddev->safemode = 2; + if (mddev->persistent) + mddev->safemode = 2; mddev_unlock(mddev); } need_delay = 1;