Skip to content

Commit

Permalink
[PATCH] md: improve locking on 'safemode' and move superblock writes
Browse files Browse the repository at this point in the history
When md marks the superblock dirty before a write, it calls
generic_make_request (to write the superblock) from within
generic_make_request (to write the first dirty block), which could cause
problems later.

With this patch, the superblock write is always done by the helper thread, and
write requests are delayed until that write completes.

Also, the locking around marking the array dirty and writing the superblock is
improved to avoid possible races.

Signed-off-by: Neil Brown <neilb@cse.unsw.edu.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
NeilBrown authored and Linus Torvalds committed Jun 22, 2005
1 parent fca4d84 commit 06d91a5
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 21 deletions.
73 changes: 59 additions & 14 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,8 @@ static mddev_t * mddev_find(dev_t unit)
INIT_LIST_HEAD(&new->all_mddevs);
init_timer(&new->safemode_timer);
atomic_set(&new->active, 1);
bio_list_init(&new->write_list);
spin_lock_init(&new->write_lock);

new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) {
Expand Down Expand Up @@ -1251,9 +1253,11 @@ static void md_update_sb(mddev_t * mddev)
int err, count = 100;
struct list_head *tmp;
mdk_rdev_t *rdev;
int sync_req;

mddev->sb_dirty = 0;
repeat:
spin_lock(&mddev->write_lock);
sync_req = mddev->in_sync;
mddev->utime = get_seconds();
mddev->events ++;

Expand All @@ -1272,8 +1276,12 @@ static void md_update_sb(mddev_t * mddev)
* do not write anything to disk if using
* nonpersistent superblocks
*/
if (!mddev->persistent)
if (!mddev->persistent) {
mddev->sb_dirty = 0;
spin_unlock(&mddev->write_lock);
return;
}
spin_unlock(&mddev->write_lock);

dprintk(KERN_INFO
"md: updating %s RAID superblock on device (in sync %d)\n",
Expand Down Expand Up @@ -1304,6 +1312,15 @@ static void md_update_sb(mddev_t * mddev)
printk(KERN_ERR \
"md: excessive errors occurred during superblock update, exiting\n");
}
spin_lock(&mddev->write_lock);
if (mddev->in_sync != sync_req) {
/* have to write it out again */
spin_unlock(&mddev->write_lock);
goto repeat;
}
mddev->sb_dirty = 0;
spin_unlock(&mddev->write_lock);

}

/*
Expand Down Expand Up @@ -3178,19 +3195,31 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
}


void md_write_start(mddev_t *mddev)
/* md_write_start(mddev, bi)
* If we need to update some array metadata (e.g. 'active' flag
* in superblock) before writing, queue bi for later writing
* and return 0, else return 1 and it will be written now
*/
int md_write_start(mddev_t *mddev, struct bio *bi)
{
if (!atomic_read(&mddev->writes_pending)) {
mddev_lock_uninterruptible(mddev);
if (mddev->in_sync) {
mddev->in_sync = 0;
del_timer(&mddev->safemode_timer);
md_update_sb(mddev);
}
atomic_inc(&mddev->writes_pending);
mddev_unlock(mddev);
} else
atomic_inc(&mddev->writes_pending);
if (bio_data_dir(bi) != WRITE)
return 1;

atomic_inc(&mddev->writes_pending);
spin_lock(&mddev->write_lock);
if (mddev->in_sync == 0 && mddev->sb_dirty == 0) {
spin_unlock(&mddev->write_lock);
return 1;
}
bio_list_add(&mddev->write_list, bi);

if (mddev->in_sync) {
mddev->in_sync = 0;
mddev->sb_dirty = 1;
}
spin_unlock(&mddev->write_lock);
md_wakeup_thread(mddev->thread);
return 0;
}

void md_write_end(mddev_t *mddev)
Expand Down Expand Up @@ -3472,6 +3501,7 @@ void md_check_recovery(mddev_t *mddev)
mddev->sb_dirty ||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
mddev->write_list.head ||
(mddev->safemode == 1) ||
(mddev->safemode == 2 && ! atomic_read(&mddev->writes_pending)
&& !mddev->in_sync && mddev->recovery_cp == MaxSector)
Expand All @@ -3480,17 +3510,32 @@ void md_check_recovery(mddev_t *mddev)

if (mddev_trylock(mddev)==0) {
int spares =0;
struct bio *blist;

spin_lock(&mddev->write_lock);
if (mddev->safemode && !atomic_read(&mddev->writes_pending) &&
!mddev->in_sync && mddev->recovery_cp == MaxSector) {
mddev->in_sync = 1;
mddev->sb_dirty = 1;
}
if (mddev->safemode == 1)
mddev->safemode = 0;
blist = bio_list_get(&mddev->write_list);
spin_unlock(&mddev->write_lock);

if (mddev->sb_dirty)
md_update_sb(mddev);

while (blist) {
struct bio *b = blist;
blist = blist->bi_next;
b->bi_next = NULL;
generic_make_request(b);
/* we already counted this, so need to un-count */
md_write_end(mddev);
}


if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
!test_bit(MD_RECOVERY_DONE, &mddev->recovery)) {
/* resync/recovery still happening */
Expand Down
4 changes: 3 additions & 1 deletion drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
* thread has put up a bar for new requests.
* Continue immediately if no resync is active currently.
*/
if (md_write_start(mddev, bio)==0)
return 0;
spin_lock_irq(&conf->resync_lock);
wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
conf->nr_pending++;
Expand Down Expand Up @@ -611,7 +613,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
rcu_read_unlock();

atomic_set(&r1_bio->remaining, 1);
md_write_start(mddev);

for (i = 0; i < disks; i++) {
struct bio *mbio;
if (!r1_bio->bios[i])
Expand Down
5 changes: 4 additions & 1 deletion drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,9 @@ static int make_request(request_queue_t *q, struct bio * bio)
return 0;
}

if (md_write_start(mddev, bio) == 0)
return 0;

/*
* Register the new request and wait if the reconstruction
* thread has put up a bar for new requests.
Expand Down Expand Up @@ -774,7 +777,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
rcu_read_unlock();

atomic_set(&r10_bio->remaining, 1);
md_write_start(mddev);

for (i = 0; i < conf->copies; i++) {
struct bio *mbio;
int d = r10_bio->devs[i].devnum;
Expand Down
6 changes: 4 additions & 2 deletions drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -1411,6 +1411,9 @@ static int make_request (request_queue_t *q, struct bio * bi)
sector_t logical_sector, last_sector;
struct stripe_head *sh;

if (md_write_start(mddev, bi)==0)
return 0;

if (bio_data_dir(bi)==WRITE) {
disk_stat_inc(mddev->gendisk, writes);
disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi));
Expand All @@ -1423,8 +1426,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
last_sector = bi->bi_sector + (bi->bi_size>>9);
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
if ( bio_data_dir(bi) == WRITE )
md_write_start(mddev);

for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);

Expand Down
6 changes: 4 additions & 2 deletions drivers/md/raid6main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1570,6 +1570,9 @@ static int make_request (request_queue_t *q, struct bio * bi)
sector_t logical_sector, last_sector;
struct stripe_head *sh;

if (md_write_start(mddev, bi)==0)
return 0;

if (bio_data_dir(bi)==WRITE) {
disk_stat_inc(mddev->gendisk, writes);
disk_stat_add(mddev->gendisk, write_sectors, bio_sectors(bi));
Expand All @@ -1583,8 +1586,7 @@ static int make_request (request_queue_t *q, struct bio * bi)

bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
if ( bio_data_dir(bi) == WRITE )
md_write_start(mddev);

for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);

Expand Down
2 changes: 1 addition & 1 deletion include/linux/raid/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
extern void md_unregister_thread (mdk_thread_t *thread);
extern void md_wakeup_thread(mdk_thread_t *thread);
extern void md_check_recovery(mddev_t *mddev);
extern void md_write_start(mddev_t *mddev);
extern int md_write_start(mddev_t *mddev, struct bio *bi);
extern void md_write_end(mddev_t *mddev);
extern void md_handle_safemode(mddev_t *mddev);
extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
Expand Down
7 changes: 7 additions & 0 deletions include/linux/raid/md_k.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
#ifndef _MD_K_H
#define _MD_K_H

/* and dm-bio-list.h is not under include/linux because.... ??? */
#include "../../../drivers/md/dm-bio-list.h"

#define MD_RESERVED 0UL
#define LINEAR 1UL
#define RAID0 2UL
Expand Down Expand Up @@ -252,6 +255,10 @@ struct mddev_s
atomic_t recovery_active; /* blocks scheduled, but not written */
wait_queue_head_t recovery_wait;
sector_t recovery_cp;

spinlock_t write_lock;
struct bio_list write_list;

unsigned int safemode; /* if set, update "clean" superblock
* when no writes pending.
*/
Expand Down

0 comments on commit 06d91a5

Please sign in to comment.