Skip to content

Commit

Permalink
Merge tag 'md/3.13' of git://neil.brown.name/md
Browse files Browse the repository at this point in the history
Pull md update from Neil Brown:
 "Mostly optimisations and obscure bug fixes.
   - raid5 gets less lock contention
   - raid1 gets less contention between normal-io and resync-io during
     resync"

* tag 'md/3.13' of git://neil.brown.name/md:
  md/raid5: Use conf->device_lock protect changing of multi-thread resources.
  md/raid5: Before freeing old multi-thread worker, it should flush them.
  md/raid5: For stripe with R5_ReadNoMerge, we replace REQ_FLUSH with REQ_NOMERGE.
  UAPI: include <asm/byteorder.h> in linux/raid/md_p.h
  raid1: Rewrite the implementation of iobarrier.
  raid1: Add some macros to make code clearly.
  raid1: Replace raise_barrier/lower_barrier with freeze_array/unfreeze_array when reconfiguring the array.
  raid1: Add a field array_frozen to indicate whether raid in freeze state.
  md: Convert use of typedef ctl_table to struct ctl_table
  md/raid5: avoid deadlock when raid5 array has unack badblocks during md_stop_writes.
  md: use MD_RECOVERY_INTR instead of kthread_should_stop in resync thread.
  md: fix some places where mddev_lock return value is not checked.
  raid5: Retry R5_ReadNoMerge flag when hit a read error.
  raid5: relieve lock contention in get_active_stripe()
  raid5: relieve lock contention in get_active_stripe()
  wait: add wait_event_cmd()
  md/raid5.c: add proper locking to error path of raid5_start_reshape.
  md: fix calculation of stacking limits on level change.
  raid5: Use slow_path to release stripe when mddev->thread is null
  • Loading branch information
Linus Torvalds committed Nov 20, 2013
2 parents b4789b8 + 60aaf93 commit 6d6e352
Show file tree
Hide file tree
Showing 8 changed files with 592 additions and 186 deletions.
133 changes: 80 additions & 53 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ static inline int speed_max(struct mddev *mddev)

static struct ctl_table_header *raid_table_header;

static ctl_table raid_table[] = {
static struct ctl_table raid_table[] = {
{
.procname = "speed_limit_min",
.data = &sysctl_speed_limit_min,
Expand All @@ -130,7 +130,7 @@ static ctl_table raid_table[] = {
{ }
};

static ctl_table raid_dir_table[] = {
static struct ctl_table raid_dir_table[] = {
{
.procname = "raid",
.maxlen = 0,
Expand All @@ -140,7 +140,7 @@ static ctl_table raid_dir_table[] = {
{ }
};

static ctl_table raid_root_table[] = {
static struct ctl_table raid_root_table[] = {
{
.procname = "dev",
.maxlen = 0,
Expand Down Expand Up @@ -562,11 +562,19 @@ static struct mddev * mddev_find(dev_t unit)
goto retry;
}

static inline int mddev_lock(struct mddev * mddev)
static inline int __must_check mddev_lock(struct mddev * mddev)
{
return mutex_lock_interruptible(&mddev->reconfig_mutex);
}

/* Sometimes we need to take the lock in a situation where
* failure due to interrupts is not acceptable.
*/
static inline void mddev_lock_nointr(struct mddev * mddev)
{
mutex_lock(&mddev->reconfig_mutex);
}

static inline int mddev_is_locked(struct mddev *mddev)
{
return mutex_is_locked(&mddev->reconfig_mutex);
Expand Down Expand Up @@ -2978,7 +2986,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
for_each_mddev(mddev, tmp) {
struct md_rdev *rdev2;

mddev_lock(mddev);
mddev_lock_nointr(mddev);
rdev_for_each(rdev2, mddev)
if (rdev->bdev == rdev2->bdev &&
rdev != rdev2 &&
Expand All @@ -2994,7 +3002,7 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
break;
}
}
mddev_lock(my_mddev);
mddev_lock_nointr(my_mddev);
if (overlap) {
/* Someone else could have slipped in a size
* change here, but doing so is just silly.
Expand Down Expand Up @@ -3580,6 +3588,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->in_sync = 1;
del_timer_sync(&mddev->safemode_timer);
}
blk_set_stacking_limits(&mddev->queue->limits);
pers->run(mddev);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
mddev_resume(mddev);
Expand Down Expand Up @@ -5258,7 +5267,7 @@ static void __md_stop_writes(struct mddev *mddev)

void md_stop_writes(struct mddev *mddev)
{
mddev_lock(mddev);
mddev_lock_nointr(mddev);
__md_stop_writes(mddev);
mddev_unlock(mddev);
}
Expand Down Expand Up @@ -5291,20 +5300,35 @@ EXPORT_SYMBOL_GPL(md_stop);
static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
{
int err = 0;
int did_freeze = 0;

if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
/* Thread might be blocked waiting for metadata update
* which will now never happen */
wake_up_process(mddev->sync_thread->tsk);
}
mddev_unlock(mddev);
wait_event(resync_wait, mddev->sync_thread == NULL);
mddev_lock_nointr(mddev);

mutex_lock(&mddev->open_mutex);
if (atomic_read(&mddev->openers) > !!bdev) {
if (atomic_read(&mddev->openers) > !!bdev ||
mddev->sync_thread ||
(bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
printk("md: %s still in use.\n",mdname(mddev));
if (did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
err = -EBUSY;
goto out;
}
if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
/* Someone opened the device since we flushed it
* so page cache could be dirty and it is too late
* to flush. So abort
*/
mutex_unlock(&mddev->open_mutex);
return -EBUSY;
}
if (mddev->pers) {
__md_stop_writes(mddev);

Expand All @@ -5315,7 +5339,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
set_disk_ro(mddev->gendisk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_state);
err = 0;
err = 0;
}
out:
mutex_unlock(&mddev->open_mutex);
Expand All @@ -5331,20 +5355,34 @@ static int do_md_stop(struct mddev * mddev, int mode,
{
struct gendisk *disk = mddev->gendisk;
struct md_rdev *rdev;
int did_freeze = 0;

if (!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery)) {
did_freeze = 1;
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
/* Thread might be blocked waiting for metadata update
* which will now never happen */
wake_up_process(mddev->sync_thread->tsk);
}
mddev_unlock(mddev);
wait_event(resync_wait, mddev->sync_thread == NULL);
mddev_lock_nointr(mddev);

mutex_lock(&mddev->open_mutex);
if (atomic_read(&mddev->openers) > !!bdev ||
mddev->sysfs_active) {
mddev->sysfs_active ||
mddev->sync_thread ||
(bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) {
printk("md: %s still in use.\n",mdname(mddev));
mutex_unlock(&mddev->open_mutex);
return -EBUSY;
}
if (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags)) {
/* Someone opened the device since we flushed it
* so page cache could be dirty and it is too late
* to flush. So abort
*/
mutex_unlock(&mddev->open_mutex);
if (did_freeze) {
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
return -EBUSY;
}
if (mddev->pers) {
Expand Down Expand Up @@ -6551,7 +6589,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_DEVS, &mddev->flags) &&
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
mddev_lock(mddev);
mddev_lock_nointr(mddev);
}
} else {
err = -EROFS;
Expand Down Expand Up @@ -7361,9 +7399,6 @@ void md_do_sync(struct md_thread *thread)
mddev->curr_resync = 2;

try_again:
if (kthread_should_stop())
set_bit(MD_RECOVERY_INTR, &mddev->recovery);

if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
goto skip;
for_each_mddev(mddev2, tmp) {
Expand All @@ -7388,7 +7423,7 @@ void md_do_sync(struct md_thread *thread)
* be caught by 'softlockup'
*/
prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
if (!kthread_should_stop() &&
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev2->curr_resync >= mddev->curr_resync) {
printk(KERN_INFO "md: delaying %s of %s"
" until %s has finished (they"
Expand Down Expand Up @@ -7464,7 +7499,7 @@ void md_do_sync(struct md_thread *thread)
last_check = 0;

if (j>2) {
printk(KERN_INFO
printk(KERN_INFO
"md: resuming %s of %s from checkpoint.\n",
desc, mdname(mddev));
mddev->curr_resync = j;
Expand Down Expand Up @@ -7501,25 +7536,27 @@ void md_do_sync(struct md_thread *thread)
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}

while (j >= mddev->resync_max && !kthread_should_stop()) {
while (j >= mddev->resync_max &&
!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* As this condition is controlled by user-space,
* we can block indefinitely, so use '_interruptible'
* to avoid triggering warnings.
*/
flush_signals(current); /* just in case */
wait_event_interruptible(mddev->recovery_wait,
mddev->resync_max > j
|| kthread_should_stop());
|| test_bit(MD_RECOVERY_INTR,
&mddev->recovery));
}

if (kthread_should_stop())
goto interrupted;
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
break;

sectors = mddev->pers->sync_request(mddev, j, &skipped,
currspeed < speed_min(mddev));
if (sectors == 0) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto out;
break;
}

if (!skipped) { /* actual IO requested */
Expand Down Expand Up @@ -7556,10 +7593,8 @@ void md_do_sync(struct md_thread *thread)
last_mark = next;
}


if (kthread_should_stop())
goto interrupted;

if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
break;

/*
* this loop exits only if either when we are slower than
Expand All @@ -7582,11 +7617,12 @@ void md_do_sync(struct md_thread *thread)
}
}
}
printk(KERN_INFO "md: %s: %s done.\n",mdname(mddev), desc);
printk(KERN_INFO "md: %s: %s %s.\n",mdname(mddev), desc,
test_bit(MD_RECOVERY_INTR, &mddev->recovery)
? "interrupted" : "done");
/*
* this also signals 'finished resyncing' to md_stop
*/
out:
blk_finish_plug(&plug);
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));

Expand Down Expand Up @@ -7640,16 +7676,6 @@ void md_do_sync(struct md_thread *thread)
set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread);
return;

interrupted:
/*
* got a signal, exit.
*/
printk(KERN_INFO
"md: md_do_sync() got signal ... exiting\n");
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto out;

}
EXPORT_SYMBOL_GPL(md_do_sync);

Expand Down Expand Up @@ -7894,6 +7920,7 @@ void md_reap_sync_thread(struct mddev *mddev)

/* resync has finished, collect result */
md_unregister_thread(&mddev->sync_thread);
wake_up(&resync_wait);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
Expand Down
Loading

0 comments on commit 6d6e352

Please sign in to comment.