Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://neil.brown.name/md
Browse files Browse the repository at this point in the history
* 'for-linus' of git://neil.brown.name/md: (52 commits)
  md: Protect access to mddev->disks list using RCU
  md: only count actual openers as access which prevent a 'stop'
  md: linear: Make array_size sector-based and rename it to array_sectors.
  md: Make mddev->array_size sector-based.
  md: Make super_type->rdev_size_change() take sector-based sizes.
  md: Fix check for overlapping devices.
  md: Tidy up rdev_size_store a bit:
  md: Remove some unused macros.
  md: Turn rdev->sb_offset into a sector-based quantity.
  md: Make calc_dev_sboffset() return a sector count.
  md: Replace calc_dev_size() by calc_num_sectors().
  md: Make update_size() take the number of sectors.
  md: Better control of when do_md_stop is allowed to stop the array.
  md: get_disk_info(): Don't convert between signed and unsigned and back.
  md: Simplify restart_array().
  md: alloc_disk_sb(): Return proper error value.
  md: Simplify sb_equal().
  md: Simplify uuid_equal().
  md: sb_equal(): Fix misleading printk.
  md: Fix a typo in the comment to cmd_match().
  ...
  • Loading branch information
Linus Torvalds committed Jul 21, 2008
2 parents 519f014 + 4b80991 commit 8a39262
Show file tree
Hide file tree
Showing 16 changed files with 842 additions and 790 deletions.
30 changes: 29 additions & 1 deletion Documentation/md.txt
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,11 @@ All md devices contain:
writing the word for the desired state, however some states
cannot be explicitly set, and some transitions are not allowed.

Select/poll works on this file. All changes except between
active_idle and active (which can be frequent and are not
very interesting) are notified. active->active_idle is
reported if the metadata is externally managed.

clear
No devices, no size, no level
Writing is equivalent to STOP_ARRAY ioctl
Expand Down Expand Up @@ -292,6 +297,10 @@ Each directory contains:
writemostly - device will only be subject to read
requests if there are no other options.
This applies only to raid1 arrays.
blocked - device has failed, metadata is "external",
and the failure hasn't been acknowledged yet.
Writes that would write to this device if
it were not faulty are blocked.
spare - device is working, but not a full member.
This includes spares that are in the process
of being recovered to
Expand All @@ -301,6 +310,12 @@ Each directory contains:
Writing "remove" removes the device from the array.
Writing "writemostly" sets the writemostly flag.
Writing "-writemostly" clears the writemostly flag.
Writing "blocked" sets the "blocked" flag.
Writing "-blocked" clears the "blocked" flag and allows writes
to complete.

This file responds to select/poll. Any change to 'faulty'
or 'blocked' causes an event.

errors
An approximate count of read errors that have been detected on
Expand Down Expand Up @@ -332,7 +347,7 @@ Each directory contains:
for storage of data. This will normally be the same as the
component_size. This can be written while assembling an
array. If a value less than the current component_size is
written, component_size will be reduced to this value.
written, it will be rejected.


An active md device will also contain an entry for each active device
Expand Down Expand Up @@ -381,6 +396,19 @@ also have
'check' and 'repair' will start the appropriate process
providing the current state is 'idle'.

This file responds to select/poll. Any important change in the value
triggers a poll event. Sometimes the value will briefly be
"recover" if a recovery seems to be needed, but cannot be
achieved. In that case, the transition to "recover" isn't
notified, but the transition away is.

degraded
This contains a count of the number of devices by which the
array is degraded. So an optimal array will show '0'. A
single failed/missing drive will show '1', etc.
This file responds to select/poll; any increase or decrease
in the count of missing devices will trigger an event.

mismatch_count
When performing 'check' and 'repair', and possibly when
performing 'resync', md will count the number of errors that are
Expand Down
54 changes: 39 additions & 15 deletions drivers/md/bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
|| test_bit(Faulty, &rdev->flags))
continue;

target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512);
target = rdev->sb_start + offset + index * (PAGE_SIZE/512);

if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) {
page->index = index;
Expand All @@ -241,10 +241,10 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
{
mdk_rdev_t *rdev;
struct list_head *tmp;
mddev_t *mddev = bitmap->mddev;

rdev_for_each(rdev, tmp, mddev)
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev)
if (test_bit(In_sync, &rdev->flags)
&& !test_bit(Faulty, &rdev->flags)) {
int size = PAGE_SIZE;
Expand All @@ -260,32 +260,37 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
+ (long)(page->index * (PAGE_SIZE/512))
+ size/512 > 0)
/* bitmap runs in to metadata */
return -EINVAL;
goto bad_alignment;
if (rdev->data_offset + mddev->size*2
> rdev->sb_offset*2 + bitmap->offset)
> rdev->sb_start + bitmap->offset)
/* data runs in to bitmap */
return -EINVAL;
} else if (rdev->sb_offset*2 < rdev->data_offset) {
goto bad_alignment;
} else if (rdev->sb_start < rdev->data_offset) {
/* METADATA BITMAP DATA */
if (rdev->sb_offset*2
if (rdev->sb_start
+ bitmap->offset
+ page->index*(PAGE_SIZE/512) + size/512
> rdev->data_offset)
/* bitmap runs in to data */
return -EINVAL;
goto bad_alignment;
} else {
/* DATA METADATA BITMAP - no problems */
}
md_super_write(mddev, rdev,
(rdev->sb_offset<<1) + bitmap->offset
rdev->sb_start + bitmap->offset
+ page->index * (PAGE_SIZE/512),
size,
page);
}
rcu_read_unlock();

if (wait)
md_super_wait(mddev);
return 0;

bad_alignment:
rcu_read_unlock();
return -EINVAL;
}

static void bitmap_file_kick(struct bitmap *bitmap);
Expand Down Expand Up @@ -454,8 +459,11 @@ void bitmap_update_sb(struct bitmap *bitmap)
spin_unlock_irqrestore(&bitmap->lock, flags);
sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0);
sb->events = cpu_to_le64(bitmap->mddev->events);
if (!bitmap->mddev->degraded)
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
if (bitmap->mddev->events < bitmap->events_cleared) {
/* rocking back to read-only */
bitmap->events_cleared = bitmap->mddev->events;
sb->events_cleared = cpu_to_le64(bitmap->events_cleared);
}
kunmap_atomic(sb, KM_USER0);
write_page(bitmap, bitmap->sb_page, 1);
}
Expand Down Expand Up @@ -1085,9 +1093,19 @@ void bitmap_daemon_work(struct bitmap *bitmap)
} else
spin_unlock_irqrestore(&bitmap->lock, flags);
lastpage = page;
/*
printk("bitmap clean at page %lu\n", j);
*/

/* We are possibly going to clear some bits, so make
* sure that events_cleared is up-to-date.
*/
if (bitmap->need_sync) {
bitmap_super_t *sb;
bitmap->need_sync = 0;
sb = kmap_atomic(bitmap->sb_page, KM_USER0);
sb->events_cleared =
cpu_to_le64(bitmap->events_cleared);
kunmap_atomic(sb, KM_USER0);
write_page(bitmap, bitmap->sb_page, 1);
}
spin_lock_irqsave(&bitmap->lock, flags);
clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN);
}
Expand Down Expand Up @@ -1257,6 +1275,12 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
return;
}

if (success &&
bitmap->events_cleared < bitmap->mddev->events) {
bitmap->events_cleared = bitmap->mddev->events;
bitmap->need_sync = 1;
}

if (!success && ! (*bmc & NEEDED_MASK))
*bmc |= NEEDED_MASK;

Expand Down
2 changes: 1 addition & 1 deletion drivers/md/faulty.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ static int run(mddev_t *mddev)
rdev_for_each(rdev, tmp, mddev)
conf->rdev = rdev;

mddev->array_size = mddev->size;
mddev->array_sectors = mddev->size * 2;
mddev->private = conf;

reconfig(mddev, mddev->layout, -1);
Expand Down
20 changes: 10 additions & 10 deletions drivers/md/linear.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,13 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
return NULL;

cnt = 0;
conf->array_size = 0;
conf->array_sectors = 0;

rdev_for_each(rdev, tmp, mddev) {
int j = rdev->raid_disk;
dev_info_t *disk = conf->disks + j;

if (j < 0 || j > raid_disks || disk->rdev) {
if (j < 0 || j >= raid_disks || disk->rdev) {
printk("linear: disk numbering problem. Aborting!\n");
goto out;
}
Expand All @@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);

disk->size = rdev->size;
conf->array_size += rdev->size;
conf->array_sectors += rdev->size * 2;

cnt++;
}
Expand All @@ -155,7 +155,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
goto out;
}

min_spacing = conf->array_size;
min_spacing = conf->array_sectors / 2;
sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *));

/* min_spacing is the minimum spacing that will fit the hash
Expand All @@ -164,7 +164,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
* that is larger than min_spacing and use the size of that as
* the actual spacing
*/
conf->hash_spacing = conf->array_size;
conf->hash_spacing = conf->array_sectors / 2;
for (i=0; i < cnt-1 ; i++) {
sector_t sz = 0;
int j;
Expand Down Expand Up @@ -194,7 +194,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
unsigned round;
unsigned long base;

sz = conf->array_size >> conf->preshift;
sz = conf->array_sectors >> (conf->preshift + 1);
sz += 1; /* force round-up */
base = conf->hash_spacing >> conf->preshift;
round = sector_div(sz, base);
Expand All @@ -221,7 +221,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
curr_offset = 0;
i = 0;
for (curr_offset = 0;
curr_offset < conf->array_size;
curr_offset < conf->array_sectors / 2;
curr_offset += conf->hash_spacing) {

while (i < raid_disks-1 &&
Expand Down Expand Up @@ -258,7 +258,7 @@ static int linear_run (mddev_t *mddev)
if (!conf)
return 1;
mddev->private = conf;
mddev->array_size = conf->array_size;
mddev->array_sectors = conf->array_sectors;

blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->unplug_fn = linear_unplug;
Expand Down Expand Up @@ -292,8 +292,8 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
newconf->prev = mddev_to_conf(mddev);
mddev->private = newconf;
mddev->raid_disks++;
mddev->array_size = newconf->array_size;
set_capacity(mddev->gendisk, mddev->array_size << 1);
mddev->array_sectors = newconf->array_sectors;
set_capacity(mddev->gendisk, mddev->array_sectors);
return 0;
}

Expand Down
Loading

0 comments on commit 8a39262

Please sign in to comment.