Skip to content

Commit

Permalink
Merge branch 'for-linus' of git://neil.brown.name/md
Browse files Browse the repository at this point in the history
* 'for-linus' of git://neil.brown.name/md: (75 commits)
  md/raid10: handle further errors during fix_read_error better.
  md/raid10: Handle read errors during recovery better.
  md/raid10: simplify read error handling during recovery.
  md/raid10: record bad blocks due to write errors during resync/recovery.
  md/raid10:  attempt to fix read errors during resync/check
  md/raid10:  Handle write errors by updating badblock log.
  md/raid10: clear bad-block record when write succeeds.
  md/raid10: avoid writing to known bad blocks on known bad drives.
  md/raid10 record bad blocks as needed during recovery.
  md/raid10: avoid reading known bad blocks during resync/recovery.
  md/raid10 - avoid reading from known bad blocks - part 3
  md/raid10: avoid reading from known bad blocks - part 2
  md/raid10: avoid reading from known bad blocks - part 1
  md/raid10: Split handle_read_error out from raid10d.
  md/raid10: simplify/reindent some loops.
  md/raid5: Clear bad blocks on successful write.
  md/raid5.  Don't write to known bad block on doubtful devices.
  md/raid5: write errors should be recorded as bad blocks if possible.
  md/raid5: use bad-block log to improve handling of uncorrectable read errors.
  md/raid5: avoid reading from known bad blocks.
  ...
  • Loading branch information
Linus Torvalds committed Jul 28, 2011
2 parents 6f56c21 + 58c54fc commit 6140333
Show file tree
Hide file tree
Showing 12 changed files with 3,093 additions and 1,379 deletions.
29 changes: 23 additions & 6 deletions Documentation/md.txt
Original file line number Diff line number Diff line change
Expand Up @@ -360,28 +360,32 @@ Each directory contains:
A file recording the current state of the device in the array
which can be a comma separated list of
faulty - device has been kicked from active use due to
a detected fault
a detected fault or it has unacknowledged bad
blocks
in_sync - device is a fully in-sync member of the array
writemostly - device will only be subject to read
requests if there are no other options.
This applies only to raid1 arrays.
blocked - device has failed, metadata is "external",
and the failure hasn't been acknowledged yet.
blocked - device has failed, and the failure hasn't been
acknowledged yet by the metadata handler.
Writes that would write to this device if
it were not faulty are blocked.
spare - device is working, but not a full member.
This includes spares that are in the process
of being recovered to
write_error - device has seen a write error at some point.
This list may grow in future.
This can be written to.
Writing "faulty" simulates a failure on the device.
Writing "remove" removes the device from the array.
Writing "writemostly" sets the writemostly flag.
Writing "-writemostly" clears the writemostly flag.
Writing "blocked" sets the "blocked" flag.
Writing "-blocked" clears the "blocked" flag and allows writes
to complete.
Writing "-blocked" clears the "blocked" flags and allows writes
to complete and possibly simulates an error.
Writing "in_sync" sets the in_sync flag.
Writing "write_error" sets the writeerrorseen flag.
Writing "-write_error" clears the writeerrorseen flag.

This file responds to select/poll. Any change to 'faulty'
or 'blocked' causes an event.
Expand Down Expand Up @@ -419,7 +423,6 @@ Each directory contains:
written, it will be rejected.

recovery_start

When the device is not 'in_sync', this records the number of
sectors from the start of the device which are known to be
correct. This is normally zero, but during a recovery
Expand All @@ -435,6 +438,20 @@ Each directory contains:
Setting this to 'none' is equivalent to setting 'in_sync'.
Setting to any other value also clears the 'in_sync' flag.

bad_blocks
This gives the list of all known bad blocks in the form of
start address and length (both in sectors). If output
is too big to fit in a page, it will be truncated. Writing
"sector length" to this file adds new acknowledged (i.e.
recorded to disk safely) bad blocks.

unacknowledged_bad_blocks
This gives the list of known-but-not-yet-saved-to-disk bad
blocks in the same form as 'bad_blocks'. If output is too big
to fit in a page, it will be truncated. Writing to this file
adds bad blocks without acknowledging them. This is largely
for testing.



An active md device will also contain an entry for each active device
Expand Down
137 changes: 46 additions & 91 deletions drivers/md/bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
#include "md.h"
#include "bitmap.h"

#include <linux/dm-dirty-log.h>
/* debug macros */

#define DEBUG 0
Expand Down Expand Up @@ -775,10 +774,8 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon
* 0 or page 1
*/
static inline struct page *filemap_get_page(struct bitmap *bitmap,
unsigned long chunk)
unsigned long chunk)
{
if (bitmap->filemap == NULL)
return NULL;
if (file_page_index(bitmap, chunk) >= bitmap->file_pages)
return NULL;
return bitmap->filemap[file_page_index(bitmap, chunk)
Expand Down Expand Up @@ -878,28 +875,19 @@ enum bitmap_page_attr {
static inline void set_page_attr(struct bitmap *bitmap, struct page *page,
enum bitmap_page_attr attr)
{
if (page)
__set_bit((page->index<<2) + attr, bitmap->filemap_attr);
else
__set_bit(attr, &bitmap->logattrs);
__set_bit((page->index<<2) + attr, bitmap->filemap_attr);
}

static inline void clear_page_attr(struct bitmap *bitmap, struct page *page,
enum bitmap_page_attr attr)
{
if (page)
__clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
else
__clear_bit(attr, &bitmap->logattrs);
__clear_bit((page->index<<2) + attr, bitmap->filemap_attr);
}

static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page,
enum bitmap_page_attr attr)
{
if (page)
return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
else
return test_bit(attr, &bitmap->logattrs);
return test_bit((page->index<<2) + attr, bitmap->filemap_attr);
}

/*
Expand All @@ -912,30 +900,26 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p
static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
{
unsigned long bit;
struct page *page = NULL;
struct page *page;
void *kaddr;
unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap);

if (!bitmap->filemap) {
struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
if (log)
log->type->mark_region(log, chunk);
} else {
if (!bitmap->filemap)
return;

page = filemap_get_page(bitmap, chunk);
if (!page)
return;
bit = file_page_offset(bitmap, chunk);
page = filemap_get_page(bitmap, chunk);
if (!page)
return;
bit = file_page_offset(bitmap, chunk);

/* set the bit */
kaddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
set_bit(bit, kaddr);
else
__test_and_set_bit_le(bit, kaddr);
kunmap_atomic(kaddr, KM_USER0);
PRINTK("set file bit %lu page %lu\n", bit, page->index);
}
/* set the bit */
kaddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
set_bit(bit, kaddr);
else
__set_bit_le(bit, kaddr);
kunmap_atomic(kaddr, KM_USER0);
PRINTK("set file bit %lu page %lu\n", bit, page->index);
/* record page number so it gets flushed to disk when unplug occurs */
set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY);
}
Expand All @@ -952,16 +936,6 @@ void bitmap_unplug(struct bitmap *bitmap)

if (!bitmap)
return;
if (!bitmap->filemap) {
/* Must be using a dirty_log */
struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log;
dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs);
need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs);
if (dirty || need_write)
if (log->type->flush(log))
bitmap->flags |= BITMAP_WRITE_ERROR;
goto out;
}

/* look at each page to see if there are any set bits that need to be
* flushed out to disk */
Expand Down Expand Up @@ -990,7 +964,6 @@ void bitmap_unplug(struct bitmap *bitmap)
else
md_super_wait(bitmap->mddev);
}
out:
if (bitmap->flags & BITMAP_WRITE_ERROR)
bitmap_file_kick(bitmap);
}
Expand Down Expand Up @@ -1199,7 +1172,6 @@ void bitmap_daemon_work(mddev_t *mddev)
struct page *page = NULL, *lastpage = NULL;
sector_t blocks;
void *paddr;
struct dm_dirty_log *log = mddev->bitmap_info.log;

/* Use a mutex to guard daemon_work against
* bitmap_destroy.
Expand All @@ -1224,12 +1196,11 @@ void bitmap_daemon_work(mddev_t *mddev)
spin_lock_irqsave(&bitmap->lock, flags);
for (j = 0; j < bitmap->chunks; j++) {
bitmap_counter_t *bmc;
if (!bitmap->filemap) {
if (!log)
/* error or shutdown */
break;
} else
page = filemap_get_page(bitmap, j);
if (!bitmap->filemap)
/* error or shutdown */
break;

page = filemap_get_page(bitmap, j);

if (page != lastpage) {
/* skip this page unless it's marked as needing cleaning */
Expand Down Expand Up @@ -1298,34 +1269,29 @@ void bitmap_daemon_work(mddev_t *mddev)
-1);

/* clear the bit */
if (page) {
paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
clear_bit(file_page_offset(bitmap, j),
paddr);
else
__test_and_clear_bit_le(file_page_offset(bitmap, j),
paddr);
kunmap_atomic(paddr, KM_USER0);
} else
log->type->clear_region(log, j);
paddr = kmap_atomic(page, KM_USER0);
if (bitmap->flags & BITMAP_HOSTENDIAN)
clear_bit(file_page_offset(bitmap, j),
paddr);
else
__clear_bit_le(
file_page_offset(bitmap,
j),
paddr);
kunmap_atomic(paddr, KM_USER0);
}
} else
j |= PAGE_COUNTER_MASK;
}
spin_unlock_irqrestore(&bitmap->lock, flags);

/* now sync the final page */
if (lastpage != NULL || log != NULL) {
if (lastpage != NULL) {
spin_lock_irqsave(&bitmap->lock, flags);
if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) {
clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
if (lastpage)
write_page(bitmap, lastpage, 0);
else
if (log->type->flush(log))
bitmap->flags |= BITMAP_WRITE_ERROR;
write_page(bitmap, lastpage, 0);
} else {
set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE);
spin_unlock_irqrestore(&bitmap->lock, flags);
Expand Down Expand Up @@ -1767,12 +1733,10 @@ int bitmap_create(mddev_t *mddev)
BUILD_BUG_ON(sizeof(bitmap_super_t) != 256);

if (!file
&& !mddev->bitmap_info.offset
&& !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */
&& !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */
return 0;

BUG_ON(file && mddev->bitmap_info.offset);
BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log);

bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
if (!bitmap)
Expand Down Expand Up @@ -1863,6 +1827,7 @@ int bitmap_create(mddev_t *mddev)
int bitmap_load(mddev_t *mddev)
{
int err = 0;
sector_t start = 0;
sector_t sector = 0;
struct bitmap *bitmap = mddev->bitmap;

Expand All @@ -1881,24 +1846,14 @@ int bitmap_load(mddev_t *mddev)
}
bitmap_close_sync(bitmap);

if (mddev->bitmap_info.log) {
unsigned long i;
struct dm_dirty_log *log = mddev->bitmap_info.log;
for (i = 0; i < bitmap->chunks; i++)
if (!log->type->in_sync(log, i, 1))
bitmap_set_memory_bits(bitmap,
(sector_t)i << CHUNK_BLOCK_SHIFT(bitmap),
1);
} else {
sector_t start = 0;
if (mddev->degraded == 0
|| bitmap->events_cleared == mddev->events)
/* no need to keep dirty bits to optimise a
* re-add of a missing device */
start = mddev->recovery_cp;

err = bitmap_init_from_disk(bitmap, start);
}
if (mddev->degraded == 0
|| bitmap->events_cleared == mddev->events)
/* no need to keep dirty bits to optimise a
* re-add of a missing device */
start = mddev->recovery_cp;

err = bitmap_init_from_disk(bitmap, start);

if (err)
goto out;

Expand Down
5 changes: 0 additions & 5 deletions drivers/md/bitmap.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,10 +212,6 @@ struct bitmap {
unsigned long file_pages; /* number of pages in the file */
int last_page_size; /* bytes in the last page */

unsigned long logattrs; /* used when filemap_attr doesn't exist
* because we are working with a dirty_log
*/

unsigned long flags;

int allclean;
Expand All @@ -237,7 +233,6 @@ struct bitmap {
wait_queue_head_t behind_wait;

struct sysfs_dirent *sysfs_can_clear;

};

/* the bitmap API */
Expand Down
Loading

0 comments on commit 6140333

Please sign in to comment.