Skip to content

Commit

Permalink
[PATCH] md: count corrected read errors per drive
Browse files Browse the repository at this point in the history
Store this total in superblock (As appropriate), and make it available to
userspace via sysfs.

Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
  • Loading branch information
NeilBrown authored and Linus Torvalds committed Jan 6, 2006
1 parent d9d166c commit 4dbcdc7
Show file tree
Hide file tree
Showing 7 changed files with 57 additions and 4 deletions.
11 changes: 11 additions & 0 deletions Documentation/md.txt
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,17 @@ Each directory contains:
of being recovered to
This list may grow in future.

errors
An approximate count of read errors that have been detected on
this device but have not caused the device to be evicted from
the array (either because they were corrected or because they
happened while the array was read-only). When using version-1
metadata, this value persists across restarts of the array.

This value can be written while assembling an array thus
providing an ongoing count for arrays with metadata managed by
userspace.


An active md device will also contain an entry for each active device
in the array. These are named
Expand Down
27 changes: 26 additions & 1 deletion drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -1000,6 +1000,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
}
rdev->preferred_minor = 0xffff;
rdev->data_offset = le64_to_cpu(sb->data_offset);
atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));

rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
Expand Down Expand Up @@ -1139,6 +1140,8 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
else
sb->resync_offset = cpu_to_le64(0);

sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors);

if (mddev->bitmap && mddev->bitmap_file == NULL) {
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
Expand Down Expand Up @@ -1592,9 +1595,30 @@ super_show(mdk_rdev_t *rdev, char *page)
}
static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super);

/*
 * sysfs read handler for the per-device 'errors' attribute.
 *
 * Formats the current value of rdev->corrected_errors into 'page' as a
 * decimal number followed by a newline, and returns whatever sprintf()
 * returns (the number of characters written).
 */
static ssize_t
errors_show(mdk_rdev_t *rdev, char *page)
{
	int corrected = atomic_read(&rdev->corrected_errors);

	return sprintf(page, "%d\n", corrected);
}

/*
 * sysfs write handler for the per-device 'errors' attribute.
 *
 * Parses 'buf' as an unsigned decimal number and stores it in
 * rdev->corrected_errors.
 *
 * Returns 'len' on success.  Returns -EINVAL when:
 *   - no digit could be parsed (empty input, or input that is only "\n"),
 *   - the character right after the number is neither NUL nor newline,
 *   - the value is too large to be represented as a non-negative int.
 */
static ssize_t
errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
{
	char *e;
	unsigned long n = simple_strtoul(buf, &e, 10);

	/*
	 * e == buf means simple_strtoul() consumed nothing; without this
	 * check a bare "\n" would be accepted and reset the count to 0.
	 */
	if (e == buf || (*e != 0 && *e != '\n'))
		return -EINVAL;

	/*
	 * atomic_set() takes an int, so anything above the largest int
	 * (~0U >> 1) would be silently truncated or become negative.
	 */
	if (n > (~0U >> 1))
		return -EINVAL;

	atomic_set(&rdev->corrected_errors, n);
	return len;
}
/*
 * sysfs attribute named "errors", mode 0644, read through errors_show()
 * and written through errors_store().
 */
static struct rdev_sysfs_entry rdev_errors =
__ATTR(errors, 0644, errors_show, errors_store);

/*
 * NULL-terminated attribute list holding the "state", "super" and
 * "errors" entries.
 */
static struct attribute *rdev_default_attrs[] = {
&rdev_state.attr,
&rdev_super.attr,
&rdev_errors.attr,
NULL,	/* end-of-list marker */
};
static ssize_t
Expand Down Expand Up @@ -1674,6 +1698,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
rdev->data_offset = 0;
atomic_set(&rdev->nr_pending, 0);
atomic_set(&rdev->read_errors, 0);
atomic_set(&rdev->corrected_errors, 0);

size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
if (!size) {
Expand Down Expand Up @@ -4729,7 +4754,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
int num = simple_strtoul(val, &e, 10);
if (*val && (*e == '\0' || *e == '\n')) {
start_readonly = num;
return 0;;
return 0;
}
return -EINVAL;
}
Expand Down
2 changes: 2 additions & 0 deletions drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -1265,6 +1265,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
rdev = conf->mirrors[d].rdev;
atomic_add(s, &rdev->corrected_errors);
if (sync_page_io(rdev->bdev,
sect + rdev->data_offset,
s<<9,
Expand Down Expand Up @@ -1463,6 +1464,7 @@ static void raid1d(mddev_t *mddev)
d = conf->raid_disks;
d--;
rdev = conf->mirrors[d].rdev;
atomic_add(s, &rdev->corrected_errors);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
if (sync_page_io(rdev->bdev,
Expand Down
11 changes: 8 additions & 3 deletions drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -1122,9 +1122,13 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)

if (test_bit(BIO_UPTODATE, &bio->bi_flags))
set_bit(R10BIO_Uptodate, &r10_bio->state);
else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
md_error(r10_bio->mddev,
conf->mirrors[d].rdev);
else {
atomic_add(r10_bio->sectors,
&conf->mirrors[d].rdev->corrected_errors);
if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
md_error(r10_bio->mddev,
conf->mirrors[d].rdev);
}

/* for reconstruct, we always reschedule after a read.
* for resync, only after all reads
Expand Down Expand Up @@ -1430,6 +1434,7 @@ static void raid10d(mddev_t *mddev)
sl--;
d = r10_bio->devs[sl].devnum;
rdev = conf->mirrors[d].rdev;
atomic_add(s, &rdev->corrected_errors);
if (rdev &&
test_bit(In_sync, &rdev->flags)) {
if (sync_page_io(rdev->bdev,
Expand Down
3 changes: 3 additions & 0 deletions drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -1400,6 +1400,9 @@ static void handle_stripe(struct stripe_head *sh)
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
if (rw == WRITE &&
test_bit(R5_ReWrite, &sh->dev[i].flags))
atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
generic_make_request(bi);
} else {
if (rw == 1)
Expand Down
3 changes: 3 additions & 0 deletions drivers/md/raid6main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1562,6 +1562,9 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
bi->bi_next = NULL;
if (rw == WRITE &&
test_bit(R5_ReWrite, &sh->dev[i].flags))
atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
generic_make_request(bi);
} else {
if (rw == 1)
Expand Down
4 changes: 4 additions & 0 deletions include/linux/raid/md_k.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ struct mdk_rdev_s
atomic_t read_errors; /* number of consecutive read errors that
* we have tried to ignore.
*/
atomic_t corrected_errors; /* number of corrected read errors,
* for reporting to userspace and storing
* in superblock.
*/
};

struct mddev_s
Expand Down

0 comments on commit 4dbcdc7

Please sign in to comment.