Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 176095
b: refs/heads/master
c: 1e50915
h: refs/heads/master
i:
  176093: b88f125
  176091: 8378346
  176087: 61b8685
  176079: 8161e60
  176063: 08cf4db
v: v3
  • Loading branch information
Robert Becker authored and NeilBrown committed Dec 14, 2009
1 parent 475c947 commit d6087fd
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 1 deletion.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 67b8dc4b06b0e97df55fd76e209f34f9a52e820e
refs/heads/master: 1e50915fe0bbf7a46db0fa7e1e604d3fc95f057d
34 changes: 34 additions & 0 deletions trunk/drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);

#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }

/*
* Default number of read corrections we'll attempt on an rdev
* before ejecting it from the array. We divide the read error
* count by 2 for every hour elapsed between read errors.
*/
#define MD_DEFAULT_MAX_CORRECTED_READ_ERRORS 20
/*
* Current RAID-1,4,5 parallel reconstruction 'guaranteed speed limit'
* is 1000 KB/sec, so the extra system load does not show up that much.
Expand Down Expand Up @@ -2653,6 +2659,8 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
rdev->flags = 0;
rdev->data_offset = 0;
rdev->sb_events = 0;
rdev->last_read_error.tv_sec = 0;
rdev->last_read_error.tv_nsec = 0;
atomic_set(&rdev->nr_pending, 0);
atomic_set(&rdev->read_errors, 0);
atomic_set(&rdev->corrected_errors, 0);
Expand Down Expand Up @@ -3289,6 +3297,29 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len)
static struct md_sysfs_entry md_array_state =
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);

static ssize_t
max_corrected_read_errors_show(mddev_t *mddev, char *page) {
return sprintf(page, "%d\n",
atomic_read(&mddev->max_corr_read_errors));
}

static ssize_t
max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len)
{
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);

if (*buf && (*e == 0 || *e == '\n')) {
atomic_set(&mddev->max_corr_read_errors, n);
return len;
}
return -EINVAL;
}

static struct md_sysfs_entry max_corr_read_errors =
__ATTR(max_read_errors, S_IRUGO|S_IWUSR, max_corrected_read_errors_show,
max_corrected_read_errors_store);

static ssize_t
null_show(mddev_t *mddev, char *page)
{
Expand Down Expand Up @@ -3914,6 +3945,7 @@ static struct attribute *md_default_attrs[] = {
&md_array_state.attr,
&md_reshape_position.attr,
&md_array_size.attr,
&max_corr_read_errors.attr,
NULL,
};

Expand Down Expand Up @@ -4333,6 +4365,8 @@ static int do_md_run(mddev_t * mddev)
mddev->ro = 0;

atomic_set(&mddev->writes_pending,0);
atomic_set(&mddev->max_corr_read_errors,
MD_DEFAULT_MAX_CORRECTED_READ_ERRORS);
mddev->safemode = 0;
mddev->safemode_timer.function = md_safemode_timeout;
mddev->safemode_timer.data = (unsigned long) mddev;
Expand Down
4 changes: 4 additions & 0 deletions trunk/drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ struct mdk_rdev_s
atomic_t read_errors; /* number of consecutive read errors that
* we have tried to ignore.
*/
struct timespec last_read_error; /* monotonic time since our
* last read error
*/
atomic_t corrected_errors; /* number of corrected read errors,
* for reporting to userspace and storing
* in superblock.
Expand Down Expand Up @@ -299,6 +302,7 @@ struct mddev_s
int external;
} bitmap_info;

atomic_t max_corr_read_errors; /* max read retries */
struct list_head all_mddevs;

/* Generic barrier handling.
Expand Down
74 changes: 74 additions & 0 deletions trunk/drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -1431,6 +1431,43 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
}


/*
* Used by fix_read_error() to decay the per rdev read_errors.
* We halve the read error count for every hour that has elapsed
* since the last recorded read error.
*
*/
static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
{
struct timespec cur_time_mon;
unsigned long hours_since_last;
unsigned int read_errors = atomic_read(&rdev->read_errors);

ktime_get_ts(&cur_time_mon);

if (rdev->last_read_error.tv_sec == 0 &&
rdev->last_read_error.tv_nsec == 0) {
/* first time we've seen a read error */
rdev->last_read_error = cur_time_mon;
return;
}

hours_since_last = (cur_time_mon.tv_sec -
rdev->last_read_error.tv_sec) / 3600;

rdev->last_read_error = cur_time_mon;

/*
* if hours_since_last is > the number of bits in read_errors
* just set read errors to 0. We do this to avoid
* overflowing the shift of read_errors by hours_since_last.
*/
if (hours_since_last >= 8 * sizeof(read_errors))
atomic_set(&rdev->read_errors, 0);
else
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
}

/*
* This is a kernel thread which:
*
Expand All @@ -1444,6 +1481,43 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
int sect = 0; /* Offset from r10_bio->sector */
int sectors = r10_bio->sectors;
mdk_rdev_t*rdev;
int max_read_errors = atomic_read(&mddev->max_corr_read_errors);

rcu_read_lock();
{
int d = r10_bio->devs[r10_bio->read_slot].devnum;
char b[BDEVNAME_SIZE];
int cur_read_error_count = 0;

rdev = rcu_dereference(conf->mirrors[d].rdev);
bdevname(rdev->bdev, b);

if (test_bit(Faulty, &rdev->flags)) {
rcu_read_unlock();
/* drive has already been failed, just ignore any
more fix_read_error() attempts */
return;
}

check_decay_read_errors(mddev, rdev);
atomic_inc(&rdev->read_errors);
cur_read_error_count = atomic_read(&rdev->read_errors);
if (cur_read_error_count > max_read_errors) {
rcu_read_unlock();
printk(KERN_NOTICE
"raid10: %s: Raid device exceeded "
"read_error threshold "
"[cur %d:max %d]\n",
b, cur_read_error_count, max_read_errors);
printk(KERN_NOTICE
"raid10: %s: Failing raid "
"device\n", b);
md_error(mddev, conf->mirrors[d].rdev);
return;
}
}
rcu_read_unlock();

while(sectors) {
int s = sectors;
int sl = r10_bio->read_slot;
Expand Down

0 comments on commit d6087fd

Please sign in to comment.