Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 104250
b: refs/heads/master
c: ecc65c9
h: refs/heads/master
v: v3
  • Loading branch information
Dan Williams authored and Neil Brown committed Jun 27, 2008
1 parent 677d48c commit 7434c95
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 96 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: f0e43bcdebf709d747a3effb210aff1941e819ab
refs/heads/master: ecc65c9b3f9b9d740a5deade3d85b39be56401b6
172 changes: 83 additions & 89 deletions trunk/drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,11 @@ static void ops_complete_compute5(void *stripe_head_ref)
set_bit(R5_UPTODATE, &tgt->flags);
BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags));
clear_bit(R5_Wantcompute, &tgt->flags);
set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
clear_bit(STRIPE_COMPUTE_RUN, &sh->state);
if (sh->check_state == check_state_compute_run)
sh->check_state = check_state_compute_result;
else
set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
}
Expand Down Expand Up @@ -838,7 +842,7 @@ static void ops_complete_check(void *stripe_head_ref)
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);

set_bit(STRIPE_OP_CHECK, &sh->ops.complete);
sh->check_state = check_state_check_result;
set_bit(STRIPE_HANDLE, &sh->state);
release_stripe(sh);
}
Expand Down Expand Up @@ -870,7 +874,8 @@ static void ops_run_check(struct stripe_head *sh)
ops_complete_check, sh);
}

static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
static void raid5_run_ops(struct stripe_head *sh, unsigned long pending,
unsigned long ops_request)
{
int overlap_clear = 0, i, disks = sh->disks;
struct dma_async_tx_descriptor *tx = NULL;
Expand All @@ -880,7 +885,8 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
overlap_clear++;
}

if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending))
if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending) ||
test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request))
tx = ops_run_compute5(sh, pending);

if (test_bit(STRIPE_OP_PREXOR, &pending))
Expand All @@ -894,7 +900,7 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
if (test_bit(STRIPE_OP_POSTXOR, &pending))
ops_run_postxor(sh, tx, pending);

if (test_bit(STRIPE_OP_CHECK, &pending))
if (test_bit(STRIPE_OP_CHECK, &ops_request))
ops_run_check(sh);

if (overlap_clear)
Expand Down Expand Up @@ -1961,8 +1967,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
/* don't schedule compute operations or reads on the parity block while
* a check is in flight
*/
if ((disk_idx == sh->pd_idx) &&
test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
if (disk_idx == sh->pd_idx && sh->check_state)
return ~0;

/* is the data in this block needed, and can we get it? */
Expand All @@ -1983,9 +1988,8 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
* 3/ We hold off parity block re-reads until check operations
* have quiesced.
*/
if ((s->uptodate == disks - 1) &&
(s->failed && disk_idx == s->failed_num) &&
!test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
if ((s->uptodate == disks - 1) && !sh->check_state &&
(s->failed && disk_idx == s->failed_num)) {
set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
set_bit(R5_Wantcompute, &dev->flags);
sh->ops.target = disk_idx;
Expand Down Expand Up @@ -2021,12 +2025,8 @@ static void handle_issuing_new_read_requests5(struct stripe_head *sh,
{
int i;

/* Clear completed compute operations. Parity recovery
* (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled
* later on in this routine
*/
if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
!test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
/* Clear completed compute operations */
if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete)) {
clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
Expand Down Expand Up @@ -2350,90 +2350,85 @@ static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
struct stripe_head_state *s, int disks)
{
int canceled_check = 0;
struct r5dev *dev = NULL;

set_bit(STRIPE_HANDLE, &sh->state);

/* complete a check operation */
if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
switch (sh->check_state) {
case check_state_idle:
/* start a new check operation if there are no failures */
if (s->failed == 0) {
if (sh->ops.zero_sum_result == 0)
/* parity is correct (on disc,
* not in buffer any more)
*/
set_bit(STRIPE_INSYNC, &sh->state);
else {
conf->mddev->resync_mismatches +=
STRIPE_SECTORS;
if (test_bit(
MD_RECOVERY_CHECK, &conf->mddev->recovery))
/* don't try to repair!! */
set_bit(STRIPE_INSYNC, &sh->state);
else {
set_bit(STRIPE_OP_COMPUTE_BLK,
&sh->ops.pending);
set_bit(STRIPE_OP_MOD_REPAIR_PD,
&sh->ops.pending);
set_bit(R5_Wantcompute,
&sh->dev[sh->pd_idx].flags);
sh->ops.target = sh->pd_idx;
sh->ops.count++;
s->uptodate++;
}
}
} else
canceled_check = 1; /* STRIPE_INSYNC is not set */
}

/* start a new check operation if there are no failures, the stripe is
* not insync, and a repair is not in flight
*/
if (s->failed == 0 &&
!test_bit(STRIPE_INSYNC, &sh->state) &&
!test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
BUG_ON(s->uptodate != disks);
sh->check_state = check_state_run;
set_bit(STRIPE_OP_CHECK, &s->ops_request);
clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
sh->ops.count++;
s->uptodate--;
break;
}
}

/* check if we can clear a parity disk reconstruct */
if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {

clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
}

dev = &sh->dev[s->failed_num];
/* fall through */
case check_state_compute_result:
sh->check_state = check_state_idle;
if (!dev)
dev = &sh->dev[sh->pd_idx];

/* check that a write has not made the stripe insync */
if (test_bit(STRIPE_INSYNC, &sh->state))
break;

/* Wait for check parity and compute block operations to complete
* before write-back. If a failure occurred while the check operation
* was in flight we need to cycle this stripe through handle_stripe
* since the parity block may not be uptodate
*/
if (!canceled_check && !test_bit(STRIPE_INSYNC, &sh->state) &&
!test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
struct r5dev *dev;
/* either failed parity check, or recovery is happening */
if (s->failed == 0)
s->failed_num = sh->pd_idx;
dev = &sh->dev[s->failed_num];
BUG_ON(!test_bit(R5_UPTODATE, &dev->flags));
BUG_ON(s->uptodate != disks);

set_bit(R5_LOCKED, &dev->flags);
s->locked++;
set_bit(R5_Wantwrite, &dev->flags);

clear_bit(STRIPE_DEGRADED, &sh->state);
s->locked++;
set_bit(STRIPE_INSYNC, &sh->state);
break;
case check_state_run:
break; /* we will be called again upon completion */
case check_state_check_result:
sh->check_state = check_state_idle;

/* if a failure occurred during the check operation, leave
* STRIPE_INSYNC not set and let the stripe be handled again
*/
if (s->failed)
break;

/* handle a successful check operation, if parity is correct
* we are done. Otherwise update the mismatch count and repair
* parity if !MD_RECOVERY_CHECK
*/
if (sh->ops.zero_sum_result == 0)
/* parity is correct (on disc,
* not in buffer any more)
*/
set_bit(STRIPE_INSYNC, &sh->state);
else {
conf->mddev->resync_mismatches += STRIPE_SECTORS;
if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
/* don't try to repair!! */
set_bit(STRIPE_INSYNC, &sh->state);
else {
sh->check_state = check_state_compute_run;
set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request);
set_bit(R5_Wantcompute,
&sh->dev[sh->pd_idx].flags);
sh->ops.target = sh->pd_idx;
s->uptodate++;
}
}
break;
case check_state_compute_run:
break;
default:
printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n",
__func__, sh->check_state,
(unsigned long long) sh->sector);
BUG();
}
}

Expand Down Expand Up @@ -2807,19 +2802,18 @@ static void handle_stripe5(struct stripe_head *sh)
* block.
*/
if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) &&
!test_bit(STRIPE_OP_CHECK, &sh->ops.pending))
!sh->check_state)
handle_issuing_new_write_requests5(conf, sh, &s, disks);

/* maybe we need to check and possibly fix the parity for this stripe
* Any reads will already have been scheduled, so we just see if enough
* data is available. The parity check is held off while parity
* dependent operations are in flight.
*/
if ((s.syncing && s.locked == 0 &&
if (sh->check_state ||
(s.syncing && s.locked == 0 &&
!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) &&
!test_bit(STRIPE_INSYNC, &sh->state)) ||
test_bit(STRIPE_OP_CHECK, &sh->ops.pending) ||
test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending))
!test_bit(STRIPE_INSYNC, &sh->state)))
handle_parity_checks5(conf, sh, &s, disks);

if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
Expand Down Expand Up @@ -2897,8 +2891,8 @@ static void handle_stripe5(struct stripe_head *sh)
if (unlikely(blocked_rdev))
md_wait_for_blocked_rdev(blocked_rdev, conf->mddev);

if (pending)
raid5_run_ops(sh, pending);
if (pending || s.ops_request)
raid5_run_ops(sh, pending, s.ops_request);

ops_run_io(sh, &s);

Expand Down
46 changes: 40 additions & 6 deletions trunk/include/linux/raid/raid5.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,41 @@
* the compute block completes.
*/

/*
* Operations state - intermediate states that are visible outside of sh->lock
* In general _idle indicates nothing is running, _run indicates a data
* processing operation is active, and _result means the data processing result
* is stable and can be acted upon. For simple operations like biofill and
* compute that only have an _idle and _run state they are indicated with
* sh->state flags (STRIPE_BIOFILL_RUN and STRIPE_COMPUTE_RUN)
*/
/**
 * enum check_states - handles syncing / repairing a stripe
 * @check_state_idle - check operations are quiesced
 * @check_state_run - a parity check operation is running
 * @check_state_check_result - set outside lock when the check result is valid
 * @check_state_compute_run - the check failed and the parity is being repaired
 * @check_state_compute_result - set outside lock when the compute result is
 *	valid
 *
 * The numeric values are spelled out explicitly; check_state_idle must stay
 * zero because callers test "sh->check_state" for truth to detect an active
 * check sequence.
 */
enum check_states {
	check_state_idle		= 0,
	check_state_run			= 1,	/* parity check */
	check_state_check_result	= 2,
	check_state_compute_run		= 3,	/* parity repair */
	check_state_compute_result	= 4,
};

/**
 * enum reconstruct_states - handles writing or expanding a stripe
 * @reconstruct_state_idle - no reconstruct operation is running
 * @reconstruct_state_drain_run - reconstruct for a write is running
 * @reconstruct_state_run - reconstruct for an expand is running
 * @reconstruct_state_drain_result - result state paired with _drain_run
 * @reconstruct_state_result - result state paired with _run
 *
 * The numeric values are spelled out explicitly; reconstruct_state_idle is
 * zero.
 */
enum reconstruct_states {
	reconstruct_state_idle		= 0,
	reconstruct_state_drain_run	= 1,	/* write */
	reconstruct_state_run		= 2,	/* expand */
	reconstruct_state_drain_result	= 3,
	reconstruct_state_result	= 4,
};

struct stripe_head {
struct hlist_node hash;
struct list_head lru; /* inactive_list or handle_list */
Expand All @@ -169,6 +204,7 @@ struct stripe_head {
spinlock_t lock;
int bm_seq; /* sequence number for bitmap flushes */
int disks; /* disks in stripe */
enum check_states check_state;
/* stripe_operations
* @pending - pending ops flags (set for request->issue->complete)
* @ack - submitted ops flags (set for issue->complete)
Expand Down Expand Up @@ -202,6 +238,7 @@ struct stripe_head_state {
int locked, uptodate, to_read, to_write, failed, written;
int to_fill, compute, req_compute, non_overwrite;
int failed_num;
unsigned long ops_request;
};

/* r6_state - extra state data only relevant to r6 */
Expand Down Expand Up @@ -254,8 +291,10 @@ struct r6_state {
#define STRIPE_EXPAND_READY 11
#define STRIPE_IO_STARTED 12 /* do not count towards 'bypass_count' */
#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */
#define STRIPE_BIOFILL_RUN 14
#define STRIPE_COMPUTE_RUN 15
/*
* Operations flags (in issue order)
* Operation request flags
*/
#define STRIPE_OP_BIOFILL 0
#define STRIPE_OP_COMPUTE_BLK 1
Expand All @@ -264,11 +303,6 @@ struct r6_state {
#define STRIPE_OP_POSTXOR 4
#define STRIPE_OP_CHECK 5

/* modifiers to the base operations
* STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back
*/
#define STRIPE_OP_MOD_REPAIR_PD 7

/*
* Plugging:
*
Expand Down

0 comments on commit 7434c95

Please sign in to comment.