Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 334249
b: refs/heads/master
c: 620125f
h: refs/heads/master
i:
  334247: 117333c
v: v3
  • Loading branch information
Shaohua Li authored and NeilBrown committed Oct 11, 2012
1 parent 993de11 commit d743569
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 4 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 582e2e056a5c3410174c23f5134e6b00e0db9101
refs/heads/master: 620125f2bf8ff0c4969b79653b54d7bcc9d40637
168 changes: 165 additions & 3 deletions trunk/drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
rw = WRITE_FUA;
else
rw = WRITE;
if (test_and_clear_bit(R5_Discard, &sh->dev[i].flags))
rw |= REQ_DISCARD;
} else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags))
rw = READ;
else if (test_and_clear_bit(R5_WantReplace,
Expand Down Expand Up @@ -1170,8 +1172,13 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
set_bit(R5_WantFUA, &dev->flags);
if (wbi->bi_rw & REQ_SYNC)
set_bit(R5_SyncIO, &dev->flags);
tx = async_copy_data(1, wbi, dev->page,
dev->sector, tx);
if (wbi->bi_rw & REQ_DISCARD) {
memset(page_address(dev->page), 0,
STRIPE_SECTORS << 9);
set_bit(R5_Discard, &dev->flags);
} else
tx = async_copy_data(1, wbi, dev->page,
dev->sector, tx);
wbi = r5_next_bio(wbi, dev->sector);
}
}
Expand Down Expand Up @@ -1237,6 +1244,20 @@ ops_run_reconstruct5(struct stripe_head *sh, struct raid5_percpu *percpu,
pr_debug("%s: stripe %llu\n", __func__,
(unsigned long long)sh->sector);

for (i = 0; i < sh->disks; i++) {
if (pd_idx == i)
continue;
if (!test_bit(R5_Discard, &sh->dev[i].flags))
break;
}
if (i >= sh->disks) {
atomic_inc(&sh->count);
memset(page_address(sh->dev[pd_idx].page), 0,
STRIPE_SECTORS << 9);
set_bit(R5_Discard, &sh->dev[pd_idx].flags);
ops_complete_reconstruct(sh);
return;
}
/* check if prexor is active which means only process blocks
* that are part of a read-modify-write (written)
*/
Expand Down Expand Up @@ -1281,10 +1302,28 @@ ops_run_reconstruct6(struct stripe_head *sh, struct raid5_percpu *percpu,
{
struct async_submit_ctl submit;
struct page **blocks = percpu->scribble;
int count;
int count, i;

pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector);

for (i = 0; i < sh->disks; i++) {
if (sh->pd_idx == i || sh->qd_idx == i)
continue;
if (!test_bit(R5_Discard, &sh->dev[i].flags))
break;
}
if (i >= sh->disks) {
atomic_inc(&sh->count);
memset(page_address(sh->dev[sh->pd_idx].page), 0,
STRIPE_SECTORS << 9);
memset(page_address(sh->dev[sh->qd_idx].page), 0,
STRIPE_SECTORS << 9);
set_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
set_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
ops_complete_reconstruct(sh);
return;
}

count = set_syndrome_sources(blocks, sh);

atomic_inc(&sh->count);
Expand Down Expand Up @@ -4067,6 +4106,88 @@ static void release_stripe_plug(struct mddev *mddev,
release_stripe(sh);
}

/*
 * make_discard_request - queue a discard bio against whole stripes.
 * @mddev: array the bio is addressed to; mddev->private is the r5conf.
 * @bi:    the discard bio.
 *
 * The bio's range is shrunk to the full stripes it covers (start rounded
 * up, end rounded down).  For every stripe_head in that range the bio is
 * attached as the pending write of every non-parity device, the device is
 * flagged R5_OVERWRITE, and the stripe_head is marked for handling and
 * released through release_stripe_plug().  The bio is completed here only
 * if no stripe_head still holds a reference when the loop finishes.
 */
static void make_discard_request(struct mddev *mddev, struct bio *bi)
{
struct r5conf *conf = mddev->private;
sector_t logical_sector, last_sector;
struct stripe_head *sh;
int remaining;
int stripe_sectors;

if (mddev->reshape_position != MaxSector)
/* Skip discard while reshape is happening */
/*
 * NOTE(review): this path returns without calling bio_endio() or
 * md_write_end() on bi; confirm that the caller completes the bio.
 */
return;

/* Start rounded down to a STRIPE_SECTORS boundary; end is one past the bio. */
logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
last_sector = bi->bi_sector + (bi->bi_size>>9);

bi->bi_next = NULL;
/*
 * Starts at 1; that initial count is dropped by the
 * raid5_dec_bi_active_stripes() call at the end of this function.
 */
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */

/*
 * Sectors covered by one full stripe: chunk size times
 * (raid_disks - max_degraded), presumably the number of data disks.
 */
stripe_sectors = conf->chunk_sectors *
(conf->raid_disks - conf->max_degraded);
/* Round the start up and the end down to whole-stripe indices. */
logical_sector = DIV_ROUND_UP_SECTOR_T(logical_sector,
stripe_sectors);
sector_div(last_sector, stripe_sectors);

/*
 * Scale the stripe indices by the chunk size; the results are what is
 * passed to get_active_stripe() below (apparently per-device sector
 * offsets -- confirm against get_active_stripe()).
 */
logical_sector *= conf->chunk_sectors;
last_sector *= conf->chunk_sectors;

/* One iteration per stripe_head, STRIPE_SECTORS apart. */
for (; logical_sector < last_sector;
logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
int d;
again:
/*
 * NOTE(review): the result is dereferenced without a NULL check;
 * confirm get_active_stripe() cannot fail with these arguments.
 */
sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
/* Register on the overlap wait queue before inspecting the stripe. */
prepare_to_wait(&conf->wait_for_overlap, &w,
TASK_UNINTERRUPTIBLE);
spin_lock_irq(&sh->stripe_lock);
/*
 * If any non-parity device already has a pending read or write,
 * flag it R5_Overlap, drop the lock and the stripe, sleep, and
 * retry this stripe from the top.
 */
for (d = 0; d < conf->raid_disks; d++) {
if (d == sh->pd_idx || d == sh->qd_idx)
continue;
if (sh->dev[d].towrite || sh->dev[d].toread) {
set_bit(R5_Overlap, &sh->dev[d].flags);
spin_unlock_irq(&sh->stripe_lock);
release_stripe(sh);
schedule();
goto again;
}
}
finish_wait(&conf->wait_for_overlap, &w);
/*
 * Still under stripe_lock: attach the bio to every non-parity
 * device as a full overwrite, taking one active-stripe count on
 * the bio per device.
 */
for (d = 0; d < conf->raid_disks; d++) {
if (d == sh->pd_idx || d == sh->qd_idx)
continue;
sh->dev[d].towrite = bi;
set_bit(R5_OVERWRITE, &sh->dev[d].flags);
raid5_inc_bi_active_stripes(bi);
}
spin_unlock_irq(&sh->stripe_lock);
if (conf->mddev->bitmap) {
/*
 * bitmap_startwrite() is called with identical arguments
 * (raid_disks - max_degraded) times, i.e. as many times as
 * devices were given the bio above.
 */
for (d = 0;
d < conf->raid_disks - conf->max_degraded;
d++)
bitmap_startwrite(mddev->bitmap,
sh->sector,
STRIPE_SECTORS,
0);
sh->bm_seq = conf->seq_flush + 1;
set_bit(STRIPE_BIT_DELAY, &sh->state);
}

/*
 * Mark the stripe for handling; preread_active_stripes is bumped
 * only if STRIPE_PREREAD_ACTIVE was not already set.
 */
set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state);
if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
release_stripe_plug(mddev, sh);
}

/*
 * Drop one active-stripe count.  If none remain (for example when the
 * range contained no whole stripe, so the loop never ran), finish the
 * write and complete the bio successfully here.
 */
remaining = raid5_dec_bi_active_stripes(bi);
if (remaining == 0) {
md_write_end(mddev);
bio_endio(bi, 0);
}
}

static void make_request(struct mddev *mddev, struct bio * bi)
{
struct r5conf *conf = mddev->private;
Expand All @@ -4089,6 +4210,11 @@ static void make_request(struct mddev *mddev, struct bio * bi)
chunk_aligned_read(mddev,bi))
return;

if (unlikely(bi->bi_rw & REQ_DISCARD)) {
make_discard_request(mddev, bi);
return;
}

logical_sector = bi->bi_sector & ~((sector_t)STRIPE_SECTORS-1);
last_sector = bi->bi_sector + (bi->bi_size>>9);
bi->bi_next = NULL;
Expand Down Expand Up @@ -5362,6 +5488,7 @@ static int run(struct mddev *mddev)

if (mddev->queue) {
int chunk_size;
bool discard_supported = true;
/* read-ahead size must cover two whole stripes, which
* is 2 * (datadisks) * chunksize where 'n' is the
* number of raid devices
Expand All @@ -5381,13 +5508,48 @@ static int run(struct mddev *mddev)
blk_queue_io_min(mddev->queue, chunk_size);
blk_queue_io_opt(mddev->queue, chunk_size *
(conf->raid_disks - conf->max_degraded));
/*
* We can only discard a whole stripe. It doesn't make sense to
* discard data disk but write parity disk
*/
stripe = stripe * PAGE_SIZE;
mddev->queue->limits.discard_alignment = stripe;
mddev->queue->limits.discard_granularity = stripe;
/*
* unaligned part of discard request will be ignored, so can't
* guarantee discard_zeroes_data
*/
mddev->queue->limits.discard_zeroes_data = 0;

rdev_for_each(rdev, mddev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->new_data_offset << 9);
/*
* discard_zeroes_data is required, otherwise data
* could be lost. Consider a scenario: discard a stripe
* (the stripe could be inconsistent if
* discard_zeroes_data is 0); write one disk of the
* stripe (the stripe could be inconsistent again
* depending on which disks are used to calculate
* parity); the disk is broken; The stripe data of this
* disk is lost.
*/
if (!blk_queue_discard(bdev_get_queue(rdev->bdev)) ||
!bdev_get_queue(rdev->bdev)->
limits.discard_zeroes_data)
discard_supported = false;
}

if (discard_supported &&
mddev->queue->limits.max_discard_sectors >= stripe &&
mddev->queue->limits.discard_granularity >= stripe)
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
mddev->queue);
else
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD,
mddev->queue);
}

return 0;
Expand Down
1 change: 1 addition & 0 deletions trunk/drivers/md/raid5.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ enum r5dev_flags {
R5_WantReplace, /* We need to update the replacement, we have read
* data in, and now is a good time to write it out.
*/
R5_Discard, /* Discard the stripe */
};

/*
Expand Down

0 comments on commit d743569

Please sign in to comment.