block, dm: don't copy bios for request clones
Currently dm-multipath has to clone the bios for every request sent
to the lower devices, which wastes CPU cycles and ties down memory.

This patch instead adds a new REQ_CLONE flag that instructs req_bio_endio
to not complete bios attached to a request; we set it on clone requests,
similarly to bios in a flush sequence.  With this change, I/O errors on
a path failure are only propagated to dm-multipath, which can then
either resubmit the I/O or complete the bios on the original request.

I've done some basic testing of this on a Linux target with ALUA support,
and it survives path failures during I/O nicely.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Christoph Hellwig authored and Jens Axboe committed May 22, 2015
1 parent 326e1db commit 5f1b670
Showing 6 changed files with 73 additions and 230 deletions.
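
To make the flow described above concrete before the diffs, here is a minimal dispatch-side sketch. It is not code from this commit: clone allocation is omitted and dispatch_to_path() is a hypothetical helper; only blk_rq_prep_clone() and REQ_CLONE come from the patch.

/*
 * Illustrative sketch only, not part of this commit.  Assumes the new
 * two-argument blk_rq_prep_clone() introduced below; dispatch_to_path()
 * is a made-up helper and clone allocation is not shown.
 */
static void clone_and_dispatch(struct request *orig, struct request *clone)
{
	/* The clone now points at orig's bios and carries REQ_CLONE. */
	blk_rq_prep_clone(clone, orig);

	/*
	 * When the clone completes, req_bio_endio() sees REQ_CLONE and
	 * leaves the shared bios untouched, so a path error reaches only
	 * the clone's completion handler (dm-multipath), which can retry
	 * on another path or finish the bios via the original request.
	 */
	dispatch_to_path(clone);
}
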
94 changes: 11 additions & 83 deletions block/blk-core.c
@@ -117,7 +117,7 @@ EXPORT_SYMBOL(blk_rq_init);
static void req_bio_endio(struct request *rq, struct bio *bio,
unsigned int nbytes, int error)
{
if (error)
if (error && !(rq->cmd_flags & REQ_CLONE))
clear_bit(BIO_UPTODATE, &bio->bi_flags);
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
error = -EIO;
@@ -128,7 +128,8 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
bio_advance(bio, nbytes);

/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
if (bio->bi_iter.bi_size == 0 &&
!(rq->cmd_flags & (REQ_FLUSH_SEQ|REQ_CLONE)))
bio_endio(bio, error);
}

@@ -2909,95 +2910,22 @@ int blk_lld_busy(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_lld_busy);

/**
* blk_rq_unprep_clone - Helper function to free all bios in a cloned request
* @rq: the clone request to be cleaned up
*
* Description:
* Free all bios in @rq for a cloned request.
*/
void blk_rq_unprep_clone(struct request *rq)
{
struct bio *bio;

while ((bio = rq->bio) != NULL) {
rq->bio = bio->bi_next;

bio_put(bio);
}
}
EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);

/*
* Copy attributes of the original request to the clone request.
* The actual data parts (e.g. ->cmd, ->sense) are not copied.
*/
static void __blk_rq_prep_clone(struct request *dst, struct request *src)
void blk_rq_prep_clone(struct request *dst, struct request *src)
{
dst->cpu = src->cpu;
dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE;
dst->cmd_flags |= (src->cmd_flags & REQ_CLONE_MASK);
dst->cmd_flags |= REQ_NOMERGE | REQ_CLONE;
dst->cmd_type = src->cmd_type;
dst->__sector = blk_rq_pos(src);
dst->__data_len = blk_rq_bytes(src);
dst->nr_phys_segments = src->nr_phys_segments;
dst->ioprio = src->ioprio;
dst->extra_len = src->extra_len;
}

/**
* blk_rq_prep_clone - Helper function to setup clone request
* @rq: the request to be setup
* @rq_src: original request to be cloned
* @bs: bio_set that bios for clone are allocated from
* @gfp_mask: memory allocation mask for bio
* @bio_ctr: setup function to be called for each clone bio.
* Returns %0 for success, non %0 for failure.
* @data: private data to be passed to @bio_ctr
*
* Description:
* Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
* The actual data parts of @rq_src (e.g. ->cmd, ->sense)
* are not copied, and copying such parts is the caller's responsibility.
* Also, pages which the original bios are pointing to are not copied
* and the cloned bios just point same pages.
* So cloned bios must be completed before original bios, which means
* the caller must complete @rq before @rq_src.
*/
int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
struct bio_set *bs, gfp_t gfp_mask,
int (*bio_ctr)(struct bio *, struct bio *, void *),
void *data)
{
struct bio *bio, *bio_src;

if (!bs)
bs = fs_bio_set;

__rq_for_each_bio(bio_src, rq_src) {
bio = bio_clone_fast(bio_src, gfp_mask, bs);
if (!bio)
goto free_and_out;

if (bio_ctr && bio_ctr(bio, bio_src, data))
goto free_and_out;

if (rq->bio) {
rq->biotail->bi_next = bio;
rq->biotail = bio;
} else
rq->bio = rq->biotail = bio;
}

__blk_rq_prep_clone(rq, rq_src);

return 0;

free_and_out:
if (bio)
bio_put(bio);
blk_rq_unprep_clone(rq);

return -ENOMEM;
dst->bio = src->bio;
dst->biotail = src->biotail;
dst->cmd = src->cmd;
dst->cmd_len = src->cmd_len;
dst->sense = src->sense;
}
EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
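
With the bio copying gone, blk_rq_prep_clone() no longer takes a bio_set, GFP mask or per-bio constructor and can no longer fail. A caller-side sketch of the new contract (illustrative only; the real caller is in drivers/md/dm.c, which is not shown in this excerpt):

/*
 * Illustrative caller sketch, not the actual dm code.  Previously the
 * caller had to pass a bio_set, a gfp_t and an optional per-bio
 * constructor, check for -ENOMEM, and undo the bio clones with
 * blk_rq_unprep_clone() on later errors.
 */
static void setup_clone(struct request *clone, struct request *orig)
{
	/* Copies attributes and borrows orig's bio list, ->cmd and ->sense. */
	blk_rq_prep_clone(clone, orig);

	/*
	 * Because the bios are shared rather than copied, the clone must
	 * be retired before the original request is completed.
	 */
}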

25 changes: 16 additions & 9 deletions drivers/md/dm-table.c
@@ -940,21 +940,28 @@ static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *
{
unsigned type = dm_table_get_type(t);
unsigned per_bio_data_size = 0;
struct dm_target *tgt;
unsigned i;

if (unlikely(type == DM_TYPE_NONE)) {
switch (type) {
case DM_TYPE_BIO_BASED:
for (i = 0; i < t->num_targets; i++) {
struct dm_target *tgt = t->targets + i;

per_bio_data_size = max(per_bio_data_size,
tgt->per_bio_data_size);
}
t->mempools = dm_alloc_bio_mempools(t->integrity_supported,
per_bio_data_size);
break;
case DM_TYPE_REQUEST_BASED:
case DM_TYPE_MQ_REQUEST_BASED:
t->mempools = dm_alloc_rq_mempools(md, type);
break;
default:
DMWARN("no table type is set, can't allocate mempools");
return -EINVAL;
}

if (type == DM_TYPE_BIO_BASED)
for (i = 0; i < t->num_targets; i++) {
tgt = t->targets + i;
per_bio_data_size = max(per_bio_data_size, tgt->per_bio_data_size);
}

t->mempools = dm_alloc_md_mempools(md, type, t->integrity_supported, per_bio_data_size);
if (!t->mempools)
return -ENOMEM;

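
On the completion side, req_bio_endio() now leaves the shared bios alone for REQ_CLONE requests, so the dm core has to either requeue the original request or end its bios itself. A rough sketch of that decision, with made-up helper names (the real handling lives in drivers/md/dm.c and drivers/md/dm-mpath.c, not shown in this excerpt):

/*
 * Illustrative sketch, not code from this commit.  path_error_is_retryable()
 * and requeue_original() stand in for dm-multipath's real retry logic.
 */
static void clone_complete(struct request *orig, int error)
{
	if (error && path_error_is_retryable(error)) {
		/* Resubmit the original request on another path. */
		requeue_original(orig);
		return;
	}

	/*
	 * req_bio_endio() skipped the shared bios because of REQ_CLONE,
	 * so finish them here by completing the original request.
	 */
	blk_end_request_all(orig, error);
}
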
(The diffs for the remaining four changed files are not shown.)
