Skip to content

Commit

Permalink
blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request …
Browse files Browse the repository at this point in the history
…feedback

blk_insert_cloned_request() is called in the fast path of a dm-rq driver
(e.g. blk-mq request-based DM mpath).  blk_insert_cloned_request() uses
blk_mq_request_bypass_insert() to directly append the request to the
blk-mq hctx->dispatch_list of the underlying queue.

1) This way isn't efficient enough because the hctx spinlock is always
used.

2) With blk_insert_cloned_request(), we completely bypass underlying
queue's elevator and depend on the upper-level dm-rq driver's elevator
to schedule IO.  But dm-rq currently can't get the underlying queue's
dispatch feedback at all.  Without knowing whether a request was issued
or not (e.g. due to underlying queue being busy) the dm-rq elevator will
not be able to provide effective IO merging (as a side-effect of dm-rq
currently blindly destaging a request from its elevator only to requeue
it after a delay, which kills any opportunity for merging).  This
obviously causes very bad sequential IO performance.

Fix this by updating blk_insert_cloned_request() to use
blk_mq_request_direct_issue().  blk_mq_request_direct_issue() allows a
request to be issued directly to the underlying queue and returns the
dispatch feedback (blk_status_t).  If blk_mq_request_direct_issue()
returns BLK_SYS_RESOURCE the dm-rq driver will now use DM_MAPIO_REQUEUE
to _not_ destage the request.  Whereby preserving the opportunity to
merge IO.

With this, request-based DM's blk-mq sequential IO performance is vastly
improved (as much as 3X in mpath/virtio-scsi testing).

Signed-off-by: Ming Lei <ming.lei@redhat.com>
[blk-mq.c changes heavily influenced by Ming Lei's initial solution, but
they were refactored to make them less fragile and easier to read/review]
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
Ming Lei authored and Jens Axboe committed Jan 17, 2018
1 parent 0f95549 commit 396eaf2
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 13 deletions.
3 changes: 1 addition & 2 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2500,8 +2500,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
* bypass a potential scheduler on the bottom device for
* insert.
*/
blk_mq_request_bypass_insert(rq, true);
return BLK_STS_OK;
return blk_mq_request_direct_issue(rq);
}

spin_lock_irqsave(q->queue_lock, flags);
Expand Down
37 changes: 29 additions & 8 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -1775,15 +1775,19 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,

static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
struct request *rq,
bool run_queue)
bool run_queue, bool bypass_insert)
{
blk_mq_sched_insert_request(rq, false, run_queue, false,
hctx->flags & BLK_MQ_F_BLOCKING);
if (!bypass_insert)
blk_mq_sched_insert_request(rq, false, run_queue, false,
hctx->flags & BLK_MQ_F_BLOCKING);
else
blk_mq_request_bypass_insert(rq, run_queue);
}

static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
struct request *rq,
blk_qc_t *cookie)
blk_qc_t *cookie,
bool bypass_insert)
{
struct request_queue *q = rq->q;
bool run_queue = true;
Expand All @@ -1794,7 +1798,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
goto insert;
}

if (q->elevator)
if (q->elevator && !bypass_insert)
goto insert;

if (!blk_mq_get_driver_tag(rq, NULL, false))
Expand All @@ -1807,7 +1811,9 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,

return __blk_mq_issue_directly(hctx, rq, cookie);
insert:
__blk_mq_fallback_to_insert(hctx, rq, run_queue);
__blk_mq_fallback_to_insert(hctx, rq, run_queue, bypass_insert);
if (bypass_insert)
return BLK_STS_RESOURCE;

return BLK_STS_OK;
}
Expand All @@ -1822,15 +1828,30 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,

hctx_lock(hctx, &srcu_idx);

ret = __blk_mq_try_issue_directly(hctx, rq, cookie);
ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
if (ret == BLK_STS_RESOURCE)
__blk_mq_fallback_to_insert(hctx, rq, true);
__blk_mq_fallback_to_insert(hctx, rq, true, false);
else if (ret != BLK_STS_OK)
blk_mq_end_request(rq, ret);

hctx_unlock(hctx, srcu_idx);
}

blk_status_t blk_mq_request_direct_issue(struct request *rq)
{
blk_status_t ret;
int srcu_idx;
blk_qc_t unused_cookie;
struct blk_mq_ctx *ctx = rq->mq_ctx;
struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);

hctx_lock(hctx, &srcu_idx);
ret = __blk_mq_try_issue_directly(hctx, rq, &unused_cookie, true);
hctx_unlock(hctx, srcu_idx);

return ret;
}

static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
{
const int is_sync = op_is_sync(bio->bi_opf);
Expand Down
3 changes: 3 additions & 0 deletions block/blk-mq.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
struct list_head *list);

/* Used by blk_insert_cloned_request() to issue request directly */
blk_status_t blk_mq_request_direct_issue(struct request *rq);

/*
* CPU -> queue mappings
*/
Expand Down
19 changes: 16 additions & 3 deletions drivers/md/dm-rq.c
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ static void end_clone_request(struct request *clone, blk_status_t error)
dm_complete_request(tio->orig, error);
}

static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
blk_status_t r;

Expand All @@ -404,9 +404,10 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)

clone->start_time = jiffies;
r = blk_insert_cloned_request(clone->q, clone);
if (r)
if (r != BLK_STS_OK && r != BLK_STS_RESOURCE)
/* must complete clone in terms of original request */
dm_complete_request(rq, r);
return r;
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
Expand Down Expand Up @@ -476,8 +477,10 @@ static int map_request(struct dm_rq_target_io *tio)
struct mapped_device *md = tio->md;
struct request *rq = tio->orig;
struct request *clone = NULL;
blk_status_t ret;

r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
check_again:
switch (r) {
case DM_MAPIO_SUBMITTED:
/* The target has taken the I/O to submit by itself later */
Expand All @@ -492,7 +495,17 @@ static int map_request(struct dm_rq_target_io *tio)
/* The target has remapped the I/O so dispatch it */
trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
blk_rq_pos(rq));
dm_dispatch_clone_request(clone, rq);
ret = dm_dispatch_clone_request(clone, rq);
if (ret == BLK_STS_RESOURCE) {
blk_rq_unprep_clone(clone);
tio->ti->type->release_clone_rq(clone);
tio->clone = NULL;
if (!rq->q->mq_ops)
r = DM_MAPIO_DELAY_REQUEUE;
else
r = DM_MAPIO_REQUEUE;
goto check_again;
}
break;
case DM_MAPIO_REQUEUE:
/* The target wants to requeue the I/O */
Expand Down

0 comments on commit 396eaf2

Please sign in to comment.