Skip to content

Commit

Permalink
blk-mq: Optimise blk_mq_queue_tag_busy_iter() for shared tags
Browse files Browse the repository at this point in the history
Kashyap reports high CPU usage in blk_mq_queue_tag_busy_iter() and callees
using megaraid SAS RAID card since moving to shared tags [0].

Previously, when shared tags was shared sbitmap, this function was less
than optimum since we would iter through all tags for all hctx's,
yet only ever match upto tagset depth number of rqs.

Since the change to shared tags, things are even less efficient if we have
parallel callers of blk_mq_queue_tag_busy_iter(). This is because in
bt_iter() -> blk_mq_find_and_get_req() there would be more contention on
accessing each request ref and tags->lock since they are now shared among
all HW queues.

Optimise by having separate calls to bt_for_each() for when we're using
shared tags. In this case no longer pass a hctx, as it is no longer
relevant, and teach bt_iter() about this.

Ming suggested something along the lines of this change, apart from a
different implementation.

[0] https://lore.kernel.org/linux-block/e4e92abbe9d52bcba6b8cc6c91c442cc@mail.gmail.com/

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reported-and-tested-by: Kashyap Desai <kashyap.desai@broadcom.com>
Fixes: e155b0c ("blk-mq: Use shared tags for shared sbitmap support")
Link: https://lore.kernel.org/r/1638794990-137490-4-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
  • Loading branch information
John Garry authored and Jens Axboe committed Dec 6, 2021
1 parent fc39f8d commit fea9f92
Showing 1 changed file with 41 additions and 18 deletions.
59 changes: 41 additions & 18 deletions block/blk-mq-tag.c
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ void blk_mq_put_tags(struct blk_mq_tags *tags, int *tag_array, int nr_tags)

struct bt_iter_data {
struct blk_mq_hw_ctx *hctx;
struct request_queue *q;
busy_tag_iter_fn *fn;
void *data;
bool reserved;
Expand All @@ -238,11 +239,18 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
{
struct bt_iter_data *iter_data = data;
struct blk_mq_hw_ctx *hctx = iter_data->hctx;
struct blk_mq_tags *tags = hctx->tags;
struct request_queue *q = iter_data->q;
struct blk_mq_tag_set *set = q->tag_set;
bool reserved = iter_data->reserved;
struct blk_mq_tags *tags;
struct request *rq;
bool ret = true;

if (blk_mq_is_shared_tags(set->flags))
tags = set->shared_tags;
else
tags = hctx->tags;

if (!reserved)
bitnr += tags->nr_reserved_tags;
/*
Expand All @@ -253,7 +261,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
if (!rq)
return true;

if (rq->q == hctx->queue && rq->mq_hctx == hctx)
if (rq->q == q && (!hctx || rq->mq_hctx == hctx))
ret = iter_data->fn(rq, iter_data->data, reserved);
blk_mq_put_rq_ref(rq);
return ret;
Expand All @@ -262,6 +270,7 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
/**
* bt_for_each - iterate over the requests associated with a hardware queue
* @hctx: Hardware queue to examine.
* @q: Request queue to examine.
* @bt: sbitmap to examine. This is either the breserved_tags member
* or the bitmap_tags member of struct blk_mq_tags.
* @fn: Pointer to the function that will be called for each request
Expand All @@ -273,14 +282,16 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
* @reserved: Indicates whether @bt is the breserved_tags member or the
* bitmap_tags member of struct blk_mq_tags.
*/
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct sbitmap_queue *bt,
busy_tag_iter_fn *fn, void *data, bool reserved)
static void bt_for_each(struct blk_mq_hw_ctx *hctx, struct request_queue *q,
struct sbitmap_queue *bt, busy_tag_iter_fn *fn,
void *data, bool reserved)
{
struct bt_iter_data iter_data = {
.hctx = hctx,
.fn = fn,
.data = data,
.reserved = reserved,
.q = q,
};

sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
Expand Down Expand Up @@ -460,9 +471,6 @@ EXPORT_SYMBOL(blk_mq_tagset_wait_completed_request);
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
void *priv)
{
struct blk_mq_hw_ctx *hctx;
int i;

/*
* __blk_mq_update_nr_hw_queues() updates nr_hw_queues and queue_hw_ctx
* while the queue is frozen. So we can use q_usage_counter to avoid
Expand All @@ -471,19 +479,34 @@ void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_tag_iter_fn *fn,
if (!percpu_ref_tryget(&q->q_usage_counter))
return;

queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;

/*
* If no software queues are currently mapped to this
* hardware queue, there's nothing to check
*/
if (!blk_mq_hw_queue_mapped(hctx))
continue;
if (blk_mq_is_shared_tags(q->tag_set->flags)) {
struct blk_mq_tags *tags = q->tag_set->shared_tags;
struct sbitmap_queue *bresv = &tags->breserved_tags;
struct sbitmap_queue *btags = &tags->bitmap_tags;

if (tags->nr_reserved_tags)
bt_for_each(hctx, &tags->breserved_tags, fn, priv, true);
bt_for_each(hctx, &tags->bitmap_tags, fn, priv, false);
bt_for_each(NULL, q, bresv, fn, priv, true);
bt_for_each(NULL, q, btags, fn, priv, false);
} else {
struct blk_mq_hw_ctx *hctx;
int i;

queue_for_each_hw_ctx(q, hctx, i) {
struct blk_mq_tags *tags = hctx->tags;
struct sbitmap_queue *bresv = &tags->breserved_tags;
struct sbitmap_queue *btags = &tags->bitmap_tags;

/*
* If no software queues are currently mapped to this
* hardware queue, there's nothing to check
*/
if (!blk_mq_hw_queue_mapped(hctx))
continue;

if (tags->nr_reserved_tags)
bt_for_each(hctx, q, bresv, fn, priv, true);
bt_for_each(hctx, q, btags, fn, priv, false);
}
}
blk_queue_exit(q);
}
Expand Down

0 comments on commit fea9f92

Please sign in to comment.