blk-mq: Use shared tags for shared sbitmap support
Currently we use separate sbitmap pairs and an active_queues atomic_t for
shared sbitmap support.

However, a full set of static requests is allocated per HW queue, which is
quite wasteful, considering that the total number of requests usable at
any given time across all HW queues is limited by the shared sbitmap depth.
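For example, a tag set with 16 HW queues and a shared sbitmap depth of 1024
pins 16 * 1024 static requests, even though at most 1024 of them can be in
flight at any moment.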

As such, it is considerably more memory efficient in the case of shared
sbitmap to allocate a set of static rqs per tag set or request queue, and
not per HW queue.

So replace the sbitmap pairs and active_queues atomic_t with a shared
blk_mq_tags per tagset and request queue, which will hold a set of shared
static rqs.
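
For orientation, a simplified sketch of the resulting ownership; the
shared_sbitmap_tags fields are the ones used by this patch, everything
else is elided and the comments are the editor's reading, not kernel
documentation:

struct blk_mq_tag_set {
        /* ... */
        struct blk_mq_tags *shared_sbitmap_tags; /* shared driver tags + static rqs */
};

struct request_queue {
        /* ... */
        struct blk_mq_tags *shared_sbitmap_tags; /* shared sched tags + static rqs */
};

struct blk_mq_hw_ctx {
        /* ... */
        struct blk_mq_tags *tags;       /* with a shared sbitmap, every hctx
                                         * points at the one shared set */
        struct blk_mq_tags *sched_tags;
};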

Since there is now no valid HW queue index to be passed to the blk_mq_ops
.init_request and .exit_request callbacks, pass an invalid index token
(BLK_MQ_NO_HCTX_IDX). This changes the semantics of the APIs, such that
the callback must validate the HW queue index before using it. Currently
no user of shared sbitmap actually uses the HW queue index (as would be
expected).
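
A minimal sketch of the new contract follows; the callback signature and
BLK_MQ_NO_HCTX_IDX are from this series, while the driver name, my_cmd pdu
and its hwq field are hypothetical:

static int my_init_request(struct blk_mq_tag_set *set, struct request *rq,
                           unsigned int hctx_idx, unsigned int numa_node)
{
        struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);      /* hypothetical pdu */

        /*
         * With a shared sbitmap, the static requests are set up once per
         * tag set, so hctx_idx is the invalid token BLK_MQ_NO_HCTX_IDX
         * and must not be used to index per-hctx state.
         */
        if (hctx_idx != BLK_MQ_NO_HCTX_IDX)
                cmd->hwq = hctx_idx;    /* hypothetical per-command field */

        return 0;
}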

Signed-off-by: John Garry <john.garry@huawei.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/1633429419-228500-13-git-send-email-john.garry@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
John Garry authored and Jens Axboe committed Oct 18, 2021
1 parent 645db34 commit e155b0c
Showing 7 changed files with 125 additions and 152 deletions.
82 changes: 41 additions & 41 deletions block/blk-mq-sched.c
@@ -519,6 +519,11 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
                                           struct blk_mq_hw_ctx *hctx,
                                           unsigned int hctx_idx)
 {
+        if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
+                hctx->sched_tags = q->shared_sbitmap_tags;
+                return 0;
+        }
+
         hctx->sched_tags = blk_mq_alloc_map_and_rqs(q->tag_set, hctx_idx,
                                                     q->nr_requests);
 
@@ -527,61 +532,54 @@ static int blk_mq_sched_alloc_map_and_rqs(struct request_queue *q,
         return 0;
 }
 
+static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
+{
+        blk_mq_free_rq_map(queue->shared_sbitmap_tags);
+        queue->shared_sbitmap_tags = NULL;
+}
+
 /* called in queue's release handler, tagset has gone away */
-static void blk_mq_sched_tags_teardown(struct request_queue *q)
+static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int flags)
 {
         struct blk_mq_hw_ctx *hctx;
         int i;
 
         queue_for_each_hw_ctx(q, hctx, i) {
                 if (hctx->sched_tags) {
-                        blk_mq_free_rq_map(hctx->sched_tags, hctx->flags);
+                        if (!blk_mq_is_sbitmap_shared(q->tag_set->flags))
+                                blk_mq_free_rq_map(hctx->sched_tags);
                         hctx->sched_tags = NULL;
                 }
         }
+
+        if (blk_mq_is_sbitmap_shared(flags))
+                blk_mq_exit_sched_shared_sbitmap(q);
 }
 
 static int blk_mq_init_sched_shared_sbitmap(struct request_queue *queue)
 {
         struct blk_mq_tag_set *set = queue->tag_set;
-        int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
-        struct blk_mq_hw_ctx *hctx;
-        int ret, i;
 
         /*
          * Set initial depth at max so that we don't need to reallocate for
          * updating nr_requests.
          */
-        ret = blk_mq_init_bitmaps(&queue->sched_bitmap_tags,
-                                  &queue->sched_breserved_tags,
-                                  MAX_SCHED_RQ, set->reserved_tags,
-                                  set->numa_node, alloc_policy);
-        if (ret)
-                return ret;
+        queue->shared_sbitmap_tags = blk_mq_alloc_map_and_rqs(set,
+                                                BLK_MQ_NO_HCTX_IDX,
+                                                MAX_SCHED_RQ);
+        if (!queue->shared_sbitmap_tags)
+                return -ENOMEM;
 
-        queue_for_each_hw_ctx(queue, hctx, i) {
-                hctx->sched_tags->bitmap_tags =
-                                &queue->sched_bitmap_tags;
-                hctx->sched_tags->breserved_tags =
-                                &queue->sched_breserved_tags;
-        }
+        blk_mq_tag_update_sched_shared_sbitmap(queue);
 
         return 0;
 }
 
-static void blk_mq_exit_sched_shared_sbitmap(struct request_queue *queue)
-{
-        sbitmap_queue_free(&queue->sched_bitmap_tags);
-        sbitmap_queue_free(&queue->sched_breserved_tags);
-}
-
 int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 {
+        unsigned int i, flags = q->tag_set->flags;
         struct blk_mq_hw_ctx *hctx;
         struct elevator_queue *eq;
-        unsigned int i;
         int ret;
 
         if (!e) {
@@ -598,21 +596,21 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
         q->nr_requests = 2 * min_t(unsigned int, q->tag_set->queue_depth,
                                    BLKDEV_DEFAULT_RQ);
 
-        queue_for_each_hw_ctx(q, hctx, i) {
-                ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
+        if (blk_mq_is_sbitmap_shared(flags)) {
+                ret = blk_mq_init_sched_shared_sbitmap(q);
                 if (ret)
-                        goto err_free_map_and_rqs;
+                        return ret;
         }
 
-        if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
-                ret = blk_mq_init_sched_shared_sbitmap(q);
+        queue_for_each_hw_ctx(q, hctx, i) {
+                ret = blk_mq_sched_alloc_map_and_rqs(q, hctx, i);
                 if (ret)
                         goto err_free_map_and_rqs;
         }
 
         ret = e->ops.init_sched(q, e);
         if (ret)
-                goto err_free_sbitmap;
+                goto err_free_map_and_rqs;
 
         blk_mq_debugfs_register_sched(q);
 
@@ -632,12 +630,10 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 
         return 0;
 
-err_free_sbitmap:
-        if (blk_mq_is_sbitmap_shared(q->tag_set->flags))
-                blk_mq_exit_sched_shared_sbitmap(q);
 err_free_map_and_rqs:
         blk_mq_sched_free_rqs(q);
-        blk_mq_sched_tags_teardown(q);
+        blk_mq_sched_tags_teardown(q, flags);
 
         q->elevator = NULL;
         return ret;
 }
@@ -651,9 +647,15 @@ void blk_mq_sched_free_rqs(struct request_queue *q)
         struct blk_mq_hw_ctx *hctx;
         int i;
 
-        queue_for_each_hw_ctx(q, hctx, i) {
-                if (hctx->sched_tags)
-                        blk_mq_free_rqs(q->tag_set, hctx->sched_tags, i);
+        if (blk_mq_is_sbitmap_shared(q->tag_set->flags)) {
+                blk_mq_free_rqs(q->tag_set, q->shared_sbitmap_tags,
+                                BLK_MQ_NO_HCTX_IDX);
+        } else {
+                queue_for_each_hw_ctx(q, hctx, i) {
+                        if (hctx->sched_tags)
+                                blk_mq_free_rqs(q->tag_set,
+                                                hctx->sched_tags, i);
+                }
         }
 }
 
@@ -674,8 +676,6 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
         blk_mq_debugfs_unregister_sched(q);
         if (e->type->ops.exit_sched)
                 e->type->ops.exit_sched(e);
-        blk_mq_sched_tags_teardown(q);
-        if (blk_mq_is_sbitmap_shared(flags))
-                blk_mq_exit_sched_shared_sbitmap(q);
+        blk_mq_sched_tags_teardown(q, flags);
         q->elevator = NULL;
 }
63 changes: 19 additions & 44 deletions block/blk-mq-tag.c
@@ -26,11 +26,10 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
         if (blk_mq_is_sbitmap_shared(hctx->flags)) {
                 struct request_queue *q = hctx->queue;
-                struct blk_mq_tag_set *set = q->tag_set;
 
                 if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
                     !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
-                        atomic_inc(&set->active_queues_shared_sbitmap);
+                        atomic_inc(&hctx->tags->active_queues);
         } else {
                 if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
                     !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
@@ -57,14 +56,14 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
         struct blk_mq_tags *tags = hctx->tags;
-        struct request_queue *q = hctx->queue;
-        struct blk_mq_tag_set *set = q->tag_set;
 
         if (blk_mq_is_sbitmap_shared(hctx->flags)) {
+                struct request_queue *q = hctx->queue;
+
                 if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
                                         &q->queue_flags))
                         return;
-                atomic_dec(&set->active_queues_shared_sbitmap);
+                atomic_dec(&tags->active_queues);
         } else {
                 if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
                         return;
@@ -510,38 +509,10 @@ static int blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
         return 0;
 }
 
-int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set)
-{
-        int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(set->flags);
-        int i, ret;
-
-        ret = blk_mq_init_bitmaps(&set->__bitmap_tags, &set->__breserved_tags,
-                                  set->queue_depth, set->reserved_tags,
-                                  set->numa_node, alloc_policy);
-        if (ret)
-                return ret;
-
-        for (i = 0; i < set->nr_hw_queues; i++) {
-                struct blk_mq_tags *tags = set->tags[i];
-
-                tags->bitmap_tags = &set->__bitmap_tags;
-                tags->breserved_tags = &set->__breserved_tags;
-        }
-
-        return 0;
-}
-
-void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set)
-{
-        sbitmap_queue_free(&set->__bitmap_tags);
-        sbitmap_queue_free(&set->__breserved_tags);
-}
-
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
                                      unsigned int reserved_tags,
-                                     int node, unsigned int flags)
+                                     int node, int alloc_policy)
 {
-        int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(flags);
         struct blk_mq_tags *tags;
 
         if (total_tags > BLK_MQ_TAG_MAX) {
@@ -557,22 +528,17 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
         tags->nr_reserved_tags = reserved_tags;
         spin_lock_init(&tags->lock);
 
-        if (blk_mq_is_sbitmap_shared(flags))
-                return tags;
-
         if (blk_mq_init_bitmap_tags(tags, node, alloc_policy) < 0) {
                 kfree(tags);
                 return NULL;
         }
         return tags;
 }
 
-void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags)
+void blk_mq_free_tags(struct blk_mq_tags *tags)
 {
-        if (!blk_mq_is_sbitmap_shared(flags)) {
-                sbitmap_queue_free(tags->bitmap_tags);
-                sbitmap_queue_free(tags->breserved_tags);
-        }
+        sbitmap_queue_free(tags->bitmap_tags);
+        sbitmap_queue_free(tags->breserved_tags);
         kfree(tags);
 }
 
@@ -603,6 +569,13 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
         if (tdepth > MAX_SCHED_RQ)
                 return -EINVAL;
 
+        /*
+         * Only the sbitmap needs resizing since we allocated the max
+         * initially.
+         */
+        if (blk_mq_is_sbitmap_shared(set->flags))
+                return 0;
+
         new = blk_mq_alloc_map_and_rqs(set, hctx->queue_num, tdepth);
         if (!new)
                 return -ENOMEM;
@@ -623,12 +596,14 @@ int blk_mq_tag_update_depth(struct blk_mq_hw_ctx *hctx,
 
 void blk_mq_tag_resize_shared_sbitmap(struct blk_mq_tag_set *set, unsigned int size)
 {
-        sbitmap_queue_resize(&set->__bitmap_tags, size - set->reserved_tags);
+        struct blk_mq_tags *tags = set->shared_sbitmap_tags;
+
+        sbitmap_queue_resize(&tags->__bitmap_tags, size - set->reserved_tags);
 }
 
 void blk_mq_tag_update_sched_shared_sbitmap(struct request_queue *q)
 {
-        sbitmap_queue_resize(&q->sched_bitmap_tags,
+        sbitmap_queue_resize(q->shared_sbitmap_tags->bitmap_tags,
                              q->nr_requests - q->tag_set->reserved_tags);
 }
 
6 changes: 2 additions & 4 deletions block/blk-mq-tag.h
@@ -32,16 +32,14 @@ struct blk_mq_tags {
 
 extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags,
                                         unsigned int reserved_tags,
-                                        int node, unsigned int flags);
-extern void blk_mq_free_tags(struct blk_mq_tags *tags, unsigned int flags);
+                                        int node, int alloc_policy);
+extern void blk_mq_free_tags(struct blk_mq_tags *tags);
 extern int blk_mq_init_bitmaps(struct sbitmap_queue *bitmap_tags,
                         struct sbitmap_queue *breserved_tags,
                         unsigned int queue_depth,
                         unsigned int reserved,
                         int node, int alloc_policy);
 
-extern int blk_mq_init_shared_sbitmap(struct blk_mq_tag_set *set);
-extern void blk_mq_exit_shared_sbitmap(struct blk_mq_tag_set *set);
 extern unsigned int blk_mq_get_tag(struct blk_mq_alloc_data *data);
 extern void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
                         unsigned int tag);
[Diffs for the remaining four changed files were not loaded.]
