blk-mq: ensure that hardware queues are always run on the mapped CPUs
Instead of providing soft mappings with no guarantee that hardware
queues are always run on the right CPU, switch to a hard mapping that
guarantees we always run the hardware queue on (one of, if more than
one) the mapped CPUs.

Signed-off-by: Jens Axboe <axboe@fb.com>
Jens Axboe committed Apr 9, 2014
1 parent 8ab1459 commit e4043dc
Showing 2 changed files with 52 additions and 15 deletions.
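The rule the patch enforces is simple to state: a hardware queue may be run inline only when the current CPU is in that queue's mask; otherwise the work is punted to one of the mapped CPUs. As a reading aid, here is a minimal user-space C sketch of that rule. It is illustrative only: sched_getcpu() and the glibc CPU_* macros stand in for the kernel's smp_processor_id(), cpumask_test_cpu() and cpumask_first(), and struct hw_queue is a made-up stand-in for struct blk_mq_hw_ctx.

/* Minimal user-space model of the dispatch rule (not the kernel code). */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

struct hw_queue {
	cpu_set_t cpumask;	/* CPUs this hardware queue is mapped to */
};

static void run_hw_queue(struct hw_queue *hctx)
{
	int cpu = sched_getcpu();

	/* On a mapped CPU: safe to run the queue right here. */
	if (CPU_ISSET(cpu, &hctx->cpumask)) {
		printf("CPU %d is mapped: run inline\n", cpu);
		return;
	}

	/* Not on a mapped CPU: fall back to the first CPU in the mask,
	 * mirroring the cpumask_first() fallback in the patch. */
	for (int target = 0; target < CPU_SETSIZE; target++) {
		if (CPU_ISSET(target, &hctx->cpumask)) {
			printf("CPU %d not mapped: punt to CPU %d\n", cpu, target);
			return;
		}
	}
}

int main(void)
{
	struct hw_queue hctx;

	CPU_ZERO(&hctx.cpumask);
	CPU_SET(2, &hctx.cpumask);	/* pretend this queue maps CPUs 2-3 */
	CPU_SET(3, &hctx.cpumask);

	run_hw_queue(&hctx);
	return 0;
}

Pinning the process exercises both branches, e.g. "taskset -c 2 ./model" takes the inline path while "taskset -c 0 ./model" punts to CPU 2 (the binary name is just an example).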
block/blk-mq.c: 66 changes (51 additions & 15 deletions)
@@ -209,11 +209,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 			break;
 		}
 
-		blk_mq_put_ctx(ctx);
-		if (!(gfp & __GFP_WAIT))
+		if (gfp & __GFP_WAIT) {
+			__blk_mq_run_hw_queue(hctx);
+			blk_mq_put_ctx(ctx);
+		} else {
+			blk_mq_put_ctx(ctx);
 			break;
+		}
 
-		__blk_mq_run_hw_queue(hctx);
 		blk_mq_wait_for_tags(hctx->tags);
 	} while (1);

@@ -514,6 +517,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	LIST_HEAD(rq_list);
 	int bit, queued;
 
+	WARN_ON(!preempt_count());
+
 	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
 		return;

@@ -606,10 +611,22 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
 		return;
 
-	if (!async)
+	if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask))
 		__blk_mq_run_hw_queue(hctx);
-	else
+	else if (hctx->queue->nr_hw_queues == 1)
 		kblockd_schedule_delayed_work(&hctx->delayed_work, 0);
+	else {
+		unsigned int cpu;
+
+		/*
+		 * It'd be great if the workqueue API had a way to pass
+		 * in a mask and had some smarts for more clever placement
+		 * than the first CPU. Or we could round-robin here. For now,
+		 * just queue on the first CPU.
+		 */
+		cpu = cpumask_first(hctx->cpumask);
+		kblockd_schedule_delayed_work_on(cpu, &hctx->delayed_work, 0);
+	}
 }
 
 void blk_mq_run_queues(struct request_queue *q, bool async)
@@ -623,7 +640,9 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
 		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 			continue;
 
+		preempt_disable();
 		blk_mq_run_hw_queue(hctx, async);
+		preempt_enable();
 	}
 }
 EXPORT_SYMBOL(blk_mq_run_queues);
@@ -648,7 +667,10 @@ EXPORT_SYMBOL(blk_mq_stop_hw_queues);
 void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+
+	preempt_disable();
 	__blk_mq_run_hw_queue(hctx);
+	preempt_enable();
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queue);

@@ -662,7 +684,9 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q)
 			continue;
 
 		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
+		preempt_disable();
 		blk_mq_run_hw_queue(hctx, true);
+		preempt_enable();
 	}
 }
 EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -672,7 +696,10 @@ static void blk_mq_work_fn(struct work_struct *work)
 	struct blk_mq_hw_ctx *hctx;
 
 	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_work.work);
+
+	preempt_disable();
 	__blk_mq_run_hw_queue(hctx);
+	preempt_enable();
 }
 
 static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
@@ -716,10 +743,10 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
 		spin_unlock(&ctx->lock);
 	}
 
-	blk_mq_put_ctx(current_ctx);
-
 	if (run_queue)
 		blk_mq_run_hw_queue(hctx, async);
+
+	blk_mq_put_ctx(current_ctx);
 }
 
 static void blk_mq_insert_requests(struct request_queue *q,
@@ -755,9 +782,8 @@ static void blk_mq_insert_requests(struct request_queue *q,
 	}
 	spin_unlock(&ctx->lock);
 
-	blk_mq_put_ctx(current_ctx);
-
 	blk_mq_run_hw_queue(hctx, from_schedule);
+	blk_mq_put_ctx(current_ctx);
 }
 
 static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
@@ -876,7 +902,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	if (unlikely(is_flush_fua)) {
 		blk_mq_bio_to_request(rq, bio);
-		blk_mq_put_ctx(ctx);
 		blk_insert_flush(rq);
 		goto run_queue;
 	}
@@ -914,7 +939,6 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	}
 
 	spin_unlock(&ctx->lock);
-	blk_mq_put_ctx(ctx);
 
 	/*
 	 * For a SYNC request, send it to the hardware immediately. For an
@@ -923,6 +947,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	 */
 run_queue:
 	blk_mq_run_hw_queue(hctx, !is_sync || is_flush_fua);
+	blk_mq_put_ctx(ctx);
 }
 
 /*
@@ -990,9 +1015,9 @@ static void blk_mq_hctx_notify(void *data, unsigned long action,
 	blk_mq_hctx_mark_pending(hctx, ctx);
 
 	spin_unlock(&ctx->lock);
-	blk_mq_put_ctx(ctx);
 
 	blk_mq_run_hw_queue(hctx, true);
+	blk_mq_put_ctx(ctx);
 }
 
 static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx,
@@ -1255,12 +1280,13 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 		__ctx->queue = q;
 
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		hctx = q->mq_ops->map_queue(q, i);
-		hctx->nr_ctx++;
-
 		if (!cpu_online(i))
 			continue;
 
+		hctx = q->mq_ops->map_queue(q, i);
+		cpumask_set_cpu(i, hctx->cpumask);
+		hctx->nr_ctx++;
+
 		/*
 		 * Set local node, IFF we have more than one hw queue. If
 		 * not, we remain on the home node of the device
@@ -1277,6 +1303,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	struct blk_mq_ctx *ctx;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
+		cpumask_clear(hctx->cpumask);
 		hctx->nr_ctx = 0;
 	}

@@ -1285,7 +1312,11 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	 */
 	queue_for_each_ctx(q, ctx, i) {
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
+		if (!cpu_online(i))
+			continue;
+
 		hctx = q->mq_ops->map_queue(q, i);
+		cpumask_set_cpu(i, hctx->cpumask);
 		ctx->index_hw = hctx->nr_ctx;
 		hctx->ctxs[hctx->nr_ctx++] = ctx;
 	}
@@ -1329,6 +1360,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 		if (!hctxs[i])
 			goto err_hctxs;
 
+		if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL))
+			goto err_hctxs;
+
 		hctxs[i]->numa_node = NUMA_NO_NODE;
 		hctxs[i]->queue_num = i;
 	}
@@ -1392,6 +1426,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 	for (i = 0; i < reg->nr_hw_queues; i++) {
 		if (!hctxs[i])
 			break;
+		free_cpumask_var(hctxs[i]->cpumask);
 		reg->ops->free_hctx(hctxs[i], i);
 	}
 	kfree(hctxs);
@@ -1413,6 +1448,7 @@ void blk_mq_free_queue(struct request_queue *q)
 		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
 		if (q->mq_ops->exit_hctx)
 			q->mq_ops->exit_hctx(hctx, i);
+		free_cpumask_var(hctx->cpumask);
 		q->mq_ops->free_hctx(hctx, i);
 	}

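For context on the header change below: the mask is a cpumask_var_t, which is a real allocation when CONFIG_CPUMASK_OFFSTACK=y. That is why blk_mq_init_queue() above gains a zalloc_cpumask_var() call whose failure takes the goto err_hctxs path, and why both teardown paths gain free_cpumask_var(). A rough user-space analogue of that allocate/populate/free lifecycle, using glibc's dynamic CPU-set API (a sketch under that analogy, not the kernel helpers):

/* User-space analogue of the per-queue mask lifecycle added above.
 * CPU_ALLOC() plus CPU_ZERO_S() stand in for zalloc_cpumask_var(),
 * CPU_FREE() for free_cpumask_var(). Illustrative only. */
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	int ncpus = 8;				/* assume 8 possible CPUs */
	size_t setsize = CPU_ALLOC_SIZE(ncpus);
	cpu_set_t *mask = CPU_ALLOC(ncpus);

	if (!mask)
		return 1;			/* mirrors the goto err_hctxs path */

	CPU_ZERO_S(setsize, mask);		/* start with an empty mask */
	CPU_SET_S(5, setsize, mask);		/* map a CPU during setup */

	printf("CPU 5 mapped: %d\n", CPU_ISSET_S(5, setsize, mask));

	CPU_FREE(mask);				/* free on teardown */
	return 0;
}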
include/linux/blk-mq.h: 1 change (1 addition & 0 deletions)
@@ -19,6 +19,7 @@ struct blk_mq_hw_ctx {
 
 	unsigned long		state;		/* BLK_MQ_S_* flags */
 	struct delayed_work	delayed_work;
+	cpumask_var_t		cpumask;
 
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
