Skip to content

Commit

Permalink
null_blk: set a separate timer for each command
Browse files Browse the repository at this point in the history
For the Timer IRQ mode (i.e., when command completions are delayed),
there is one timer for each CPU. Each of these timers:
- has a completion queue associated with it, containing all the
  command completions to be executed when the timer fires;
- is set, and has a new completion-to-execute inserted into its
  completion queue, every time the dispatch code for a new command
  happens to be executed on the CPU related to the timer.

This implies that, if the dispatch of a new command happens to be
executed on a CPU whose timer has already been set, but has not yet
fired, then the timer is set again, to the completion time of the
newly arrived command. When the timer eventually fires, all its queued
completions are executed.

This way of handling delayed command completions entails the following
problem: if more than one command completion is inserted into the
queue of a timer before the timer fires, then the expiration time of
the timer is moved forward every time one of these completions is
enqueued. As a consequence, only the last completion enqueued enjoys a
correct execution time, while all previous completions are unjustly
delayed until the last completion is executed (and at that time they
are all executed together).

Specifically, if all the above completions are enqueued almost at the
same time, then the problem is negligible. On the opposite end, if
every completion is enqueued a while after the previous completion was
enqueued (in the extreme case, it is enqueued only right before the
timer would have expired), then every enqueued completion, except for
the last one, experiences an inflated delay, proportional to the number
of completions enqueued after it. In the end, commands, and thus I/O
requests, may be completed at an arbitrarily lower rate than the
desired one.

This commit addresses this issue by replacing per-CPU timers with
per-command timers, i.e., by associating an individual timer with each
command.

Signed-off-by: Paolo Valente <paolo.valente@unimore.it>
Signed-off-by: Arianna Avanzini <avanzini@google.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
  • Loading branch information
Paolo Valente authored and Jens Axboe committed Dec 1, 2015
1 parent a88d32a commit 3c395a9
Showing 1 changed file with 24 additions and 55 deletions.
79 changes: 24 additions & 55 deletions drivers/block/null_blk.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ struct nullb_cmd {
struct bio *bio;
unsigned int tag;
struct nullb_queue *nq;
struct hrtimer timer;
};

struct nullb_queue {
Expand Down Expand Up @@ -49,17 +50,6 @@ static int null_major;
static int nullb_indexes;
static struct kmem_cache *ppa_cache;

struct completion_queue {
struct llist_head list;
struct hrtimer timer;
};

/*
* These are per-cpu for now, they will need to be configured by the
* complete_queues parameter and appropriately mapped.
*/
static DEFINE_PER_CPU(struct completion_queue, completion_queues);

enum {
NULL_IRQ_NONE = 0,
NULL_IRQ_SOFTIRQ = 1,
Expand Down Expand Up @@ -180,6 +170,8 @@ static void free_cmd(struct nullb_cmd *cmd)
put_tag(cmd->nq, cmd->tag);
}

static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer);

static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
{
struct nullb_cmd *cmd;
Expand All @@ -190,6 +182,11 @@ static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq)
cmd = &nq->cmds[tag];
cmd->tag = tag;
cmd->nq = nq;
if (irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
}
return cmd;
}

Expand Down Expand Up @@ -238,47 +235,28 @@ static void end_cmd(struct nullb_cmd *cmd)

static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
{
struct completion_queue *cq;
struct llist_node *entry;
struct nullb_cmd *cmd;

cq = &per_cpu(completion_queues, smp_processor_id());

while ((entry = llist_del_all(&cq->list)) != NULL) {
entry = llist_reverse_order(entry);
do {
struct request_queue *q = NULL;
struct nullb_cmd *cmd = container_of(timer, struct nullb_cmd, timer);
struct request_queue *q = NULL;

cmd = container_of(entry, struct nullb_cmd, ll_list);
entry = entry->next;
if (cmd->rq)
q = cmd->rq->q;
end_cmd(cmd);
if (cmd->rq)
q = cmd->rq->q;

if (q && !q->mq_ops && blk_queue_stopped(q)) {
spin_lock(q->queue_lock);
if (blk_queue_stopped(q))
blk_start_queue(q);
spin_unlock(q->queue_lock);
}
} while (entry);
if (q && !q->mq_ops && blk_queue_stopped(q)) {
spin_lock(q->queue_lock);
if (blk_queue_stopped(q))
blk_start_queue(q);
spin_unlock(q->queue_lock);
}
end_cmd(cmd);

return HRTIMER_NORESTART;
}

static void null_cmd_end_timer(struct nullb_cmd *cmd)
{
struct completion_queue *cq = &per_cpu(completion_queues, get_cpu());

cmd->ll_list.next = NULL;
if (llist_add(&cmd->ll_list, &cq->list)) {
ktime_t kt = ktime_set(0, completion_nsec);
ktime_t kt = ktime_set(0, completion_nsec);

hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL_PINNED);
}

put_cpu();
hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}

static void null_softirq_done_fn(struct request *rq)
Expand Down Expand Up @@ -376,6 +354,10 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
{
struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);

if (irqmode == NULL_IRQ_TIMER) {
hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cmd->timer.function = null_cmd_timer_expired;
}
cmd->rq = bd->rq;
cmd->nq = hctx->driver_data;

Expand Down Expand Up @@ -813,19 +795,6 @@ static int __init null_init(void)

mutex_init(&lock);

/* Initialize a separate list for each CPU for issuing softirqs */
for_each_possible_cpu(i) {
struct completion_queue *cq = &per_cpu(completion_queues, i);

init_llist_head(&cq->list);

if (irqmode != NULL_IRQ_TIMER)
continue;

hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
cq->timer.function = null_cmd_timer_expired;
}

null_major = register_blkdev(0, "nullb");
if (null_major < 0)
return null_major;
Expand Down

0 comments on commit 3c395a9

Please sign in to comment.