Skip to content

Commit

Permalink
NVMe: Separate queue alloc/free from create/delete
Browse files Browse the repository at this point in the history
This separates nvme queue allocation from creation, and queue deletion
from freeing. This is so that we may in the future temporarily disable
queues and reuse the same memory when bringing them back online, like
coming back from suspend state.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
  • Loading branch information
Keith Busch authored and Matthew Wilcox committed Sep 3, 2013
1 parent 0877cb0 commit 2240427
Showing 1 changed file with 94 additions and 39 deletions.
133 changes: 94 additions & 39 deletions drivers/block/nvme-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ struct nvme_queue {
u16 cq_head;
u8 cq_phase;
u8 cqe_seen;
u8 q_suspended;
unsigned long cmdid_data[];
};

Expand Down Expand Up @@ -117,6 +118,11 @@ static struct nvme_cmd_info *nvme_cmd_info(struct nvme_queue *nvmeq)
return (void *)&nvmeq->cmdid_data[BITS_TO_LONGS(nvmeq->q_depth)];
}

/**
 * nvme_queue_extra() - Bytes of per-queue data trailing struct nvme_queue
 * @depth: Number of entries in the queue
 *
 * The trailing cmdid_data[] area starts with BITS_TO_LONGS(depth) longs and is
 * followed by one struct nvme_cmd_info per queue entry; nvme_cmd_info()
 * locates the info array at cmdid_data[BITS_TO_LONGS(depth)].  The leading
 * part must therefore be counted in whole longs, not in bytes rounded up from
 * bits, or the last info entry can extend past the returned size whenever
 * depth is not a multiple of BITS_PER_LONG.
 *
 * Return: the number of bytes to allocate (and clear) after struct nvme_queue.
 */
static unsigned nvme_queue_extra(int depth)
{
	return BITS_TO_LONGS(depth) * sizeof(unsigned long) +
				(depth * sizeof(struct nvme_cmd_info));
}

/**
* alloc_cmdid() - Allocate a Command ID
* @nvmeq: The queue that will be used for this command
Expand Down Expand Up @@ -784,7 +790,7 @@ static void nvme_make_request(struct request_queue *q, struct bio *bio)
int result = -EBUSY;

spin_lock_irq(&nvmeq->q_lock);
if (bio_list_empty(&nvmeq->sq_cong))
if (!nvmeq->q_suspended && bio_list_empty(&nvmeq->sq_cong))
result = nvme_submit_bio_queue(nvmeq, ns, bio);
if (unlikely(result)) {
if (bio_list_empty(&nvmeq->sq_cong))
Expand Down Expand Up @@ -1018,26 +1024,44 @@ static void nvme_cancel_ios(struct nvme_queue *nvmeq, bool timeout)
}
}

static void nvme_free_queue_mem(struct nvme_queue *nvmeq)
static void nvme_free_queue(struct nvme_queue *nvmeq)
{
spin_lock_irq(&nvmeq->q_lock);
while (bio_list_peek(&nvmeq->sq_cong)) {
struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
bio_endio(bio, -EIO);
}
spin_unlock_irq(&nvmeq->q_lock);

dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
kfree(nvmeq);
}

static void nvme_free_queue(struct nvme_dev *dev, int qid)
/*
 * Free every queue the device currently counts, walking from the highest
 * index down to queue 0.  Each slot is cleared and queue_count is decremented
 * as its queue is released.
 */
static void nvme_free_queues(struct nvme_dev *dev)
{
	int qid = dev->queue_count - 1;

	while (qid >= 0) {
		nvme_free_queue(dev->queues[qid]);
		dev->queue_count--;
		dev->queues[qid] = NULL;
		qid--;
	}
}

static void nvme_disable_queue(struct nvme_dev *dev, int qid)
{
struct nvme_queue *nvmeq = dev->queues[qid];
int vector = dev->entry[nvmeq->cq_vector].vector;

spin_lock_irq(&nvmeq->q_lock);
nvme_cancel_ios(nvmeq, false);
while (bio_list_peek(&nvmeq->sq_cong)) {
struct bio *bio = bio_list_pop(&nvmeq->sq_cong);
bio_endio(bio, -EIO);
if (nvmeq->q_suspended) {
spin_unlock_irq(&nvmeq->q_lock);
return;
}
nvmeq->q_suspended = 1;
spin_unlock_irq(&nvmeq->q_lock);

irq_set_affinity_hint(vector, NULL);
Expand All @@ -1049,15 +1073,17 @@ static void nvme_free_queue(struct nvme_dev *dev, int qid)
adapter_delete_cq(dev, qid);
}

nvme_free_queue_mem(nvmeq);
spin_lock_irq(&nvmeq->q_lock);
nvme_process_cq(nvmeq);
nvme_cancel_ios(nvmeq, false);
spin_unlock_irq(&nvmeq->q_lock);
}

static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
int depth, int vector)
{
struct device *dmadev = &dev->pci_dev->dev;
unsigned extra = DIV_ROUND_UP(depth, 8) + (depth *
sizeof(struct nvme_cmd_info));
unsigned extra = nvme_queue_extra(depth);
struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq) + extra, GFP_KERNEL);
if (!nvmeq)
return NULL;
Expand All @@ -1084,6 +1110,8 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
nvmeq->q_depth = depth;
nvmeq->cq_vector = vector;
nvmeq->q_suspended = 1;
dev->queue_count++;

return nvmeq;

Expand All @@ -1107,18 +1135,29 @@ static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
IRQF_DISABLED | IRQF_SHARED, name, nvmeq);
}

static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
int cq_size, int vector)
static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
int result;
struct nvme_queue *nvmeq = nvme_alloc_queue(dev, qid, cq_size, vector);
struct nvme_dev *dev = nvmeq->dev;
unsigned extra = nvme_queue_extra(nvmeq->q_depth);

if (!nvmeq)
return ERR_PTR(-ENOMEM);
nvmeq->sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid << (dev->db_stride + 1)];
memset(nvmeq->cmdid_data, 0, extra);
memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth));
nvme_cancel_ios(nvmeq, false);
nvmeq->q_suspended = 0;
}

static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
{
struct nvme_dev *dev = nvmeq->dev;
int result;

result = adapter_alloc_cq(dev, qid, nvmeq);
if (result < 0)
goto free_nvmeq;
return result;

result = adapter_alloc_sq(dev, qid, nvmeq);
if (result < 0)
Expand All @@ -1128,19 +1167,17 @@ static struct nvme_queue *nvme_create_queue(struct nvme_dev *dev, int qid,
if (result < 0)
goto release_sq;

return nvmeq;
spin_lock(&nvmeq->q_lock);
nvme_init_queue(nvmeq, qid);
spin_unlock(&nvmeq->q_lock);

return result;

release_sq:
adapter_delete_sq(dev, qid);
release_cq:
adapter_delete_cq(dev, qid);
free_nvmeq:
dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
nvmeq->sq_cmds, nvmeq->sq_dma_addr);
kfree(nvmeq);
return ERR_PTR(result);
return result;
}

static int nvme_wait_ready(struct nvme_dev *dev, u64 cap, bool enabled)
Expand Down Expand Up @@ -1221,10 +1258,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
goto free_q;

dev->queues[0] = nvmeq;
spin_lock(&nvmeq->q_lock);
nvme_init_queue(nvmeq, 0);
spin_unlock(&nvmeq->q_lock);
return result;

free_q:
nvme_free_queue_mem(nvmeq);
nvme_free_queue(nvmeq);
return result;
}

Expand Down Expand Up @@ -1386,6 +1426,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
put_nvmeq(nvmeq);
if (length != (io.nblocks + 1) << ns->lba_shift)
status = -ENOMEM;
else if (!nvmeq || nvmeq->q_suspended)
status = -EBUSY;
else
status = nvme_submit_sync_cmd(nvmeq, &c, NULL, NVME_IO_TIMEOUT);

Expand Down Expand Up @@ -1537,9 +1579,12 @@ static int nvme_kthread(void *data)
if (!nvmeq)
continue;
spin_lock_irq(&nvmeq->q_lock);
if (nvmeq->q_suspended)
goto unlock;
nvme_process_cq(nvmeq);
nvme_cancel_ios(nvmeq, true);
nvme_resubmit_bios(nvmeq);
unlock:
spin_unlock_irq(&nvmeq->q_lock);
}
}
Expand Down Expand Up @@ -1725,7 +1770,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
nr_io_queues = vecs;

result = queue_request_irq(dev, dev->queues[0], "nvme admin");
/* XXX: handle failure here */
if (result)
goto free_queues;

cpu = cpumask_first(cpu_online_mask);
for (i = 0; i < nr_io_queues; i++) {
Expand All @@ -1736,26 +1782,32 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
q_depth = min_t(int, NVME_CAP_MQES(readq(&dev->bar->cap)) + 1,
NVME_Q_DEPTH);
for (i = 0; i < nr_io_queues; i++) {
dev->queues[i + 1] = nvme_create_queue(dev, i + 1, q_depth, i);
if (IS_ERR(dev->queues[i + 1]))
return PTR_ERR(dev->queues[i + 1]);
dev->queue_count++;
dev->queues[i + 1] = nvme_alloc_queue(dev, i + 1, q_depth, i);
if (!dev->queues[i + 1]) {
result = -ENOMEM;
goto free_queues;
}
}

for (; i < num_possible_cpus(); i++) {
int target = i % rounddown_pow_of_two(dev->queue_count - 1);
dev->queues[i + 1] = dev->queues[target + 1];
}

return 0;
}
for (i = 1; i < dev->queue_count; i++) {
result = nvme_create_queue(dev->queues[i], i);
if (result) {
for (--i; i > 0; i--)
nvme_disable_queue(dev, i);
goto free_queues;
}
}

static void nvme_free_queues(struct nvme_dev *dev)
{
int i;
return 0;

for (i = dev->queue_count - 1; i >= 0; i--)
nvme_free_queue(dev, i);
free_queues:
nvme_free_queues(dev);
return result;
}

/*
Expand Down Expand Up @@ -1887,6 +1939,10 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
static int nvme_dev_remove(struct nvme_dev *dev)
{
struct nvme_ns *ns, *next;
int i;

for (i = dev->queue_count - 1; i >= 0; i--)
nvme_disable_queue(dev, i);

spin_lock(&dev_list_lock);
list_del(&dev->node);
Expand Down Expand Up @@ -2037,7 +2093,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
result = nvme_configure_admin_queue(dev);
if (result)
goto unmap;
dev->queue_count++;

spin_lock(&dev_list_lock);
list_add(&dev->node, &dev_list);
Expand Down

0 comments on commit 2240427

Please sign in to comment.