From 489beb91e66a237254e23ac1a0fe1beac23d87c5 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Tue, 29 Aug 2017 10:33:44 -0700 Subject: [PATCH 1/7] nvme-fabrics: Convert nvmf_transports_mutex to an rwsem The mutex protects against the list of transports changing while a controller is being created, but using a plain old mutex means that it also serializes controller creation. This unnecessarily slows down creating multiple controllers - for example for the RDMA transport, creating a controller involves establishing one connection for every IO queue, which involves even more network/software round trips, so the delay can become significant. The simplest way to fix this is to change the mutex to an rwsem and only hold it for writing when the list is being mutated. Since we can take the rwsem for reading while creating a controller, we can create multiple controllers in parallel. Signed-off-by: Roland Dreier Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index fc3b6552f4675..53a9598e3aee3 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -22,7 +22,7 @@ #include "fabrics.h" static LIST_HEAD(nvmf_transports); -static DEFINE_MUTEX(nvmf_transports_mutex); +static DECLARE_RWSEM(nvmf_transports_rwsem); static LIST_HEAD(nvmf_hosts); static DEFINE_MUTEX(nvmf_hosts_mutex); @@ -495,9 +495,9 @@ int nvmf_register_transport(struct nvmf_transport_ops *ops) if (!ops->create_ctrl) return -EINVAL; - mutex_lock(&nvmf_transports_mutex); + down_write(&nvmf_transports_rwsem); list_add_tail(&ops->entry, &nvmf_transports); - mutex_unlock(&nvmf_transports_mutex); + up_write(&nvmf_transports_rwsem); return 0; } @@ -514,9 +514,9 @@ EXPORT_SYMBOL_GPL(nvmf_register_transport); */ void nvmf_unregister_transport(struct nvmf_transport_ops *ops) { - mutex_lock(&nvmf_transports_mutex); + down_write(&nvmf_transports_rwsem); list_del(&ops->entry); - mutex_unlock(&nvmf_transports_mutex); + up_write(&nvmf_transports_rwsem); } EXPORT_SYMBOL_GPL(nvmf_unregister_transport); @@ -525,7 +525,7 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( { struct nvmf_transport_ops *ops; - lockdep_assert_held(&nvmf_transports_mutex); + lockdep_assert_held(&nvmf_transports_rwsem); list_for_each_entry(ops, &nvmf_transports, entry) { if (strcmp(ops->name, opts->transport) == 0) @@ -851,7 +851,7 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) goto out_free_opts; opts->mask &= ~NVMF_REQUIRED_OPTS; - mutex_lock(&nvmf_transports_mutex); + down_read(&nvmf_transports_rwsem); ops = nvmf_lookup_transport(opts); if (!ops) { pr_info("no handler found for transport %s.\n", @@ -878,16 +878,16 @@ nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) dev_warn(ctrl->device, "controller returned incorrect NQN: \"%s\".\n", ctrl->subnqn); - mutex_unlock(&nvmf_transports_mutex); + up_read(&nvmf_transports_rwsem); ctrl->ops->delete_ctrl(ctrl); return ERR_PTR(-EINVAL); } - mutex_unlock(&nvmf_transports_mutex); + up_read(&nvmf_transports_rwsem); return ctrl; out_unlock: - mutex_unlock(&nvmf_transports_mutex); + up_read(&nvmf_transports_rwsem); out_free_opts: nvmf_free_options(opts); return ERR_PTR(ret); From 1cad65620fecfb24cdeefa5533628a4f293783e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Aug 2017 17:46:01 -0400 Subject: [PATCH 2/7] nvme: factor metadata handling out of __nvme_submit_user_cmd Keep the metadata code in a separate helper instead of making the main function more complicated. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 76 +++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b0dd58db110ed..a520c841c63c2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -600,6 +600,40 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, } EXPORT_SYMBOL_GPL(nvme_submit_sync_cmd); +static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, + unsigned len, u32 seed, bool write) +{ + struct bio_integrity_payload *bip; + int ret = -ENOMEM; + void *buf; + + buf = kmalloc(len, GFP_KERNEL); + if (!buf) + goto out; + + ret = -EFAULT; + if (write && copy_from_user(buf, ubuf, len)) + goto out_free_meta; + + bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); + if (IS_ERR(bip)) { + ret = PTR_ERR(bip); + goto out_free_meta; + } + + bip->bip_iter.bi_size = len; + bip->bip_iter.bi_sector = seed; + ret = bio_integrity_add_page(bio, virt_to_page(buf), len, + offset_in_page(buf)); + if (ret == len) + return buf; + ret = -ENOMEM; +out_free_meta: + kfree(buf); +out: + return ERR_PTR(ret); +} + int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, @@ -625,46 +659,17 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, if (ret) goto out; bio = req->bio; - - if (!disk) - goto submit; bio->bi_disk = disk; - - if (meta_buffer && meta_len) { - struct bio_integrity_payload *bip; - - meta = kmalloc(meta_len, GFP_KERNEL); - if (!meta) { - ret = -ENOMEM; + if (disk && meta_buffer && meta_len) { + meta = nvme_add_user_metadata(bio, meta_buffer, meta_len, + meta_seed, write); + if (IS_ERR(meta)) { + ret = PTR_ERR(meta); goto out_unmap; } - - if (write) { - if (copy_from_user(meta, meta_buffer, - meta_len)) { - ret = -EFAULT; - goto out_free_meta; - } - } - - bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); - if (IS_ERR(bip)) { - ret = PTR_ERR(bip); - goto out_free_meta; - } - - bip->bip_iter.bi_size = meta_len; - bip->bip_iter.bi_sector = meta_seed; - - ret = bio_integrity_add_page(bio, virt_to_page(meta), - meta_len, offset_in_page(meta)); - if (ret != meta_len) { - ret = -ENOMEM; - goto out_free_meta; - } } } - submit: + blk_execute_rq(req->q, disk, req, 0); if (nvme_req(req)->flags & NVME_REQ_CANCELLED) ret = -EINTR; @@ -676,7 +681,6 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, if (copy_to_user(meta_buffer, meta, meta_len)) ret = -EFAULT; } - out_free_meta: kfree(meta); out_unmap: if (bio) From b5d8af5b521bdb4167808979df37e9defabeb707 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Aug 2017 17:46:02 -0400 Subject: [PATCH 3/7] nvme/pci: Use req_op to determine DIF remapping Only read and write commands need DIF remapping. Everything else uses a passthrough integrity payload. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 544805a2421b6..11874afb2422c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -668,7 +668,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1) goto out_unmap; - if (rq_data_dir(req)) + if (req_op(req) == REQ_OP_WRITE) nvme_dif_remap(req, nvme_dif_prep); if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) @@ -696,7 +696,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) if (iod->nents) { dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); if (blk_integrity_rq(req)) { - if (!rq_data_dir(req)) + if (req_op(req) == REQ_OP_READ) nvme_dif_remap(req, nvme_dif_complete); dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); } From 485783ca63e3a47fa5a30df8c6745a6299607e4d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Aug 2017 17:46:03 -0400 Subject: [PATCH 4/7] nvme: Make nvme user functions static These functions are used only locally in the nvme core. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 10 +++++----- drivers/nvme/host/nvme.h | 7 ------- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a520c841c63c2..01f39a977f953 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -634,10 +634,10 @@ static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, return ERR_PTR(ret); } -int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, - void __user *meta_buffer, unsigned meta_len, u32 meta_seed, - u32 *result, unsigned timeout) +static int __nvme_submit_user_cmd(struct request_queue *q, + struct nvme_command *cmd, void __user *ubuffer, + unsigned bufflen, void __user *meta_buffer, unsigned meta_len, + u32 meta_seed, u32 *result, unsigned timeout) { bool write = nvme_is_write(cmd); struct nvme_ns *ns = q->queuedata; @@ -690,7 +690,7 @@ int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, return ret; } -int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, +static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, u32 *result, unsigned timeout) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 936c4056d98eb..a19a587d60ed5 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -314,13 +314,6 @@ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, unsigned timeout, int qid, int at_head, int flags); -int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, u32 *result, - unsigned timeout); -int __nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, - void __user *meta_buffer, unsigned meta_len, u32 meta_seed, - u32 *result, unsigned timeout); int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_start_keep_alive(struct nvme_ctrl *ctrl); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); From 63263d60e0f9f37bfd5e6a1e83a62f0e62fc459f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 29 Aug 2017 17:46:04 -0400 Subject: [PATCH 5/7] nvme: Use metadata for passthrough commands The ioctls' struct allows the user to provide a metadata address and length for a passthrough command. This patch uses these values that were previously ignored and deletes the now unused wrapper function. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 01f39a977f953..277a7a02cba5c 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -634,7 +634,7 @@ static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf, return ERR_PTR(ret); } -static int __nvme_submit_user_cmd(struct request_queue *q, +static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u32 *result, unsigned timeout) @@ -690,14 +690,6 @@ static int __nvme_submit_user_cmd(struct request_queue *q, return ret; } -static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, - void __user *ubuffer, unsigned bufflen, u32 *result, - unsigned timeout) -{ - return __nvme_submit_user_cmd(q, cmd, ubuffer, bufflen, NULL, 0, 0, - result, timeout); -} - static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status) { struct nvme_ctrl *ctrl = rq->end_io_data; @@ -987,7 +979,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) c.rw.apptag = cpu_to_le16(io.apptag); c.rw.appmask = cpu_to_le16(io.appmask); - return __nvme_submit_user_cmd(ns->queue, &c, + return nvme_submit_user_cmd(ns->queue, &c, (void __user *)(uintptr_t)io.addr, length, metadata, meta_len, io.slba, NULL, 0); } @@ -1025,7 +1017,8 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, (void __user *)(uintptr_t)cmd.addr, cmd.data_len, - &cmd.result, timeout); + (void __user *)(uintptr_t)cmd.metadata, cmd.metadata, + 0, &cmd.result, timeout); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) return -EFAULT; From 28dd5cf70aaac2a12a16847ae0a978f0b0575194 Mon Sep 17 00:00:00 2001 From: Omri Mann Date: Wed, 30 Aug 2017 15:22:59 +0300 Subject: [PATCH 6/7] nvmet: add support for reporting the host identifier And fix the Get/Set Log Page implementation to take all 8 bits of the feature identifier into account. Signed-off-by: Omri Mann Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig [hch: used the UUID API, updated changelog] --- drivers/nvme/target/admin-cmd.c | 17 +++++++++++++++-- drivers/nvme/target/fabrics-cmd.c | 1 + drivers/nvme/target/nvmet.h | 1 + 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 9496c71d2257d..c4a0bf36e7521 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -443,7 +443,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) u32 val32; u16 status = 0; - switch (cdw10 & 0xf) { + switch (cdw10 & 0xff) { case NVME_FEAT_NUM_QUEUES: nvmet_set_result(req, (subsys->max_qid - 1) | ((subsys->max_qid - 1) << 16)); @@ -453,6 +453,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req) req->sq->ctrl->kato = DIV_ROUND_UP(val32, 1000); nvmet_set_result(req, req->sq->ctrl->kato); break; + case NVME_FEAT_HOST_ID: + status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; + break; default: status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; break; @@ -467,7 +470,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10[0]); u16 status = 0; - switch (cdw10 & 0xf) { + switch (cdw10 & 0xff) { /* * These features are mandatory in the spec, but we don't * have a useful way to implement them. We'll eventually @@ -501,6 +504,16 @@ static void nvmet_execute_get_features(struct nvmet_req *req) case NVME_FEAT_KATO: nvmet_set_result(req, req->sq->ctrl->kato * 1000); break; + case NVME_FEAT_HOST_ID: + /* need 128-bit host identifier flag */ + if (!(req->cmd->common.cdw10[1] & cpu_to_le32(1 << 0))) { + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; + break; + } + + status = nvmet_copy_to_sgl(req, 0, &req->sq->ctrl->hostid, + sizeof(req->sq->ctrl->hostid)); + break; default: status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; break; diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 3cc17269504bf..859a66725291d 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -154,6 +154,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) le32_to_cpu(c->kato), &ctrl); if (status) goto out; + uuid_copy(&ctrl->hostid, &d->hostid); status = nvmet_install_queue(ctrl, req); if (status) { diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index e3b244c7e443e..7d261ab894f47 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -115,6 +115,7 @@ struct nvmet_ctrl { u32 cc; u32 csts; + uuid_t hostid; u16 cntlid; u32 kato; From 40a5fce495715c48c2e02668144e68a507ac5a30 Mon Sep 17 00:00:00 2001 From: Daniel Verkamp Date: Wed, 30 Aug 2017 15:18:19 -0700 Subject: [PATCH 7/7] nvme-fabrics: generate spec-compliant UUID NQNs The default host NQN, which is generated based on the host's UUID, does not follow the UUID-based NQN format laid out in the NVMe 1.3 specification. Remove the "NVMf:" portion of the NQN to match the spec. Signed-off-by: Daniel Verkamp Reviewed-by: Max Gurtovoy Cc: stable@vger.kernel.org Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 53a9598e3aee3..47307752dc65d 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -75,7 +75,7 @@ static struct nvmf_host *nvmf_host_default(void) kref_init(&host->ref); snprintf(host->nqn, NVMF_NQN_SIZE, - "nqn.2014-08.org.nvmexpress:NVMf:uuid:%pUb", &host->id); + "nqn.2014-08.org.nvmexpress:uuid:%pUb", &host->id); mutex_lock(&nvmf_hosts_mutex); list_add_tail(&host->list, &nvmf_hosts);