From bc8fb906b0ff9339b4286698cb7cd9cd5b8c53eb Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Sep 2022 12:36:46 -0700 Subject: [PATCH 01/31] nvme: handle effects after freeing the request If a reset occurs after the scan work attempts to issue a command, the reset may quisce the admin queue, which blocks the scan work's command from dispatching. The scan work will not be able to complete while the queue is quiesced. Meanwhile, the reset work will cancel all outstanding admin tags and wait until all requests have transitioned to idle, which includes the passthrough request. But the passthrough request won't be set to idle until after the scan_work flushes, so we're deadlocked. Fix this by handling the end effects after the request has been freed. Link: https://bugzilla.kernel.org/show_bug.cgi?id=216354 Reported-by: Jonathan Derrick Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 17 ++++++----------- drivers/nvme/host/ioctl.c | 9 ++++++++- drivers/nvme/host/nvme.h | 4 +++- drivers/nvme/target/passthru.c | 7 ++++++- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8c9c1176624da..ea6694fd550f5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1111,8 +1111,8 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return effects; } -static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, - struct nvme_command *cmd, int status) +void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, + struct nvme_command *cmd, int status) { if (effects & NVME_CMD_EFFECTS_CSE_MASK) { nvme_unfreeze(ctrl); @@ -1148,21 +1148,16 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, break; } } +EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, NVME_TARGET_PASSTHRU); -int nvme_execute_passthru_rq(struct request *rq) +int nvme_execute_passthru_rq(struct request *rq, u32 *effects) { struct nvme_command *cmd = nvme_req(rq)->cmd; struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; struct nvme_ns *ns = rq->q->queuedata; - u32 effects; - int ret; - effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); - ret = nvme_execute_rq(rq, false); - if (effects) /* nothing to be done for zero cmd effects */ - nvme_passthru_end(ctrl, effects, cmd, ret); - - return ret; + *effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); + return nvme_execute_rq(rq, false); } EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index 27614bee73806..d3281f87cd6e4 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -136,9 +136,11 @@ static int nvme_submit_user_cmd(struct request_queue *q, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, u32 meta_seed, u64 *result, unsigned timeout, bool vec) { + struct nvme_ctrl *ctrl; struct request *req; void *meta = NULL; struct bio *bio; + u32 effects; int ret; req = nvme_alloc_user_request(q, cmd, ubuffer, bufflen, meta_buffer, @@ -147,8 +149,9 @@ static int nvme_submit_user_cmd(struct request_queue *q, return PTR_ERR(req); bio = req->bio; + ctrl = nvme_req(req)->ctrl; - ret = nvme_execute_passthru_rq(req); + ret = nvme_execute_passthru_rq(req, &effects); if (result) *result = le64_to_cpu(nvme_req(req)->result.u64); @@ -158,6 +161,10 @@ static int nvme_submit_user_cmd(struct request_queue *q, if (bio) blk_rq_unmap_user(bio); blk_mq_free_request(req); + + if (effects) + nvme_passthru_end(ctrl, effects, cmd, ret); + return ret; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1bdf714dcd9e4..a0bf9560cf678 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1023,7 +1023,9 @@ static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode); -int nvme_execute_passthru_rq(struct request *rq); +int nvme_execute_passthru_rq(struct request *rq, u32 *effects); +void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects, + struct nvme_command *cmd, int status); struct nvme_ctrl *nvme_ctrl_from_file(struct file *file); struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid); void nvme_put_ns(struct nvme_ns *ns); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 6f39a29828b12..94d3153bae54d 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -215,9 +215,11 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) { struct nvmet_req *req = container_of(w, struct nvmet_req, p.work); struct request *rq = req->p.rq; + struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; + u32 effects; int status; - status = nvme_execute_passthru_rq(rq); + status = nvme_execute_passthru_rq(rq, &effects); if (status == NVME_SC_SUCCESS && req->cmd->common.opcode == nvme_admin_identify) { @@ -238,6 +240,9 @@ static void nvmet_passthru_execute_cmd_work(struct work_struct *w) req->cqe->result = nvme_req(rq)->result; nvmet_req_complete(req, status); blk_mq_free_request(rq); + + if (effects) + nvme_passthru_end(ctrl, effects, req->cmd, status); } static void nvmet_passthru_req_done(struct request *rq, From a8eb6c1ba48bddea82e8d74cbe6e119f006be97d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 19 Sep 2022 12:45:08 -0700 Subject: [PATCH 02/31] nvme: copy firmware_rev on each init The firmware revision can change on after a reset so copy the most recent info each time instead of just the first time, otherwise the sysfs firmware_rev entry may contain stale data. Reported-by: Jeff Lien Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Reviewed-by: Chao Leng Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ea6694fd550f5..e56ecc7fda2d9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2893,7 +2893,6 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) nvme_init_subnqn(subsys, ctrl, id); memcpy(subsys->serial, id->sn, sizeof(subsys->serial)); memcpy(subsys->model, id->mn, sizeof(subsys->model)); - memcpy(subsys->firmware_rev, id->fr, sizeof(subsys->firmware_rev)); subsys->vendor_id = le16_to_cpu(id->vid); subsys->cmic = id->cmic; @@ -3112,6 +3111,8 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->quirks |= core_quirks[i].quirks; } } + memcpy(ctrl->subsys->firmware_rev, id->fr, + sizeof(ctrl->subsys->firmware_rev)); if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { dev_warn(ctrl->device, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); From 23e085b2dead13b51fe86d27069895b740f749c0 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 22 Sep 2022 07:54:06 -0700 Subject: [PATCH 03/31] nvme: restrict management ioctls to admin The passthrough commands already have this restriction, but the other operations do not. Require the same capabilities for all users as all of these operations, which include resets and rescans, can be disruptive. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index d3281f87cd6e4..a48a79ed5c4c5 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -764,11 +764,17 @@ long nvme_dev_ioctl(struct file *file, unsigned int cmd, case NVME_IOCTL_IO_CMD: return nvme_dev_user_cmd(ctrl, argp); case NVME_IOCTL_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; dev_warn(ctrl->device, "resetting controller\n"); return nvme_reset_ctrl_sync(ctrl); case NVME_IOCTL_SUBSYS_RESET: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; return nvme_reset_subsystem(ctrl); case NVME_IOCTL_RESCAN: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; nvme_queue_scan(ctrl); return 0; default: From 1e866afd4bcdd01a70a5eddb4371158d3035ce03 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 22 Sep 2022 08:13:47 -0700 Subject: [PATCH 04/31] nvme: ensure subsystem reset is single threaded The subsystem reset writes to a register, so we have to ensure the device state is capable of handling that otherwise the driver may access unmapped registers. Use the state machine to ensure the subsystem reset doesn't try to write registers on a device already undergoing this type of reset. Link: https://bugzilla.kernel.org/show_bug.cgi?id=214771 Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a0bf9560cf678..70555022cb445 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -602,11 +602,23 @@ static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj) static inline void nvme_should_fail(struct request *req) {} #endif +bool nvme_wait_reset(struct nvme_ctrl *ctrl); +int nvme_try_sched_reset(struct nvme_ctrl *ctrl); + static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) { + int ret; + if (!ctrl->subsystem) return -ENOTTY; - return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (!nvme_wait_reset(ctrl)) + return -EBUSY; + + ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); + if (ret) + return ret; + + return nvme_try_sched_reset(ctrl); } /* @@ -712,7 +724,6 @@ void nvme_cancel_tagset(struct nvme_ctrl *ctrl); void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); -bool nvme_wait_reset(struct nvme_ctrl *ctrl); int nvme_disable_ctrl(struct nvme_ctrl *ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl); @@ -802,7 +813,6 @@ int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count); void nvme_stop_keep_alive(struct nvme_ctrl *ctrl); int nvme_reset_ctrl(struct nvme_ctrl *ctrl); int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl); -int nvme_try_sched_reset(struct nvme_ctrl *ctrl); int nvme_delete_ctrl(struct nvme_ctrl *ctrl); void nvme_queue_scan(struct nvme_ctrl *ctrl); int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, u8 csi, From bf093d971695f30e312d2d0567c5feecfbcef450 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 22 Sep 2022 11:15:36 +0300 Subject: [PATCH 05/31] nvme: enumerate controller flags We expect to grow a few of these flags for various purposes so make them a proper enumeration. Signed-off-by: Sagi Grimberg Reviewed-by: James Smart Reviewed-by: Daniel Wagner Signed-off-by: Christoph Hellwig --- drivers/nvme/host/nvme.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 70555022cb445..6b4984e8b3639 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -233,6 +233,11 @@ struct nvme_fault_inject { #endif }; +enum nvme_ctrl_flags { + NVME_CTRL_FAILFAST_EXPIRED = 0, + NVME_CTRL_ADMIN_Q_STOPPED = 1, +}; + struct nvme_ctrl { bool comp_seen; enum nvme_ctrl_state state; @@ -354,8 +359,6 @@ struct nvme_ctrl { u16 maxcmd; int nr_reconnects; unsigned long flags; -#define NVME_CTRL_FAILFAST_EXPIRED 0 -#define NVME_CTRL_ADMIN_Q_STOPPED 1 struct nvmf_ctrl_options *opts; struct page *discard_page; From f46ef9e87c9e8941b7acee45611c7c6a322592bb Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 22 Sep 2022 11:15:37 +0300 Subject: [PATCH 06/31] nvme: send a rediscover uevent when a persistent discovery controller reconnects When a discovery controller is disconnected, no AENs will arrive to notify the host about discovery log change events. In order to solve this, send a uevent notification when a persistent discovery controller reconnects. We add a new ctrl flag NVME_CTRL_STARTED_ONCE that will be set on the first start, and consecutive calls will find it set, and send the event to userspace if the controller is a discovery controller. Upon the event reception, userspace will re-read the discovery log page and will act upon changes as it sees fit. Signed-off-by: Sagi Grimberg Reviewed-by: Daniel Wagner Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 10 ++++++++++ drivers/nvme/host/nvme.h | 1 + 2 files changed, 11 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e56ecc7fda2d9..0de2c227c1ab5 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4815,6 +4815,16 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) nvme_enable_aen(ctrl); + /* + * persistent discovery controllers need to send indication to userspace + * to re-read the discovery log page to learn about possible changes + * that were missed. We identify persistent discovery controllers by + * checking that they started once before, hence are reconnecting back. + */ + if (test_and_set_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags) && + nvme_discovery_ctrl(ctrl)) + nvme_change_uevent(ctrl, "NVME_EVENT=rediscover"); + if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 6b4984e8b3639..a8b054cd2ebbd 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -236,6 +236,7 @@ struct nvme_fault_inject { enum nvme_ctrl_flags { NVME_CTRL_FAILFAST_EXPIRED = 0, NVME_CTRL_ADMIN_Q_STOPPED = 1, + NVME_CTRL_STARTED_ONCE = 2, }; struct nvme_ctrl { From 61ce339f19fabbc3e51237148a7ef6f2270e44fa Mon Sep 17 00:00:00 2001 From: Rishabh Bhatnagar Date: Tue, 20 Sep 2022 19:19:32 +0000 Subject: [PATCH 07/31] nvme-pci: set min_align_mask before calculating max_hw_sectors If swiotlb is force enabled dma_max_mapping_size ends up calling swiotlb_max_mapping_size which takes into account the min align mask for the device. Set the min align mask for nvme driver before calling dma_max_mapping_size while calculating max hw sectors. Signed-off-by: Rishabh Bhatnagar Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 70b1922c8953f..e4230f2c98e85 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2831,6 +2831,8 @@ static void nvme_reset_work(struct work_struct *work) nvme_start_admin_queue(&dev->ctrl); } + dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); + /* * Limit the max command size to prevent iod->sg allocations going * over a single page. @@ -2843,7 +2845,6 @@ static void nvme_reset_work(struct work_struct *work) * Don't limit the IOMMU merged segment size. */ dma_set_max_seg_size(dev->dev, 0xffffffff); - dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1); mutex_unlock(&dev->shutdown_lock); From 6ee742fa8e5a7ee051604b16ec2ee6545461e794 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 26 Sep 2022 14:01:07 -0700 Subject: [PATCH 08/31] nvme-pci: report the actual number of tagset maps We've been reporting 2 maps regardless of whether the module parameter asked for anything beyond the default queues. A consequence of this means that blk-mq will reinitialize the all the hardware contexts and io schedulers on every controller reset when the mapping is exactly the same as before. This unnecessary overhead is adding several milliseconds on a reset for environments that don't need it. Report the actual number of mappings in use. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e4230f2c98e85..8dd1dddbbbbfc 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2526,9 +2526,11 @@ static void nvme_pci_alloc_tag_set(struct nvme_dev *dev) set->ops = &nvme_mq_ops; set->nr_hw_queues = dev->online_queues - 1; - set->nr_maps = 2; /* default + read */ + set->nr_maps = 1; + if (dev->io_queues[HCTX_TYPE_READ]) + set->nr_maps = 2; if (dev->io_queues[HCTX_TYPE_POLL]) - set->nr_maps++; + set->nr_maps = 3; set->timeout = NVME_IO_TIMEOUT; set->numa_node = dev->ctrl.numa_node; set->queue_depth = min_t(unsigned, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; From db94f240280c1da8ba1e679c422312676549570d Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Tue, 20 Sep 2022 21:16:17 +0800 Subject: [PATCH 09/31] nvmet-tcp: fix NULL pointer dereference during release nvmet-tcp frees CMD buffers in nvmet_tcp_uninit_data_in_cmds(), and waits the inflight IO requests in nvmet_sq_destroy(). During wait the inflight IO requests, the callback nvmet_tcp_queue_response() is called from backend after IO complete, this leads a typical Use-After-Free issue like this: BUG: kernel NULL pointer dereference, address: 0000000000000008 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 107f80067 P4D 107f80067 PUD 10789e067 PMD 0 Oops: 0000 [#1] PREEMPT SMP NOPTI CPU: 1 PID: 123 Comm: kworker/1:1H Kdump: loaded Tainted: G E 6.0.0-rc2.bm.1-amd64 #15 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 Workqueue: nvmet_tcp_wq nvmet_tcp_io_work [nvmet_tcp] RIP: 0010:shash_ahash_digest+0x2b/0x110 Code: 1f 44 00 00 41 57 41 56 41 55 41 54 55 48 89 fd 53 48 89 f3 48 83 ec 08 44 8b 67 30 45 85 e4 74 1c 48 8b 57 38 b8 00 10 00 00 <44> 8b 7a 08 44 29 f8 39 42 0c 0f 46 42 0c 41 39 c4 76 43 48 8b 03 RSP: 0018:ffffc9000051bdd8 EFLAGS: 00010206 RAX: 0000000000001000 RBX: ffff888100ab5470 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff888100ab5470 RDI: ffff888100ab5420 RBP: ffff888100ab5420 R08: ffff8881024d08c8 R09: ffff888103e1b4b8 R10: 8080808080808080 R11: 0000000000000000 R12: 0000000000001000 R13: 0000000000000000 R14: ffff88813412bd4c R15: ffff8881024d0800 FS: 0000000000000000(0000) GS:ffff88883fa40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000104b48000 CR4: 0000000000350ee0 Call Trace: nvmet_tcp_io_work+0xa52/0xb52 [nvmet_tcp] ? __switch_to+0x106/0x420 process_one_work+0x1ae/0x380 ? process_one_work+0x380/0x380 worker_thread+0x30/0x360 ? process_one_work+0x380/0x380 kthread+0xe6/0x110 ? kthread_complete_and_exit+0x20/0x20 ret_from_fork+0x1f/0x30 Separate nvmet_tcp_uninit_data_in_cmds() into two steps: uninit data in cmds <- new step 1 nvmet_sq_destroy(); cancel_work_sync(&queue->io_work); free CMD buffers <- new step 2 Signed-off-by: zhenwei pi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index c07de4f4f7197..70baeab6af30a 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1406,14 +1406,26 @@ static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) for (i = 0; i < queue->nr_cmds; i++, cmd++) { if (nvmet_tcp_need_data_in(cmd)) nvmet_req_uninit(&cmd->req); - - nvmet_tcp_free_cmd_buffers(cmd); } if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) { /* failed in connect */ - nvmet_tcp_finish_cmd(&queue->connect); + nvmet_req_uninit(&queue->connect.req); + } +} + +static void nvmet_tcp_free_cmd_data_in_buffers(struct nvmet_tcp_queue *queue) +{ + struct nvmet_tcp_cmd *cmd = queue->cmds; + int i; + + for (i = 0; i < queue->nr_cmds; i++, cmd++) { + if (nvmet_tcp_need_data_in(cmd)) + nvmet_tcp_free_cmd_buffers(cmd); } + + if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) + nvmet_tcp_free_cmd_buffers(&queue->connect); } static void nvmet_tcp_release_queue_work(struct work_struct *w) @@ -1434,6 +1446,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) nvmet_tcp_uninit_data_in_cmds(queue); nvmet_sq_destroy(&queue->nvme_sq); cancel_work_sync(&queue->io_work); + nvmet_tcp_free_cmd_data_in_buffers(queue); sock_release(queue->sock); nvmet_tcp_free_cmds(queue); if (queue->hdr_digest || queue->data_digest) From f614b937d850193588f161ff854a4e19940a5e43 Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 21 Sep 2022 00:04:44 +0530 Subject: [PATCH 10/31] nvmet-tcp: handle ICReq PDU received in NVMET_TCP_Q_LIVE state As per NVMe/TCP transport specification ICReq PDU is the first PDU received by the controller and controller should receive only one ICReq PDU. If controller receives more than one ICReq PDU then this can be considered as fatal error. nvmet-tcp driver does not check for ICReq PDU opcode if queue state is NVMET_TCP_Q_LIVE. In LIVE state ICReq PDU is treated as CapsuleCmd PDU, this can result in abnormal behavior. Add a check for ICReq PDU in nvmet_tcp_done_recv_pdu() to fix this issue. Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 70baeab6af30a..1762e2e905853 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -961,6 +961,13 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) return nvmet_tcp_handle_icreq(queue); } + if (unlikely(hdr->type == nvme_tcp_icreq)) { + pr_err("queue %d: received icreq pdu in state %d\n", + queue->idx, queue->state); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } + if (hdr->type == nvme_tcp_h2c_data) { ret = nvmet_tcp_handle_h2c_data_pdu(queue); if (unlikely(ret)) From b6a545ffa2c192b1e6da4a7924edac5ba9f4ea2b Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Wed, 21 Sep 2022 00:06:49 +0530 Subject: [PATCH 11/31] nvmet-tcp: add bounds check on Transfer Tag ttag is used as an index to get cmd in nvmet_tcp_handle_h2c_data_pdu(), add a bounds check to avoid out-of-bounds access. Signed-off-by: Varun Prakash Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 1762e2e905853..98e17ea184d61 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -920,10 +920,17 @@ static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue) struct nvme_tcp_data_pdu *data = &queue->pdu.data; struct nvmet_tcp_cmd *cmd; - if (likely(queue->nr_cmds)) + if (likely(queue->nr_cmds)) { + if (unlikely(data->ttag >= queue->nr_cmds)) { + pr_err("queue %d: received out of bound ttag %u, nr_cmds %u\n", + queue->idx, data->ttag, queue->nr_cmds); + nvmet_tcp_fatal_error(queue); + return -EPROTO; + } cmd = &queue->cmds[data->ttag]; - else + } else { cmd = &queue->connect; + } if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) { pr_err("ttag %u unexpected data offset %u (expected %u)\n", From 0700542a823ba3d3aa9c699a255aecc23dbbcaff Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Thu, 22 Sep 2022 15:06:16 +0800 Subject: [PATCH 12/31] nvmet-tcp: remove nvmet_tcp_finish_cmd There is only a single call-site of nvmet_tcp_finish_cmd(), this becomes redundant. Remove nvmet_tcp_finish_cmd() and use the original function body instead. Suggested-by: Sagi Grimberg Signed-off-by: zhenwei pi Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 98e17ea184d61..63d72b9cb28dc 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -164,7 +164,6 @@ static DEFINE_MUTEX(nvmet_tcp_queue_mutex); static struct workqueue_struct *nvmet_tcp_wq; static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd); static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue, @@ -1177,7 +1176,8 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) queue->idx, cmd->req.cmd->common.command_id, queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst), le32_to_cpu(cmd->exp_ddgst)); - nvmet_tcp_finish_cmd(cmd); + nvmet_req_uninit(&cmd->req); + nvmet_tcp_free_cmd_buffers(cmd); nvmet_tcp_fatal_error(queue); ret = -EPROTO; goto out; @@ -1406,12 +1406,6 @@ static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue) write_unlock_bh(&sock->sk->sk_callback_lock); } -static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd) -{ - nvmet_req_uninit(&cmd->req); - nvmet_tcp_free_cmd_buffers(cmd); -} - static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd = queue->cmds; From 1befd944e05050d76950014f3dc04ed47faba2c3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 15:37:18 +0200 Subject: [PATCH 13/31] nvmet-auth: don't try to cancel a non-initialized work_struct Currently blktests nvme/002 trips up debugobjects if CONFIG_NVME_AUTH is enabled, but authentication is not on a queue. This is because nvmet_auth_sq_free cancels sq->auth_expired_work unconditionaly, while auth_expired_work is only ever initialized if authentication is enabled for a given controller. Fix this by calling most of what is nvmet_init_auth unconditionally when initializing the SQ, and just do the setting of the result field in the connect command handler. Fixes: db1312dd9548 ("nvmet: implement basic In-Band Authentication") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke --- drivers/nvme/target/core.c | 1 + drivers/nvme/target/fabrics-cmd-auth.c | 13 ++++--------- drivers/nvme/target/fabrics-cmd.c | 6 ++++-- drivers/nvme/target/nvmet.h | 7 ++++--- 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a1345790005f4..8e3cf0c3588ce 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -830,6 +830,7 @@ int nvmet_sq_init(struct nvmet_sq *sq) } init_completion(&sq->free_done); init_completion(&sq->confirm_done); + nvmet_auth_sq_init(sq); return 0; } diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 84601e38a335a..7970a7640e585 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -23,17 +23,12 @@ static void nvmet_auth_expired_work(struct work_struct *work) sq->dhchap_tid = -1; } -void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req) +void nvmet_auth_sq_init(struct nvmet_sq *sq) { - u32 result = le32_to_cpu(req->cqe->result.u32); - /* Initialize in-band authentication */ - INIT_DELAYED_WORK(&req->sq->auth_expired_work, - nvmet_auth_expired_work); - req->sq->authenticated = false; - req->sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; - result |= (u32)NVME_CONNECT_AUTHREQ_ATR << 16; - req->cqe->result.u32 = cpu_to_le32(result); + INIT_DELAYED_WORK(&sq->auth_expired_work, nvmet_auth_expired_work); + sq->authenticated = false; + sq->dhchap_step = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; } static u16 nvmet_auth_negotiate(struct nvmet_req *req, void *d) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index c1dfdfb92ebf0..c7e903589d377 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -272,7 +272,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); if (nvmet_has_auth(ctrl)) - nvmet_init_auth(ctrl, req); + req->cqe->result.u32 |= + cpu_to_le32((u32)NVME_CONNECT_AUTHREQ_ATR << 16); out: kfree(d); complete: @@ -333,7 +334,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); if (nvmet_has_auth(ctrl)) - nvmet_init_auth(ctrl, req); + req->cqe->result.u32 |= + cpu_to_le32((u32)NVME_CONNECT_AUTHREQ_ATR << 16); out: kfree(d); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 6ffeeb0a1c49e..dfe3894205aa7 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -704,7 +704,7 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, bool set_ctrl); int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash); int nvmet_setup_auth(struct nvmet_ctrl *ctrl); -void nvmet_init_auth(struct nvmet_ctrl *ctrl, struct nvmet_req *req); +void nvmet_auth_sq_init(struct nvmet_sq *sq); void nvmet_destroy_auth(struct nvmet_ctrl *ctrl); void nvmet_auth_sq_free(struct nvmet_sq *sq); int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id); @@ -726,8 +726,9 @@ static inline int nvmet_setup_auth(struct nvmet_ctrl *ctrl) { return 0; } -static inline void nvmet_init_auth(struct nvmet_ctrl *ctrl, - struct nvmet_req *req) {}; +static inline void nvmet_auth_sq_init(struct nvmet_sq *sq) +{ +} static inline void nvmet_destroy_auth(struct nvmet_ctrl *ctrl) {}; static inline void nvmet_auth_sq_free(struct nvmet_sq *sq) {}; static inline bool nvmet_check_auth_status(struct nvmet_req *req) From 1c32a8012b7fabe469b6af826edfd4ae2a6201d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 15:38:58 +0200 Subject: [PATCH 14/31] nvme: improve the NVME_CONNECT_AUTHREQ* definitions Mark them as unsigned so that we don't need extra casts, and define them relative to cdword0 instead of requiring extra shifts. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke --- drivers/nvme/target/fabrics-cmd.c | 6 ++---- include/linux/nvme.h | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index c7e903589d377..618f7adca70fd 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -272,8 +272,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); if (nvmet_has_auth(ctrl)) - req->cqe->result.u32 |= - cpu_to_le32((u32)NVME_CONNECT_AUTHREQ_ATR << 16); + req->cqe->result.u32 |= cpu_to_le32(NVME_CONNECT_AUTHREQ_ATR); out: kfree(d); complete: @@ -334,8 +333,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); if (nvmet_has_auth(ctrl)) - req->cqe->result.u32 |= - cpu_to_le32((u32)NVME_CONNECT_AUTHREQ_ATR << 16); + req->cqe->result.u32 |= cpu_to_le32(NVME_CONNECT_AUTHREQ_ATR); out: kfree(d); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index ae53d74f3696a..050d7d0cd81b0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1482,8 +1482,8 @@ struct nvmf_connect_command { }; enum { - NVME_CONNECT_AUTHREQ_ASCR = (1 << 2), - NVME_CONNECT_AUTHREQ_ATR = (1 << 1), + NVME_CONNECT_AUTHREQ_ASCR = (1U << 18), + NVME_CONNECT_AUTHREQ_ATR = (1U << 17), }; struct nvmf_connect_data { From ab46d8d40f01487bf637428c4767f0e75ac2a95a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 16:09:57 +0200 Subject: [PATCH 15/31] nvmet: add helpers to set the result field for connect commands The code to set the result field for the admin and I/O connect commands is not only verbose and duplicated, but also violates the aliasing rules as it accesses both the u16 and u32 members in the union. Add a little helper to sort all that out. Fixes: db1312dd9548 ("nvmet: implement basic In-Band Authentication") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke --- drivers/nvme/target/fabrics-cmd.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 618f7adca70fd..43b5bd8bb6a52 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -198,6 +198,12 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) return ret; } +static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl) +{ + return (u32)ctrl->cntlid | + (nvmet_has_auth(ctrl) ? NVME_CONNECT_AUTHREQ_ATR : 0); +} + static void nvmet_execute_admin_connect(struct nvmet_req *req) { struct nvmf_connect_command *c = &req->cmd->connect; @@ -269,10 +275,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, ctrl->pi_support ? " T10-PI is enabled" : "", nvmet_has_auth(ctrl) ? " with DH-HMAC-CHAP" : ""); - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); - - if (nvmet_has_auth(ctrl)) - req->cqe->result.u32 |= cpu_to_le32(NVME_CONNECT_AUTHREQ_ATR); + req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl)); out: kfree(d); complete: @@ -328,13 +331,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) if (status) goto out_ctrl_put; - /* pass back cntlid for successful completion */ - req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); - pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); - if (nvmet_has_auth(ctrl)) - req->cqe->result.u32 |= cpu_to_le32(NVME_CONNECT_AUTHREQ_ATR); - + req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl)); out: kfree(d); complete: From 3c69ed7aa546bd79b01977cf2c0a603cdb9e8ad5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Sep 2022 08:33:14 +0200 Subject: [PATCH 16/31] nvme-auth: add a MAINTAINERS entry Add Hannes as the nvme-auth maintainer. Signed-off-by: Christoph Hellwig Acked-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 9d7f64dc0efe8..47f27eea29ba5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14542,6 +14542,15 @@ F: drivers/nvme/common/ F: include/linux/nvme* F: include/uapi/linux/nvme_ioctl.h +NVM EXPRESS FABRICS AUTHENTICATION +M: Hannes Reinecke +L: linux-nvme@lists.infradead.org +S: Supported +F: drivers/nvme/host/auth.c +F: drivers/nvme/target/auth.c +F: drivers/nvme/target/fabrics-cmd-auth.c +F: include/linux/nvme-auth.h + NVM EXPRESS FC TRANSPORT DRIVERS M: James Smart L: linux-nvme@lists.infradead.org From fe60e8c534118a288cd251a59d747cbf5c03e160 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 4 Sep 2022 15:18:30 +0300 Subject: [PATCH 17/31] nvme: add common helpers to allocate and free tagsets Add common helpers to allocate and tear down the admin and I/O tag sets, including the special queues allocated with them. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 102 +++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 8 +++ 2 files changed, 110 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0de2c227c1ab5..f2786a6b1a85f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4796,6 +4796,108 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, } EXPORT_SYMBOL_GPL(nvme_complete_async_event); +int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size) +{ + int ret; + + memset(set, 0, sizeof(*set)); + set->ops = ops; + set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; + if (ctrl->ops->flags & NVME_F_FABRICS) + set->reserved_tags = NVMF_RESERVED_TAGS; + set->numa_node = ctrl->numa_node; + set->flags = flags; + set->cmd_size = cmd_size; + set->driver_data = ctrl; + set->nr_hw_queues = 1; + set->timeout = NVME_ADMIN_TIMEOUT; + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ret; + + ctrl->admin_q = blk_mq_init_queue(set); + if (IS_ERR(ctrl->admin_q)) { + ret = PTR_ERR(ctrl->admin_q); + goto out_free_tagset; + } + + if (ctrl->ops->flags & NVME_F_FABRICS) { + ctrl->fabrics_q = blk_mq_init_queue(set); + if (IS_ERR(ctrl->fabrics_q)) { + ret = PTR_ERR(ctrl->fabrics_q); + goto out_cleanup_admin_q; + } + } + + ctrl->admin_tagset = set; + return 0; + +out_cleanup_admin_q: + blk_mq_destroy_queue(ctrl->fabrics_q); +out_free_tagset: + blk_mq_free_tag_set(ctrl->admin_tagset); + return ret; +} +EXPORT_SYMBOL_GPL(nvme_alloc_admin_tag_set); + +void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl) +{ + blk_mq_destroy_queue(ctrl->admin_q); + if (ctrl->ops->flags & NVME_F_FABRICS) + blk_mq_destroy_queue(ctrl->fabrics_q); + blk_mq_free_tag_set(ctrl->admin_tagset); +} +EXPORT_SYMBOL_GPL(nvme_remove_admin_tag_set); + +int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size) +{ + int ret; + + memset(set, 0, sizeof(*set)); + set->ops = ops; + set->queue_depth = ctrl->sqsize + 1; + set->reserved_tags = NVMF_RESERVED_TAGS; + set->numa_node = ctrl->numa_node; + set->flags = flags; + set->cmd_size = cmd_size, + set->driver_data = ctrl; + set->nr_hw_queues = ctrl->queue_count - 1; + set->timeout = NVME_IO_TIMEOUT; + if (ops->map_queues) + set->nr_maps = ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; + ret = blk_mq_alloc_tag_set(set); + if (ret) + return ret; + + if (ctrl->ops->flags & NVME_F_FABRICS) { + ctrl->connect_q = blk_mq_init_queue(set); + if (IS_ERR(ctrl->connect_q)) { + ret = PTR_ERR(ctrl->connect_q); + goto out_free_tag_set; + } + } + + ctrl->tagset = set; + return 0; + +out_free_tag_set: + blk_mq_free_tag_set(set); + return ret; +} +EXPORT_SYMBOL_GPL(nvme_alloc_io_tag_set); + +void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl) +{ + if (ctrl->ops->flags & NVME_F_FABRICS) + blk_mq_destroy_queue(ctrl->connect_q); + blk_mq_free_tag_set(ctrl->tagset); +} +EXPORT_SYMBOL_GPL(nvme_remove_io_tag_set); + void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a8b054cd2ebbd..56000a846a481 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -737,6 +737,14 @@ void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl); +int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size); +void nvme_remove_admin_tag_set(struct nvme_ctrl *ctrl); +int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set, + const struct blk_mq_ops *ops, unsigned int flags, + unsigned int cmd_size); +void nvme_remove_io_tag_set(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); From fb8745d040ef5b9080003325e56b91fefe1022bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:23:24 +0200 Subject: [PATCH 18/31] nvme-tcp: remove the unused queue_size member in nvme_tcp_queue ->nvme_tcp_queue is not used anywhere, so remove it. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/tcp.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b5f22ceaae823..8a749ef63afee 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -134,7 +134,6 @@ struct nvme_tcp_queue { /* send state */ struct nvme_tcp_request *request; - int queue_size; u32 maxh2cdata; size_t cmnd_capsule_len; struct nvme_tcp_ctrl *ctrl; @@ -1479,8 +1478,7 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); } -static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, - int qid, size_t queue_size) +static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) { struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); struct nvme_tcp_queue *queue = &ctrl->queues[qid]; @@ -1492,7 +1490,6 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, INIT_LIST_HEAD(&queue->send_list); mutex_init(&queue->send_mutex); INIT_WORK(&queue->io_work, nvme_tcp_io_work); - queue->queue_size = queue_size; if (qid > 0) queue->cmnd_capsule_len = nctrl->ioccsz * 16; @@ -1785,7 +1782,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) { int ret; - ret = nvme_tcp_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); + ret = nvme_tcp_alloc_queue(ctrl, 0); if (ret) return ret; @@ -1805,7 +1802,7 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) int i, ret; for (i = 1; i < ctrl->queue_count; i++) { - ret = nvme_tcp_alloc_queue(ctrl, i, ctrl->sqsize + 1); + ret = nvme_tcp_alloc_queue(ctrl, i); if (ret) goto out_free_queues; } From 06427ca09b2f3103a60b384345c81cb784e19956 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:09:48 +0200 Subject: [PATCH 19/31] nvme-tcp: store the generic nvme_ctrl in set->driver_data Point the private data to the generic controller structure in preparation of using the common tagset init/exit code. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/tcp.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8a749ef63afee..863e985085d4d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -465,7 +465,7 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_tcp_ctrl *ctrl = set->driver_data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_tcp_cmd_pdu *pdu; int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; @@ -489,7 +489,7 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_tcp_ctrl *ctrl = data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1]; hctx->driver_data = queue; @@ -499,7 +499,7 @@ static int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_tcp_ctrl *ctrl = data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); struct nvme_tcp_queue *queue = &ctrl->queues[0]; hctx->driver_data = queue; @@ -1700,7 +1700,7 @@ static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *nctrl) set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); - set->driver_data = ctrl; + set->driver_data = &ctrl->ctrl; set->nr_hw_queues = 1; set->timeout = NVME_ADMIN_TIMEOUT; ret = blk_mq_alloc_tag_set(set); @@ -1722,7 +1722,7 @@ static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *nctrl) set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); - set->driver_data = ctrl; + set->driver_data = &ctrl->ctrl; set->nr_hw_queues = nctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; @@ -2486,7 +2486,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, static void nvme_tcp_map_queues(struct blk_mq_tag_set *set) { - struct nvme_tcp_ctrl *ctrl = set->driver_data; + struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) { From de777825e462bc086dca9675312f1780b4b8c88e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:12:47 +0200 Subject: [PATCH 20/31] nvme-tcp: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_tcp_ctrl. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/tcp.c | 101 +++++++--------------------------------- 1 file changed, 16 insertions(+), 85 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 863e985085d4d..3e7b29d07c713 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1687,51 +1687,6 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) return ret; } -static int nvme_tcp_alloc_admin_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->admin_tag_set; - int ret; - - memset(set, 0, sizeof(*set)); - set->ops = &nvme_tcp_admin_mq_ops; - set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_BLOCKING; - set->cmd_size = sizeof(struct nvme_tcp_request); - set->driver_data = &ctrl->ctrl; - set->nr_hw_queues = 1; - set->timeout = NVME_ADMIN_TIMEOUT; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - nctrl->admin_tagset = set; - return ret; -} - -static int nvme_tcp_alloc_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->tag_set; - int ret; - - memset(set, 0, sizeof(*set)); - set->ops = &nvme_tcp_mq_ops; - set->queue_depth = nctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; - set->cmd_size = sizeof(struct nvme_tcp_request); - set->driver_data = &ctrl->ctrl; - set->nr_hw_queues = nctrl->queue_count - 1; - set->timeout = NVME_IO_TIMEOUT; - set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - nctrl->tagset = set; - return ret; -} - static void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl) { if (to_tcp_ctrl(ctrl)->async_req.pdu) { @@ -1890,10 +1845,8 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_io_queues(ctrl); - if (remove) { - blk_mq_destroy_queue(ctrl->connect_q); - blk_mq_free_tag_set(ctrl->tagset); - } + if (remove) + nvme_remove_io_tag_set(ctrl); nvme_tcp_free_io_queues(ctrl); } @@ -1906,13 +1859,12 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) return ret; if (new) { - ret = nvme_tcp_alloc_tag_set(ctrl); + ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set, + &nvme_tcp_mq_ops, + BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING, + sizeof(struct nvme_tcp_request)); if (ret) goto out_free_io_queues; - - ret = nvme_ctrl_init_connect_q(ctrl); - if (ret) - goto out_free_tag_set; } /* @@ -1959,10 +1911,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) out_cleanup_connect_q: nvme_cancel_tagset(ctrl); if (new) - blk_mq_destroy_queue(ctrl->connect_q); -out_free_tag_set: - if (new) - blk_mq_free_tag_set(ctrl->tagset); + nvme_remove_io_tag_set(ctrl); out_free_io_queues: nvme_tcp_free_io_queues(ctrl); return ret; @@ -1971,11 +1920,8 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_queue(ctrl, 0); - if (remove) { - blk_mq_destroy_queue(ctrl->admin_q); - blk_mq_destroy_queue(ctrl->fabrics_q); - blk_mq_free_tag_set(ctrl->admin_tagset); - } + if (remove) + nvme_remove_admin_tag_set(ctrl); nvme_tcp_free_admin_queue(ctrl); } @@ -1988,26 +1934,17 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) return error; if (new) { - error = nvme_tcp_alloc_admin_tag_set(ctrl); + error = nvme_alloc_admin_tag_set(ctrl, + &to_tcp_ctrl(ctrl)->admin_tag_set, + &nvme_tcp_admin_mq_ops, BLK_MQ_F_BLOCKING, + sizeof(struct nvme_tcp_request)); if (error) goto out_free_queue; - - ctrl->fabrics_q = blk_mq_init_queue(ctrl->admin_tagset); - if (IS_ERR(ctrl->fabrics_q)) { - error = PTR_ERR(ctrl->fabrics_q); - goto out_free_tagset; - } - - ctrl->admin_q = blk_mq_init_queue(ctrl->admin_tagset); - if (IS_ERR(ctrl->admin_q)) { - error = PTR_ERR(ctrl->admin_q); - goto out_cleanup_fabrics_q; - } } error = nvme_tcp_start_queue(ctrl, 0); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; error = nvme_enable_ctrl(ctrl); if (error) @@ -2027,15 +1964,9 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) out_stop_queue: nvme_tcp_stop_queue(ctrl, 0); nvme_cancel_admin_tagset(ctrl); -out_cleanup_queue: - if (new) - blk_mq_destroy_queue(ctrl->admin_q); -out_cleanup_fabrics_q: - if (new) - blk_mq_destroy_queue(ctrl->fabrics_q); -out_free_tagset: +out_cleanup_tagset: if (new) - blk_mq_free_tag_set(ctrl->admin_tagset); + nvme_remove_admin_tag_set(ctrl); out_free_queue: nvme_tcp_free_admin_queue(ctrl); return error; From 2d60738c8f80684d469302d60edb2101f5e4190b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:14:01 +0200 Subject: [PATCH 21/31] nvme-rdma: store the generic nvme_ctrl in set->driver_data Point the private data to the generic controller structure in preparation of using the common tagset init/exit code. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/rdma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4c6df34f9d7ac..8bc2930fd496e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -295,7 +295,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_rdma_ctrl *ctrl = set->driver_data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; @@ -320,7 +320,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_rdma_ctrl *ctrl = data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); struct nvme_rdma_queue *queue = &ctrl->queues[hctx_idx + 1]; BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); @@ -332,7 +332,7 @@ static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_rdma_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_rdma_ctrl *ctrl = data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(data); struct nvme_rdma_queue *queue = &ctrl->queues[0]; BUG_ON(hctx_idx != 0); @@ -801,7 +801,7 @@ static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *nctrl) set->numa_node = nctrl->numa_node; set->cmd_size = sizeof(struct nvme_rdma_request) + NVME_RDMA_DATA_SGL_SIZE; - set->driver_data = ctrl; + set->driver_data = &ctrl->ctrl; set->nr_hw_queues = 1; set->timeout = NVME_ADMIN_TIMEOUT; set->flags = BLK_MQ_F_NO_SCHED; @@ -828,7 +828,7 @@ static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *nctrl) if (nctrl->max_integrity_segments) set->cmd_size += sizeof(struct nvme_rdma_sgl) + NVME_RDMA_METADATA_SGL_SIZE; - set->driver_data = ctrl; + set->driver_data = &ctrl->ctrl; set->nr_hw_queues = nctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; @@ -2206,7 +2206,7 @@ static void nvme_rdma_complete_rq(struct request *rq) static void nvme_rdma_map_queues(struct blk_mq_tag_set *set) { - struct nvme_rdma_ctrl *ctrl = set->driver_data; + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(set->driver_data); struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) { From cefa1032f1114845de305f34dbe4c0c96ed6de1c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:14:53 +0200 Subject: [PATCH 22/31] nvme-rdma: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_rdma_ctrl. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/rdma.c | 133 ++++++++++----------------------------- 1 file changed, 34 insertions(+), 99 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 8bc2930fd496e..5ad0ab2853a49 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -788,64 +788,21 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) return ret; } -static int nvme_rdma_alloc_admin_tag_set(struct nvme_ctrl *nctrl) +static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *ctrl) { - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->admin_tag_set; - int ret; + unsigned int cmd_size = sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE; - memset(set, 0, sizeof(*set)); - set->ops = &nvme_rdma_admin_mq_ops; - set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_RDMA_DATA_SGL_SIZE; - set->driver_data = &ctrl->ctrl; - set->nr_hw_queues = 1; - set->timeout = NVME_ADMIN_TIMEOUT; - set->flags = BLK_MQ_F_NO_SCHED; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - ctrl->ctrl.admin_tagset = set; - return ret; -} - -static int nvme_rdma_alloc_tag_set(struct nvme_ctrl *nctrl) -{ - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - struct blk_mq_tag_set *set = &ctrl->tag_set; - int ret; + if (ctrl->max_integrity_segments) + cmd_size += sizeof(struct nvme_rdma_sgl) + + NVME_RDMA_METADATA_SGL_SIZE; - memset(set, 0, sizeof(*set)); - set->ops = &nvme_rdma_mq_ops; - set->queue_depth = nctrl->sqsize + 1; - set->reserved_tags = NVMF_RESERVED_TAGS; - set->numa_node = nctrl->numa_node; - set->flags = BLK_MQ_F_SHOULD_MERGE; - set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_RDMA_DATA_SGL_SIZE; - if (nctrl->max_integrity_segments) - set->cmd_size += sizeof(struct nvme_rdma_sgl) + - NVME_RDMA_METADATA_SGL_SIZE; - set->driver_data = &ctrl->ctrl; - set->nr_hw_queues = nctrl->queue_count - 1; - set->timeout = NVME_IO_TIMEOUT; - set->nr_maps = nctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2; - ret = blk_mq_alloc_tag_set(set); - if (!ret) - ctrl->ctrl.tagset = set; - return ret; + return nvme_alloc_io_tag_set(ctrl, &to_rdma_ctrl(ctrl)->tag_set, + &nvme_rdma_mq_ops, BLK_MQ_F_SHOULD_MERGE, cmd_size); } -static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, - bool remove) +static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) { - if (remove) { - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); - } if (ctrl->async_event_sqe.data) { cancel_work_sync(&ctrl->ctrl.async_event_work); nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -887,26 +844,19 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, goto out_free_queue; if (new) { - error = nvme_rdma_alloc_admin_tag_set(&ctrl->ctrl); + error = nvme_alloc_admin_tag_set(&ctrl->ctrl, + &ctrl->admin_tag_set, &nvme_rdma_admin_mq_ops, + BLK_MQ_F_NO_SCHED, + sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE); if (error) goto out_free_async_qe; - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - error = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_tagset; - } - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } } error = nvme_rdma_start_queue(ctrl, 0); if (error) - goto out_cleanup_queue; + goto out_remove_admin_tag_set; error = nvme_enable_ctrl(&ctrl->ctrl); if (error) @@ -933,15 +883,9 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, out_stop_queue: nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); -out_cleanup_queue: +out_remove_admin_tag_set: if (new) - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: - if (new) - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_tagset: - if (new) - blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); + nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_async_qe: if (ctrl->async_event_sqe.data) { nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -953,16 +897,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, return error; } -static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, - bool remove) -{ - if (remove) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(ctrl->ctrl.tagset); - } - nvme_rdma_free_io_queues(ctrl); -} - static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) { int ret, nr_queues; @@ -975,10 +909,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ret = nvme_rdma_alloc_tag_set(&ctrl->ctrl); if (ret) goto out_free_io_queues; - - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tag_set; } /* @@ -989,7 +919,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) nr_queues = min(ctrl->tag_set.nr_hw_queues + 1, ctrl->ctrl.queue_count); ret = nvme_rdma_start_io_queues(ctrl, 1, nr_queues); if (ret) - goto out_cleanup_connect_q; + goto out_cleanup_tagset; if (!new) { nvme_start_queues(&ctrl->ctrl); @@ -1022,13 +952,10 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) nvme_stop_queues(&ctrl->ctrl); nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); -out_cleanup_connect_q: +out_cleanup_tagset: nvme_cancel_tagset(&ctrl->ctrl); if (new) - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - if (new) - blk_mq_free_tag_set(ctrl->ctrl.tagset); + nvme_remove_io_tag_set(&ctrl->ctrl); out_free_io_queues: nvme_rdma_free_io_queues(ctrl); return ret; @@ -1041,9 +968,11 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); - if (remove) + if (remove) { nvme_start_admin_queue(&ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl, remove); + nvme_remove_admin_tag_set(&ctrl->ctrl); + } + nvme_rdma_destroy_admin_queue(ctrl); } static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, @@ -1055,9 +984,11 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); - if (remove) + if (remove) { nvme_start_queues(&ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, remove); + nvme_remove_io_tag_set(&ctrl->ctrl); + } + nvme_rdma_free_io_queues(ctrl); } } @@ -1179,14 +1110,18 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); nvme_cancel_tagset(&ctrl->ctrl); - nvme_rdma_destroy_io_queues(ctrl, new); + if (new) + nvme_remove_io_tag_set(&ctrl->ctrl); + nvme_rdma_free_io_queues(ctrl); } destroy_admin: nvme_stop_admin_queue(&ctrl->ctrl); blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); nvme_cancel_admin_tagset(&ctrl->ctrl); - nvme_rdma_destroy_admin_queue(ctrl, new); + if (new) + nvme_remove_admin_tag_set(&ctrl->ctrl); + nvme_rdma_destroy_admin_queue(ctrl); return ret; } From 18ecd97506ab27d446ad5f7292b620fef3116089 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:27:33 +0200 Subject: [PATCH 23/31] nvme-fc: keep ctrl->sqsize in sync with opts->queue_size Also update the sqsize field when capping the queue size, and remove the check a queue size that is larger than sqsize given that sqsize is only initialized from opts->queue_size. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart --- drivers/nvme/host/fc.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 42767fb754552..ee376111f5610 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -3165,15 +3165,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) "to maxcmd\n", opts->queue_size, ctrl->ctrl.maxcmd); opts->queue_size = ctrl->ctrl.maxcmd; - } - - if (opts->queue_size > ctrl->ctrl.sqsize + 1) { - /* warn if sqsize is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl sqsize %u, reducing " - "to sqsize\n", - opts->queue_size, ctrl->ctrl.sqsize + 1); - opts->queue_size = ctrl->ctrl.sqsize + 1; + ctrl->ctrl.sqsize = opts->queue_size - 1; } ret = nvme_fc_init_aen_ops(ctrl); From 1864ea46155ce5e12a3797532c531843688904cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:17:59 +0200 Subject: [PATCH 24/31] nvme-fc: store the generic nvme_ctrl in set->driver_data Point the private data to the generic controller structure in preparation of using the common tagset init/exit code and use the chance the cleanup the init_hctx methods a bit. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart --- drivers/nvme/host/fc.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index ee376111f5610..d707cf93f1f4b 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1829,7 +1829,7 @@ nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - return __nvme_fc_exit_request(set->driver_data, op); + return __nvme_fc_exit_request(to_fc_ctrl(set->driver_data), op); } static int @@ -2135,7 +2135,7 @@ static int nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_fc_ctrl *ctrl = set->driver_data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data); struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; @@ -2206,36 +2206,28 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) } } -static inline void -__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, - unsigned int qidx) +static inline int +__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int qidx) { + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(data); struct nvme_fc_queue *queue = &ctrl->queues[qidx]; hctx->driver_data = queue; queue->hctx = hctx; + return 0; } static int -nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, - unsigned int hctx_idx) +nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_fc_ctrl *ctrl = data; - - __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); - - return 0; + return __nvme_fc_init_hctx(hctx, data, hctx_idx + 1); } static int nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_fc_ctrl *ctrl = data; - - __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); - - return 0; + return __nvme_fc_init_hctx(hctx, data, hctx_idx); } static void @@ -2862,7 +2854,7 @@ nvme_fc_complete_rq(struct request *rq) static void nvme_fc_map_queues(struct blk_mq_tag_set *set) { - struct nvme_fc_ctrl *ctrl = set->driver_data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(set->driver_data); int i; for (i = 0; i < set->nr_maps; i++) { @@ -2923,7 +2915,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) ctrl->tag_set.cmd_size = struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, ctrl->lport->ops->fcprqst_priv_sz); - ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.driver_data = &ctrl->ctrl; ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; @@ -3546,7 +3538,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->admin_tag_set.cmd_size = struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, ctrl->lport->ops->fcprqst_priv_sz); - ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.driver_data = &ctrl->ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; From 6dfba1c09c109f4a6ca12e156dbdbe27e0924862 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:19:36 +0200 Subject: [PATCH 25/31] nvme-fc: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_fc_ctrl. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: James Smart --- drivers/nvme/host/fc.c | 83 +++++++++--------------------------------- 1 file changed, 17 insertions(+), 66 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index d707cf93f1f4b..5d57a042dbcad 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2383,10 +2383,8 @@ nvme_fc_ctrl_free(struct kref *ref) container_of(ref, struct nvme_fc_ctrl, ref); unsigned long flags; - if (ctrl->ctrl.tagset) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - } + if (ctrl->ctrl.tagset) + nvme_remove_io_tag_set(&ctrl->ctrl); /* remove from rport list */ spin_lock_irqsave(&ctrl->rport->lock, flags); @@ -2394,9 +2392,7 @@ nvme_fc_ctrl_free(struct kref *ref) spin_unlock_irqrestore(&ctrl->rport->lock, flags); nvme_start_admin_queue(&ctrl->ctrl); - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_remove_admin_tag_set(&ctrl->ctrl); kfree(ctrl->queues); @@ -2906,32 +2902,16 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) nvme_fc_init_io_queues(ctrl); - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_fc_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz); - ctrl->tag_set.driver_data = &ctrl->ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, + &nvme_fc_mq_ops, BLK_MQ_F_SHOULD_MERGE, + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) return ret; - ctrl->ctrl.tagset = &ctrl->tag_set; - - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tag_set; - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) - goto out_cleanup_blk_queue; + goto out_cleanup_tagset; ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) @@ -2943,10 +2923,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) out_delete_hw_queues: nvme_fc_delete_hw_io_queues(ctrl); -out_cleanup_blk_queue: - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); +out_cleanup_tagset: + nvme_remove_io_tag_set(&ctrl->ctrl); nvme_fc_free_io_queues(ctrl); /* force put free routine to ignore io queues */ @@ -3530,35 +3508,12 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, nvme_fc_init_queue(ctrl, 0); - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; - ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->admin_tag_set.cmd_size = - struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, - ctrl->lport->ops->fcprqst_priv_sz); - ctrl->admin_tag_set.driver_data = &ctrl->ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; - ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; - - ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); + ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, + &nvme_fc_admin_mq_ops, BLK_MQ_F_NO_SCHED, + struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, + ctrl->lport->ops->fcprqst_priv_sz)); if (ret) goto out_free_queues; - ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; - - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - ret = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_admin_tag_set; - } - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - ret = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } /* * Would have been nice to init io queues tag set as well. @@ -3569,7 +3524,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); if (ret) - goto out_cleanup_admin_q; + goto out_cleanup_tagset; /* at this point, teardown path changes to ref counting on nvme ctrl */ @@ -3624,12 +3579,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, return ERR_PTR(-EIO); -out_cleanup_admin_q: - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_admin_tag_set: - blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_cleanup_tagset: + nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_queues: kfree(ctrl->queues); out_free_ida: From 379e0df5ab2c46e52e7ca2273d1238526631aa4f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:26:18 +0200 Subject: [PATCH 26/31] nvme-loop: initialize sqsize later Defer initializing the sqsize field from the options until it has been capped by MAXCMD. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 9750a7fca2688..ed6d36eb7d295 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -601,7 +601,6 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, ret = -ENOMEM; - ctrl->ctrl.sqsize = opts->queue_size - 1; ctrl->ctrl.kato = opts->kato; ctrl->port = nvme_loop_find_port(&ctrl->ctrl); @@ -621,6 +620,7 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev, opts->queue_size, ctrl->ctrl.maxcmd); opts->queue_size = ctrl->ctrl.maxcmd; } + ctrl->ctrl.sqsize = opts->queue_size - 1; if (opts->nr_io_queues) { ret = nvme_loop_create_io_queues(ctrl); From 2ade82213b7a35c761273fe0c982f4ab8707f30a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:20:46 +0200 Subject: [PATCH 27/31] nvme-loop: store the generic nvme_ctrl in set->driver_data Point the private data to the generic controller structure in preparation of using the common tagset init/exit code. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/loop.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index ed6d36eb7d295..54578cc18d528 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -204,7 +204,7 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx, unsigned int numa_node) { - struct nvme_loop_ctrl *ctrl = set->driver_data; + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(set->driver_data); struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); nvme_req(req)->ctrl = &ctrl->ctrl; @@ -218,7 +218,7 @@ static struct lock_class_key loop_hctx_fq_lock_key; static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_loop_ctrl *ctrl = data; + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(data); struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1]; BUG_ON(hctx_idx >= ctrl->ctrl.queue_count); @@ -238,7 +238,7 @@ static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { - struct nvme_loop_ctrl *ctrl = data; + struct nvme_loop_ctrl *ctrl = to_loop_ctrl(data); struct nvme_loop_queue *queue = &ctrl->queues[0]; BUG_ON(hctx_idx != 0); @@ -357,7 +357,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.driver_data = &ctrl->ctrl; ctrl->admin_tag_set.nr_hw_queues = 1; ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; @@ -530,7 +530,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.driver_data = &ctrl->ctrl; ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; ctrl->tag_set.timeout = NVME_IO_TIMEOUT; ctrl->ctrl.tagset = &ctrl->tag_set; From ceee1953f9239b699d80c5ef366712c3fd865cc2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:21:17 +0200 Subject: [PATCH 28/31] nvme-loop: use the tagset alloc/free helpers Use the common helpers to allocate and free the tagsets. To make this work the generic nvme_ctrl now needs to be stored in the hctx private data instead of the nvme_loop_ctrl. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/loop.c | 83 +++++++++----------------------------- 1 file changed, 19 insertions(+), 64 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 54578cc18d528..b45fe3adf015f 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -266,9 +266,7 @@ static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags)) return; nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); - blk_mq_destroy_queue(ctrl->ctrl.admin_q); - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_remove_admin_tag_set(&ctrl->ctrl); } static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) @@ -282,10 +280,8 @@ static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl) list_del(&ctrl->list); mutex_unlock(&nvme_loop_ctrl_mutex); - if (nctrl->tagset) { - blk_mq_destroy_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - } + if (nctrl->tagset) + nvme_remove_io_tag_set(nctrl); kfree(ctrl->queues); nvmf_free_options(nctrl->opts); free_ctrl: @@ -350,52 +346,31 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) { int error; - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; - ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->admin_tag_set.driver_data = &ctrl->ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = NVME_ADMIN_TIMEOUT; - ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; - ctrl->queues[0].ctrl = ctrl; error = nvmet_sq_init(&ctrl->queues[0].nvme_sq); if (error) return error; ctrl->ctrl.queue_count = 1; - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); + error = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set, + &nvme_loop_admin_mq_ops, BLK_MQ_F_NO_SCHED, + sizeof(struct nvme_loop_iod) + + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (error) goto out_free_sq; - ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; - - ctrl->ctrl.fabrics_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.fabrics_q)) { - error = PTR_ERR(ctrl->ctrl.fabrics_q); - goto out_free_tagset; - } - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_cleanup_fabrics_q; - } /* reset stopped state for the fresh admin queue */ clear_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->ctrl.flags); error = nvmf_connect_admin_queue(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); error = nvme_enable_ctrl(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; ctrl->ctrl.max_hw_sectors = (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9); @@ -404,17 +379,13 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) error = nvme_init_ctrl_finish(&ctrl->ctrl); if (error) - goto out_cleanup_queue; + goto out_cleanup_tagset; return 0; -out_cleanup_queue: +out_cleanup_tagset: clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); - blk_mq_destroy_queue(ctrl->ctrl.admin_q); -out_cleanup_fabrics_q: - blk_mq_destroy_queue(ctrl->ctrl.fabrics_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->admin_tag_set); + nvme_remove_admin_tag_set(&ctrl->ctrl); out_free_sq: nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); return error; @@ -522,37 +493,21 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl) if (ret) return ret; - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_loop_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS; - ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); - ctrl->tag_set.driver_data = &ctrl->ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - ctrl->ctrl.tagset = &ctrl->tag_set; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + ret = nvme_alloc_io_tag_set(&ctrl->ctrl, &ctrl->tag_set, + &nvme_loop_mq_ops, BLK_MQ_F_SHOULD_MERGE, + sizeof(struct nvme_loop_iod) + + NVME_INLINE_SG_CNT * sizeof(struct scatterlist)); if (ret) goto out_destroy_queues; - ret = nvme_ctrl_init_connect_q(&(ctrl->ctrl)); - if (ret) - goto out_free_tagset; - ret = nvme_loop_connect_io_queues(ctrl); if (ret) - goto out_cleanup_connect_q; + goto out_cleanup_tagset; return 0; -out_cleanup_connect_q: - blk_mq_destroy_queue(ctrl->ctrl.connect_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->tag_set); +out_cleanup_tagset: + nvme_remove_io_tag_set(&ctrl->ctrl); out_destroy_queues: nvme_loop_destroy_io_queues(ctrl); return ret; From fe6f04c079d0e4b38178955e0ea9d3fbc5d4066b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 20 Sep 2022 17:50:18 +0200 Subject: [PATCH 29/31] nvme: remove nvme_ctrl_init_connect_q Unused now. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/nvme.h | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 56000a846a481..7902c5245363e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -990,14 +990,6 @@ static inline int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf) } #endif -static inline int nvme_ctrl_init_connect_q(struct nvme_ctrl *ctrl) -{ - ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ctrl->connect_q)) - return PTR_ERR(ctrl->connect_q); - return 0; -} - static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) { return dev_to_disk(dev)->private_data; From 8df20252c06046ef4c68107bcaaca56c21028d8c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Sep 2022 10:24:07 +0200 Subject: [PATCH 30/31] nvmet: don't look at the request_queue in nvmet_bdev_zone_mgmt_emulate_all nvmet is a consumer of the block layer and should not directly look at the request_queue. Just use the NUMA node ID from the gendisk instead of the request_queue. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/target/zns.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/target/zns.c b/drivers/nvme/target/zns.c index c7ef69f29fe4e..7e89056cc4116 100644 --- a/drivers/nvme/target/zns.c +++ b/drivers/nvme/target/zns.c @@ -387,7 +387,6 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) { struct block_device *bdev = req->ns->bdev; unsigned int nr_zones = bdev_nr_zones(bdev); - struct request_queue *q = bdev_get_queue(bdev); struct bio *bio = NULL; sector_t sector = 0; int ret; @@ -396,7 +395,7 @@ static u16 nvmet_bdev_zone_mgmt_emulate_all(struct nvmet_req *req) }; d.zbitmap = kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(*(d.zbitmap)), - GFP_NOIO, q->node); + GFP_NOIO, bdev->bd_disk->node_id); if (!d.zbitmap) { ret = -ENOMEM; goto out; From 84fe64f898913ef69f70a8d91aea613b5722b63b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Sep 2022 10:26:26 +0200 Subject: [PATCH 31/31] nvmet: don't look at the request_queue in nvmet_bdev_set_limits nvmet is a consumer of the block layer and should not directly look at the request_queue. Use the bdev_ helpers to retrieve the device limits instead. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/target/io-cmd-bdev.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 8d527a8c0f542..c2d6cea0236b0 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -12,11 +12,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) { - const struct queue_limits *ql = &bdev_get_queue(bdev)->limits; - /* Number of logical blocks per physical block. */ - const u32 lpp = ql->physical_block_size / ql->logical_block_size; /* Logical blocks per physical block, 0's based. */ - const __le16 lpp0b = to0based(lpp); + const __le16 lpp0b = to0based(bdev_physical_block_size(bdev) / + bdev_logical_block_size(bdev)); /* * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN, @@ -42,11 +40,12 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) /* NPWA = Namespace Preferred Write Alignment. 0's based */ id->npwa = id->npwg; /* NPDG = Namespace Preferred Deallocate Granularity. 0's based */ - id->npdg = to0based(ql->discard_granularity / ql->logical_block_size); + id->npdg = to0based(bdev_discard_granularity(bdev) / + bdev_logical_block_size(bdev)); /* NPDG = Namespace Preferred Deallocate Alignment */ id->npda = id->npdg; /* NOWS = Namespace Optimal Write Size */ - id->nows = to0based(ql->io_opt / ql->logical_block_size); + id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev)); } void nvmet_bdev_ns_disable(struct nvmet_ns *ns)