From 0bd46e22c5ec3dbfb81b60de475151e3f6b411c2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 9 Sep 2021 12:14:40 +0300 Subject: [PATCH 1/5] nvmet: fix a width vs precision bug in nvmet_subsys_attr_serial_show() This was intended to limit the number of characters printed from "subsys->serial" to NVMET_SN_MAX_SIZE. But accidentally the width specifier was used instead of the precision specifier so it only affects the alignment and not the number of characters printed. Fixes: f04064814c2a ("nvmet: fixup buffer overrun in nvmet_subsys_attr_serial()") Signed-off-by: Dan Carpenter Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index d784f3c200b4c..be5d82421e3a4 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1067,7 +1067,7 @@ static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item, { struct nvmet_subsys *subsys = to_subsys(item); - return snprintf(page, PAGE_SIZE, "%*s\n", + return snprintf(page, PAGE_SIZE, "%.*s\n", NVMET_SN_MAX_SIZE, subsys->serial); } From 9edceaf43050f5ba1dd7d0011bcf68a736a17743 Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Thu, 2 Sep 2021 11:20:02 +0200 Subject: [PATCH 2/5] nvme: avoid race in shutdown namespace removal When we remove the siblings entry, we update ns->head->list, hence we can't separate the removal and test for being empty. They have to be in the same critical section to avoid a race. To avoid breaking the refcounting imbalance again, add a list empty check to nvme_find_ns_head. Fixes: 5396fdac56d8 ("nvme: fix refcounting imbalance when all paths are down") Signed-off-by: Daniel Wagner Reviewed-by: Hannes Reinecke Tested-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7efb31b87f37a..97f8211cf92c1 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3524,7 +3524,9 @@ static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, lockdep_assert_held(&subsys->lock); list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id == nsid && nvme_tryget_ns_head(h)) + if (h->ns_id != nsid) + continue; + if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; } @@ -3843,6 +3845,10 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) { + list_del_init(&ns->head->entry); + last_path = true; + } mutex_unlock(&ns->ctrl->subsys->lock); /* guarantee not available in head->list */ @@ -3863,13 +3869,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) list_del_init(&ns->list); up_write(&ns->ctrl->namespaces_rwsem); - /* Synchronize with nvme_init_ns_head() */ - mutex_lock(&ns->head->subsys->lock); - if (list_empty(&ns->head->list)) { - list_del_init(&ns->head->entry); - last_path = true; - } - mutex_unlock(&ns->head->subsys->lock); if (last_path) nvme_mpath_shutdown_disk(ns->head); nvme_put_ns(ns); From 79f528afa93918519574773ea49a444c104bc1bd Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Sun, 12 Sep 2021 12:54:57 -0600 Subject: [PATCH 3/5] nvme-multipath: fix ANA state updates when a namespace is not present nvme_update_ana_state() has a deficiency that results in a failure to properly update the ana state for a namespace in the following case: NSIDs in ctrl->namespaces: 1, 3, 4 NSIDs in desc->nsids: 1, 2, 3, 4 Loop iteration 0: ns index = 0, n = 0, ns->head->ns_id = 1, nsid = 1, MATCH. Loop iteration 1: ns index = 1, n = 1, ns->head->ns_id = 3, nsid = 2, NO MATCH. Loop iteration 2: ns index = 2, n = 2, ns->head->ns_id = 4, nsid = 4, MATCH. Where the update to the ANA state of NSID 3 is missed. To fix this increment n and retry the update with the same ns when ns->head->ns_id is higher than nsid, Signed-off-by: Anton Eidelman Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/multipath.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5d7bc58a27bd9..e8ccdd398f784 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -600,14 +600,17 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, down_read(&ctrl->namespaces_rwsem); list_for_each_entry(ns, &ctrl->namespaces, list) { - unsigned nsid = le32_to_cpu(desc->nsids[n]); - + unsigned nsid; +again: + nsid = le32_to_cpu(desc->nsids[n]); if (ns->head->ns_id < nsid) continue; if (ns->head->ns_id == nsid) nvme_update_ns_ana_state(desc, ns); if (++n == nr_nsids) break; + if (ns->head->ns_id > nsid) + goto again; } up_read(&ctrl->namespaces_rwsem); return 0; From 9817d763dbe15327b9b3ff4404fa6f27f927e744 Mon Sep 17 00:00:00 2001 From: Ruozhu Li Date: Mon, 6 Sep 2021 11:51:34 +0800 Subject: [PATCH 4/5] nvme-rdma: destroy cm id before destroy qp to avoid use after free We should always destroy cm_id before destroy qp to avoid to get cma event after qp was destroyed, which may lead to use after free. In RDMA connection establishment error flow, don't destroy qp in cm event handler.Just report cm_error to upper level, qp will be destroy in nvme_rdma_alloc_queue() after destroy cm id. Signed-off-by: Ruozhu Li Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index a68704e39084e..042c594bc57e2 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -656,8 +656,8 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) return; - nvme_rdma_destroy_queue_ib(queue); rdma_destroy_id(queue->cm_id); + nvme_rdma_destroy_queue_ib(queue); mutex_destroy(&queue->queue_lock); } @@ -1815,14 +1815,10 @@ static int nvme_rdma_conn_established(struct nvme_rdma_queue *queue) for (i = 0; i < queue->queue_size; i++) { ret = nvme_rdma_post_recv(queue, &queue->rsp_ring[i]); if (ret) - goto out_destroy_queue_ib; + return ret; } return 0; - -out_destroy_queue_ib: - nvme_rdma_destroy_queue_ib(queue); - return ret; } static int nvme_rdma_conn_rejected(struct nvme_rdma_queue *queue, @@ -1916,14 +1912,10 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) if (ret) { dev_err(ctrl->ctrl.device, "rdma_connect_locked failed (%d).\n", ret); - goto out_destroy_queue_ib; + return ret; } return 0; - -out_destroy_queue_ib: - nvme_rdma_destroy_queue_ib(queue); - return ret; } static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, @@ -1954,8 +1946,6 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, case RDMA_CM_EVENT_ROUTE_ERROR: case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: - nvme_rdma_destroy_queue_ib(queue); - fallthrough; case RDMA_CM_EVENT_ADDR_ERROR: dev_dbg(queue->ctrl->ctrl.device, "CM error event %d\n", ev->event); From 70f437fb4395ad4d1d16fab9a1ad9fbc9fc0579b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 9 Sep 2021 08:54:52 -0700 Subject: [PATCH 5/5] nvme-tcp: fix io_work priority inversion Dispatching requests inline with the .queue_rq() call may block while holding the send_mutex. If the tcp io_work also happens to schedule, it may see the req_list is non-empty, leaving "pending" true and remaining in TASK_RUNNING. Since io_work is of higher scheduling priority, the .queue_rq task may not get a chance to run, blocking forward progress and leading to io timeouts. Instead of checking for pending requests within io_work, let the queueing restart io_work outside the send_mutex lock if there is more work to be done. Fixes: a0fdd1418007f ("nvme-tcp: rerun io_work if req_list is not empty") Reported-by: Samuel Jones Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e2ab12f3f51c9..e4249b7dc0568 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -274,6 +274,12 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) } while (ret > 0); } +static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) +{ + return !list_empty(&queue->send_list) || + !llist_empty(&queue->req_list) || queue->more_requests; +} + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, bool sync, bool last) { @@ -294,9 +300,10 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, nvme_tcp_send_all(queue); queue->more_requests = false; mutex_unlock(&queue->send_mutex); - } else if (last) { - queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } + + if (last && nvme_tcp_queue_more(queue)) + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); } static void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue) @@ -906,12 +913,6 @@ static void nvme_tcp_state_change(struct sock *sk) read_unlock_bh(&sk->sk_callback_lock); } -static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) -{ - return !list_empty(&queue->send_list) || - !llist_empty(&queue->req_list) || queue->more_requests; -} - static inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) { queue->request = NULL; @@ -1145,8 +1146,7 @@ static void nvme_tcp_io_work(struct work_struct *w) pending = true; else if (unlikely(result < 0)) break; - } else - pending = !llist_empty(&queue->req_list); + } result = nvme_tcp_try_recv(queue); if (result > 0)