From 3820c4fdc247b6f0a4162733bdb8ddf8f2e8a1e4 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 31 Jul 2023 12:37:58 +0200 Subject: [PATCH 1/6] nvme-rdma: do not try to stop unallocated queues Trying to stop a queue which hasn't been allocated will result in a warning due to calling mutex_lock() against an uninitialized mutex. DEBUG_LOCKS_WARN_ON(lock->magic != lock) WARNING: CPU: 4 PID: 104150 at kernel/locking/mutex.c:579 Call trace: RIP: 0010:__mutex_lock+0x1173/0x14a0 nvme_rdma_stop_queue+0x1b/0xa0 [nvme_rdma] nvme_rdma_teardown_io_queues.part.0+0xb0/0x1d0 [nvme_rdma] nvme_rdma_delete_ctrl+0x50/0x100 [nvme_rdma] nvme_do_delete_ctrl+0x149/0x158 [nvme_core] Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Tested-by: Yi Zhang Signed-off-by: Keith Busch --- drivers/nvme/host/rdma.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 337a624a537ce..a7fea4cbacd75 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -638,6 +638,9 @@ static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { + if (!test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags)) + return; + mutex_lock(&queue->queue_lock); if (test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) __nvme_rdma_stop_queue(queue); From d920abd1e7c4884f9ecd0749d1921b7ab19ddfbd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 2 Oct 2023 13:54:28 +0300 Subject: [PATCH 2/6] nvmet-tcp: Fix a possible UAF in queue intialization setup From Alon: "Due to a logical bug in the NVMe-oF/TCP subsystem in the Linux kernel, a malicious user can cause a UAF and a double free, which may lead to RCE (may also lead to an LPE in case the attacker already has local privileges)." Hence, when a queue initialization fails after the ahash requests are allocated, it is guaranteed that the queue removal async work will be called, hence leave the deallocation to the queue removal. Also, be extra careful not to continue processing the socket, so set queue rcv_state to NVMET_TCP_RECV_ERR upon a socket error. Cc: stable@vger.kernel.org Reported-by: Alon Zahavi Tested-by: Alon Zahavi Signed-off-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index cd92d7ddf5ed1..197fc2ecb164d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -372,6 +372,7 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) { + queue->rcv_state = NVMET_TCP_RECV_ERR; if (status == -EPIPE || status == -ECONNRESET) kernel_sock_shutdown(queue->sock, SHUT_RDWR); else @@ -910,15 +911,11 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (ret < 0) - goto free_crypto; + return ret; /* queue removal will cleanup */ queue->state = NVMET_TCP_Q_LIVE; nvmet_prepare_receive_pdu(queue); return 0; -free_crypto: - if (queue->hdr_digest || queue->data_digest) - nvmet_tcp_free_crypto(queue); - return ret; } static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, From 4ae55a7dce04989f289d5c5c8c8e5c37adc36c71 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 4 Sep 2023 17:26:38 +0200 Subject: [PATCH 3/6] nvme-auth: use chap->s2 to indicate bidirectional authentication Commit 546dea18c999 ("nvme-auth: check chap ctrl_key once constructed") replaced the condition "if (ctrl->ctrl_key)" (indicating bidirectional auth) by "if (chap->ctrl_key)", because ctrl->ctrl_key is a resource shared with sysfs. But chap->ctrl_key is set in nvme_auth_process_dhchap_challenge() depending on the DHVLEN in the DH-HMAC-CHAP Challenge message received from the controller, and will thus be non-NULL for every DH-HMAC-CHAP exchange, even if unidirectional auth was requested. This will lead to a protocol violation by sending a Success2 message in the unidirectional case (per NVMe base spec 2.0, the authentication transaction ends after the Success1 message for unidirectional auth). Use chap->s2 instead, which is non-zero if and only if the host requested bi-directional authentication from the controller. Fixes: 546dea18c999 ("nvme-auth: check chap ctrl_key once constructed") Signed-off-by: Martin Wilck Reviewed-by: Daniel Wagner Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index daf5d144a8eaf..064592a5d546a 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -341,7 +341,7 @@ static int nvme_auth_process_dhchap_success1(struct nvme_ctrl *ctrl, struct nvmf_auth_dhchap_success1_data *data = chap->buf; size_t size = sizeof(*data); - if (chap->ctrl_key) + if (chap->s2) size += chap->hash_len; if (size > CHAP_BUF_SIZE) { @@ -825,7 +825,7 @@ static void nvme_queue_auth_work(struct work_struct *work) goto fail2; } - if (chap->ctrl_key) { + if (chap->s2) { /* DH-HMAC-CHAP Step 5: send success2 */ dev_dbg(ctrl->device, "%s: qid %d send success2\n", __func__, chap->qid); From 2b32c76e2b0154b98b9322ae7546b8156cd703e6 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 16 Oct 2023 13:12:47 -0700 Subject: [PATCH 4/6] nvme: sanitize metadata bounce buffer for reads User can request more metadata bytes than the device will write. Ensure kernel buffer is initialized so we're not leaking unsanitized memory on the copy-out. Fixes: 0b7f1f26f95a51a ("nvme: use the block layer for userspace passthrough metadata") Reviewed-by: Jens Axboe Reviewed-by: Christoph Hellwig Reviewed-by: Kanchan Joshi Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index d8ff796fd5f21..747c879e8982b 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -108,9 +108,13 @@ static void *nvme_add_user_metadata(struct request *req, void __user *ubuf, if (!buf) goto out; - ret = -EFAULT; - if ((req_op(req) == REQ_OP_DRV_OUT) && copy_from_user(buf, ubuf, len)) - goto out_free_meta; + if (req_op(req) == REQ_OP_DRV_OUT) { + ret = -EFAULT; + if (copy_from_user(buf, ubuf, len)) + goto out_free_meta; + } else { + memset(buf, 0, len); + } bip = bio_integrity_alloc(bio, GFP_KERNEL, 1); if (IS_ERR(bip)) { From f965b281fd872b2e18bd82dd97730db9834d0750 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Tue, 17 Oct 2023 10:28:45 +0200 Subject: [PATCH 5/6] nvmet-auth: complete a request only after freeing the dhchap pointers It may happen that the work to destroy a queue (for example nvmet_tcp_release_queue_work()) is started while an auth-send or auth-receive command is still completing. nvmet_sq_destroy() will block, waiting for all the references to the sq to be dropped, the last reference is then dropped when nvmet_req_complete() is called. When this happens, both nvmet_sq_destroy() and nvmet_execute_auth_send()/_receive() will free the dhchap pointers by calling nvmet_auth_sq_free(). Since there isn't any lock, the two threads may race against each other, causing double frees and memory corruptions, as reported by KASAN. Reproduced by stress blktests nvme/041 nvme/042 nvme/043 nvme nvme2: qid 0: authenticated with hash hmac(sha512) dhgroup ffdhe4096 ================================================================== BUG: KASAN: double-free in kfree+0xec/0x4b0 Call Trace: kfree+0xec/0x4b0 nvmet_auth_sq_free+0xe1/0x160 [nvmet] nvmet_execute_auth_send+0x482/0x16d0 [nvmet] process_one_work+0x8e5/0x1510 Allocated by task 191846: __kasan_kmalloc+0x81/0xa0 nvmet_auth_ctrl_sesskey+0xf6/0x380 [nvmet] nvmet_auth_reply+0x119/0x990 [nvmet] Freed by task 143270: kfree+0xec/0x4b0 nvmet_auth_sq_free+0xe1/0x160 [nvmet] process_one_work+0x8e5/0x1510 Fix this bug by calling nvmet_req_complete() only after freeing the pointers, so we will prevent the race by holding the sq reference. V2: remove redundant code Fixes: db1312dd9548 ("nvmet: implement basic In-Band Authentication") Signed-off-by: Maurizio Lombardi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fabrics-cmd-auth.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 586458f765f17..1d9854484e2e8 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -333,19 +333,21 @@ void nvmet_execute_auth_send(struct nvmet_req *req) __func__, ctrl->cntlid, req->sq->qid, status, req->error_loc); req->cqe->result.u64 = 0; - nvmet_req_complete(req, status); if (req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2 && req->sq->dhchap_step != NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) { unsigned long auth_expire_secs = ctrl->kato ? ctrl->kato : 120; mod_delayed_work(system_wq, &req->sq->auth_expired_work, auth_expire_secs * HZ); - return; + goto complete; } /* Final states, clear up variables */ nvmet_auth_sq_free(req->sq); if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE2) nvmet_ctrl_fatal_error(ctrl); + +complete: + nvmet_req_complete(req, status); } static int nvmet_auth_challenge(struct nvmet_req *req, void *d, int al) @@ -514,11 +516,12 @@ void nvmet_execute_auth_receive(struct nvmet_req *req) kfree(d); done: req->cqe->result.u64 = 0; - nvmet_req_complete(req, status); + if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2) nvmet_auth_sq_free(req->sq); else if (req->sq->dhchap_step == NVME_AUTH_DHCHAP_MESSAGE_FAILURE1) { nvmet_auth_sq_free(req->sq); nvmet_ctrl_fatal_error(ctrl); } + nvmet_req_complete(req, status); } From 5c3f4066462a5f6cac04d3dd81c9f551fabbc6c7 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 12 Oct 2023 11:13:51 -0700 Subject: [PATCH 6/6] nvme-pci: add BOGUS_NID for Intel 0a54 device These ones claim cmic and nmic capable, so need special consideration to ignore their duplicate identifiers. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217981 Reported-by: welsh@cassens.com Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 347cb5daebc3c..3f0c9ee09a12b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3329,7 +3329,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0x0a54), /* Intel P4500/P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES | - NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + NVME_QUIRK_IGNORE_DEV_SUBNQN | + NVME_QUIRK_BOGUS_NID, }, { PCI_VDEVICE(INTEL, 0x0a55), /* Dell Express Flash P4600 */ .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, },