From e5bb0988a5b622f58cc53dbdc044562229284d23 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Tue, 4 Jul 2023 09:36:56 +0200 Subject: [PATCH 01/10] nvme: add BOGUS_NID quirk for Samsung SM953 Add the quirk as SM953 is reporting bogus namespace ID. Link: https://bugzilla.kernel.org/show_bug.cgi?id=217593 Reported-by: Clemens Springsguth Tested-by: Clemens Springsguth Signed-off-by: Pankaj Raghav Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 72725729cb6c3..8754b4a5c6844 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3396,6 +3396,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x144d, 0xa802), /* Samsung SM953 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, { PCI_DEVICE(0x1cc4, 0x6303), /* UMIS RPJTJ512MGE1QDY 512G */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1cc4, 0x6302), /* UMIS RPJTJ256MGE1QDY 256G */ From 9bcf156f5897e91e21be54960b46774f0682afad Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Fri, 7 Jul 2023 09:32:22 +0900 Subject: [PATCH 02/10] nvmet: use PAGE_SECTORS_SHIFT Replace occurences of the pattern "PAGE_SHIFT - 9" in the passthru and loop targets with PAGE_SECTORS_SHIFT. Signed-off-by: Damien Le Moal Reviewed-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/loop.c | 2 +- drivers/nvme/target/passthru.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index f2d24b2d992f8..48d5df054cd02 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -373,7 +373,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) goto out_cleanup_tagset; ctrl->ctrl.max_hw_sectors = - (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9); + (NVME_LOOP_MAX_SEGMENTS - 1) << PAGE_SECTORS_SHIFT; nvme_unquiesce_admin_queue(&ctrl->ctrl); diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index 71a9c1cc57f59..9fe07d7efa96c 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -102,14 +102,14 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req) * which depends on the host's memory fragementation. To solve this, * ensure mdts is limited to the pages equal to the number of segments. */ - max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9), + max_hw_sectors = min_not_zero(pctrl->max_segments << PAGE_SECTORS_SHIFT, pctrl->max_hw_sectors); /* * nvmet_passthru_map_sg is limitted to using a single bio so limit * the mdts based on BIO_MAX_VECS as well */ - max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9), + max_hw_sectors = min_not_zero(BIO_MAX_VECS << PAGE_SECTORS_SHIFT, max_hw_sectors); page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; From b938e6603660652dc3db66d3c915fbfed3bce21d Mon Sep 17 00:00:00 2001 From: Ankit Kumar Date: Fri, 23 Jun 2023 18:08:05 +0530 Subject: [PATCH 03/10] nvme: fix the NVME_ID_NS_NVM_STS_MASK definition As per NVMe command set specification 1.0c Storage tag size is 7 bits. Fixes: 4020aad85c67 ("nvme: add support for enhanced metadata") Signed-off-by: Ankit Kumar Reviewed-by: Kanchan Joshi Signed-off-by: Keith Busch --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 182b6d614eb19..26dd3f859d9d7 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -473,7 +473,7 @@ struct nvme_id_ns_nvm { }; enum { - NVME_ID_NS_NVM_STS_MASK = 0x3f, + NVME_ID_NS_NVM_STS_MASK = 0x7f, NVME_ID_NS_NVM_GUARD_SHIFT = 7, NVME_ID_NS_NVM_GUARD_MASK = 0x3, }; From b718ae835bd5de891ff6fefa290940b7613d2b07 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 12 Jul 2023 07:54:59 -0700 Subject: [PATCH 04/10] nvme: warn only once for legacy uuid attribute Report the legacy fallback behavior for uuid attributes just once instead of logging repeated warnings for the same condition every time the attribute is read. The old behavior is too spamy on the kernel logs. Cc: Johannes Thumshirn Reported-by: Breno Leitao Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 45e91811f905c..212e1b05d2984 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -92,7 +92,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, * we have no UUID set */ if (uuid_is_null(&ids->uuid)) { - dev_warn_ratelimited(dev, + dev_warn_once(dev, "No UUID available providing old NGUID\n"); return sysfs_emit(buf, "%pU\n", ids->nguid); } From 733ba346d941d37e0814bac37b6a5c188ac3c9b2 Mon Sep 17 00:00:00 2001 From: Minjie Du Date: Wed, 12 Jul 2023 19:18:37 +0800 Subject: [PATCH 05/10] nvme: fix parameter check in nvme_fault_inject_init() Make IS_ERR() judge the debugfs_create_dir() function return. Signed-off-by: Minjie Du Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/fault_inject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c index 83d2e6860d388..1ba10a5c656d1 100644 --- a/drivers/nvme/host/fault_inject.c +++ b/drivers/nvme/host/fault_inject.c @@ -27,7 +27,7 @@ void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj, /* create debugfs directory and attribute */ parent = debugfs_create_dir(dev_name, NULL); - if (!parent) { + if (IS_ERR(parent)) { pr_warn("%s: failed to create debugfs directory\n", dev_name); return; } From 60e445bdfccbb90c1bc13a92e128e50ba4357b3c Mon Sep 17 00:00:00 2001 From: Michael Liang Date: Fri, 7 Jul 2023 15:21:56 -0600 Subject: [PATCH 06/10] nvme-fc: return non-zero status code when fails to create association Return non-zero status code(-EIO) when needed, so re-connecting or deleting controller will be triggered properly. Signed-off-by: Michael Liang Reviewed-by: Caleb Sander Reviewed-by: James Smart Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 691f2df574ce9..ad9336343e012 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2551,6 +2551,9 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { __nvme_fc_abort_outstanding_ios(ctrl, true); set_bit(ASSOC_FAILED, &ctrl->flags); + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport error during (re)connect\n", + ctrl->cnum); return; } @@ -3110,7 +3113,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) */ ret = nvme_enable_ctrl(&ctrl->ctrl); - if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) + ret = -EIO; + if (ret) goto out_disconnect_admin_queue; ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments; @@ -3120,7 +3125,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) nvme_unquiesce_admin_queue(&ctrl->ctrl); ret = nvme_init_ctrl_finish(&ctrl->ctrl, false); - if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) + ret = -EIO; + if (ret) goto out_disconnect_admin_queue; /* sanity checks */ @@ -3165,7 +3172,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) else ret = nvme_fc_recreate_io_queues(ctrl); } - if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) + ret = -EIO; + if (ret) goto out_term_aen_ops; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -3180,6 +3189,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) out_term_aen_ops: nvme_fc_term_aen_ops(ctrl); out_disconnect_admin_queue: + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n", + ctrl->cnum, ctrl->association_id, ret); /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); spin_lock_irqsave(&ctrl->lock, flags); From ee6fdc5055e916b1dd497f11260d4901c4c1e55e Mon Sep 17 00:00:00 2001 From: Michael Liang Date: Fri, 7 Jul 2023 15:21:57 -0600 Subject: [PATCH 07/10] nvme-fc: fix race between error recovery and creating association There is a small race window between nvme-fc association creation and error recovery. Fix this race condition by protecting accessing to controller state and ASSOC_FAILED flag under nvme-fc controller lock. Signed-off-by: Michael Liang Reviewed-by: Caleb Sander Reviewed-by: James Smart Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index ad9336343e012..1cd2bf82319a9 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2548,17 +2548,24 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) * the controller. Abort any ios on the association and let the * create_association error path resolve things. */ - if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { - __nvme_fc_abort_outstanding_ios(ctrl, true); + enum nvme_ctrl_state state; + unsigned long flags; + + spin_lock_irqsave(&ctrl->lock, flags); + state = ctrl->ctrl.state; + if (state == NVME_CTRL_CONNECTING) { set_bit(ASSOC_FAILED, &ctrl->flags); + spin_unlock_irqrestore(&ctrl->lock, flags); + __nvme_fc_abort_outstanding_ios(ctrl, true); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", ctrl->cnum); return; } + spin_unlock_irqrestore(&ctrl->lock, flags); /* Otherwise, only proceed if in LIVE state - e.g. on first error */ - if (ctrl->ctrl.state != NVME_CTRL_LIVE) + if (state != NVME_CTRL_LIVE) return; dev_warn(ctrl->ctrl.device, @@ -3172,12 +3179,16 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) else ret = nvme_fc_recreate_io_queues(ctrl); } + + spin_lock_irqsave(&ctrl->lock, flags); if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) ret = -EIO; - if (ret) + if (ret) { + spin_unlock_irqrestore(&ctrl->lock, flags); goto out_term_aen_ops; - + } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + spin_unlock_irqrestore(&ctrl->lock, flags); ctrl->ctrl.nr_reconnects = 0; From 71a5bb153be104d9175636e95166fd5e37466649 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 12 Jul 2023 08:36:47 -0700 Subject: [PATCH 08/10] nvme: ensure disabling pairs with unquiesce If any error handling that disables the controller fails to queue the reset work, like if the state changed to disconnected inbetween, then the failed teardown needs to unquiesce the queues since it's no longer paired with reset_work. Just make sure that the controller can be put into a resetting state prior to starting the disable so that no other handling can change the queue states while recovery is happening. Reported-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8754b4a5c6844..8e7dbe0ab8904 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1298,9 +1298,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) */ if (nvme_should_reset(dev, csts)) { nvme_warn_reset(dev, csts); - nvme_dev_disable(dev, false); - nvme_reset_ctrl(&dev->ctrl); - return BLK_EH_DONE; + goto disable; } /* @@ -1351,10 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); nvme_req(req)->flags |= NVME_REQ_CANCELLED; - nvme_dev_disable(dev, false); - nvme_reset_ctrl(&dev->ctrl); - - return BLK_EH_DONE; + goto disable; } if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { @@ -1391,6 +1386,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) * as the device then is in a faulty state. */ return BLK_EH_RESET_TIMER; + +disable: + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) + return BLK_EH_DONE; + + nvme_dev_disable(dev, false); + if (nvme_try_sched_reset(&dev->ctrl)) + nvme_unquiesce_io_queues(&dev->ctrl); + return BLK_EH_DONE; } static void nvme_free_queue(struct nvme_queue *nvmeq) @@ -3278,6 +3282,10 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev, case pci_channel_io_frozen: dev_warn(dev->ctrl.device, "frozen state error detected, reset controller\n"); + if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) { + nvme_dev_disable(dev, true); + return PCI_ERS_RESULT_DISCONNECT; + } nvme_dev_disable(dev, false); return PCI_ERS_RESULT_NEED_RESET; case pci_channel_io_perm_failure: @@ -3294,7 +3302,8 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - nvme_reset_ctrl(&dev->ctrl); + if (!nvme_try_sched_reset(&dev->ctrl)) + nvme_unquiesce_io_queues(&dev->ctrl); return PCI_ERS_RESULT_RECOVERED; } From ac522fc6c3165fd0daa2f8da7e07d5f800586daa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Jul 2023 15:30:42 +0200 Subject: [PATCH 09/10] nvme: don't reject probe due to duplicate IDs for single-ported PCIe devices While duplicate IDs are still very harmful, including the potential to easily see changing devices in /dev/disk/by-id, it turn out they are extremely common for cheap end user NVMe devices. Relax our check for them for so that it doesn't reject the probe on single-ported PCIe devices, but prints a big warning instead. In doubt we'd still like to see quirk entries to disable the potential for changing supposed stable device identifier links, but this will at least allow users how have two (or more) of these devices to use them without having to manually add a new PCI ID entry with the quirk through sysfs or by patching the kernel. Fixes: 2079f41ec6ff ("nvme: check that EUI/GUID/UUID are globally unique") Cc: stable@vger.kernel.org # 6.0+ Co-developed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 36 +++++++++++++++++++++++++++++++++--- 1 file changed, 33 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 47d7ba2827ff2..37b6fa7466620 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3431,10 +3431,40 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info) ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids); if (ret) { - dev_err(ctrl->device, - "globally duplicate IDs for nsid %d\n", info->nsid); + /* + * We've found two different namespaces on two different + * subsystems that report the same ID. This is pretty nasty + * for anything that actually requires unique device + * identification. In the kernel we need this for multipathing, + * and in user space the /dev/disk/by-id/ links rely on it. + * + * If the device also claims to be multi-path capable back off + * here now and refuse the probe the second device as this is a + * recipe for data corruption. If not this is probably a + * cheap consumer device if on the PCIe bus, so let the user + * proceed and use the shiny toy, but warn that with changing + * probing order (which due to our async probing could just be + * device taking longer to startup) the other device could show + * up at any time. + */ nvme_print_device_info(ctrl); - return ret; + if ((ns->ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */ + ((ns->ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) && + info->is_shared)) { + dev_err(ctrl->device, + "ignoring nsid %d because of duplicate IDs\n", + info->nsid); + return ret; + } + + dev_err(ctrl->device, + "clearing duplicate IDs for nsid %d\n", info->nsid); + dev_err(ctrl->device, + "use of /dev/disk/by-id/ may cause data corruption\n"); + memset(&info->ids.nguid, 0, sizeof(info->ids.nguid)); + memset(&info->ids.uuid, 0, sizeof(info->ids.uuid)); + memset(&info->ids.eui64, 0, sizeof(info->ids.eui64)); + ctrl->quirks |= NVME_QUIRK_BOGUS_NID; } mutex_lock(&ctrl->subsys->lock); From b8f6446b6853768cb99e7c201bddce69ca60c15e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 13 Jul 2023 17:26:20 +0800 Subject: [PATCH 10/10] nvme-pci: fix DMA direction of unmapping integrity data DMA direction should be taken in dma_unmap_page() for unmapping integrity data. Fix this DMA direction, and reported in Guangwu's test. Reported-by: Guangwu Zhang Fixes: 4aedb705437f ("nvme-pci: split metadata handling from nvme_map_data / nvme_unmap_data") Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8e7dbe0ab8904..baf69af7ea78e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -967,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); dma_unmap_page(dev->dev, iod->meta_dma, - rq_integrity_vec(req)->bv_len, rq_data_dir(req)); + rq_integrity_vec(req)->bv_len, rq_dma_dir(req)); } if (blk_rq_nr_phys_segments(req))