From e5bb0988a5b622f58cc53dbdc044562229284d23 Mon Sep 17 00:00:00 2001
From: Pankaj Raghav
Date: Tue, 4 Jul 2023 09:36:56 +0200
Subject: [PATCH 01/10] nvme: add BOGUS_NID quirk for Samsung SM953
Add the quirk as SM953 is reporting bogus namespace ID.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217593
Reported-by: Clemens Springsguth
Tested-by: Clemens Springsguth
Signed-off-by: Pankaj Raghav
Signed-off-by: Keith Busch
---
drivers/nvme/host/pci.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 72725729cb6c3..8754b4a5c6844 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3396,6 +3396,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x144d, 0xa802), /* Samsung SM953 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1cc4, 0x6303), /* UMIS RPJTJ512MGE1QDY 512G */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1cc4, 0x6302), /* UMIS RPJTJ256MGE1QDY 256G */
From 9bcf156f5897e91e21be54960b46774f0682afad Mon Sep 17 00:00:00 2001
From: Damien Le Moal
Date: Fri, 7 Jul 2023 09:32:22 +0900
Subject: [PATCH 02/10] nvmet: use PAGE_SECTORS_SHIFT
Replace occurences of the pattern "PAGE_SHIFT - 9" in the passthru and
loop targets with PAGE_SECTORS_SHIFT.
Signed-off-by: Damien Le Moal
Reviewed-by: Chaitanya Kulkarni
Reviewed-by: Sagi Grimberg
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
drivers/nvme/target/loop.c | 2 +-
drivers/nvme/target/passthru.c | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index f2d24b2d992f8..48d5df054cd02 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -373,7 +373,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
goto out_cleanup_tagset;
ctrl->ctrl.max_hw_sectors =
- (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
+ (NVME_LOOP_MAX_SEGMENTS - 1) << PAGE_SECTORS_SHIFT;
nvme_unquiesce_admin_queue(&ctrl->ctrl);
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 71a9c1cc57f59..9fe07d7efa96c 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -102,14 +102,14 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
* which depends on the host's memory fragementation. To solve this,
* ensure mdts is limited to the pages equal to the number of segments.
*/
- max_hw_sectors = min_not_zero(pctrl->max_segments << (PAGE_SHIFT - 9),
+ max_hw_sectors = min_not_zero(pctrl->max_segments << PAGE_SECTORS_SHIFT,
pctrl->max_hw_sectors);
/*
* nvmet_passthru_map_sg is limitted to using a single bio so limit
* the mdts based on BIO_MAX_VECS as well
*/
- max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9),
+ max_hw_sectors = min_not_zero(BIO_MAX_VECS << PAGE_SECTORS_SHIFT,
max_hw_sectors);
page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
From b938e6603660652dc3db66d3c915fbfed3bce21d Mon Sep 17 00:00:00 2001
From: Ankit Kumar
Date: Fri, 23 Jun 2023 18:08:05 +0530
Subject: [PATCH 03/10] nvme: fix the NVME_ID_NS_NVM_STS_MASK definition
As per NVMe command set specification 1.0c Storage tag size is 7 bits.
Fixes: 4020aad85c67 ("nvme: add support for enhanced metadata")
Signed-off-by: Ankit Kumar
Reviewed-by: Kanchan Joshi
Signed-off-by: Keith Busch
---
include/linux/nvme.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 182b6d614eb19..26dd3f859d9d7 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -473,7 +473,7 @@ struct nvme_id_ns_nvm {
};
enum {
- NVME_ID_NS_NVM_STS_MASK = 0x3f,
+ NVME_ID_NS_NVM_STS_MASK = 0x7f,
NVME_ID_NS_NVM_GUARD_SHIFT = 7,
NVME_ID_NS_NVM_GUARD_MASK = 0x3,
};
From b718ae835bd5de891ff6fefa290940b7613d2b07 Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Wed, 12 Jul 2023 07:54:59 -0700
Subject: [PATCH 04/10] nvme: warn only once for legacy uuid attribute
Report the legacy fallback behavior for uuid attributes just once
instead of logging repeated warnings for the same condition every time
the attribute is read. The old behavior is too spamy on the kernel logs.
Cc: Johannes Thumshirn
Reported-by: Breno Leitao
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
drivers/nvme/host/sysfs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 45e91811f905c..212e1b05d2984 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -92,7 +92,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
* we have no UUID set
*/
if (uuid_is_null(&ids->uuid)) {
- dev_warn_ratelimited(dev,
+ dev_warn_once(dev,
"No UUID available providing old NGUID\n");
return sysfs_emit(buf, "%pU\n", ids->nguid);
}
From 733ba346d941d37e0814bac37b6a5c188ac3c9b2 Mon Sep 17 00:00:00 2001
From: Minjie Du
Date: Wed, 12 Jul 2023 19:18:37 +0800
Subject: [PATCH 05/10] nvme: fix parameter check in nvme_fault_inject_init()
Make IS_ERR() judge the debugfs_create_dir() function return.
Signed-off-by: Minjie Du
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
drivers/nvme/host/fault_inject.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvme/host/fault_inject.c b/drivers/nvme/host/fault_inject.c
index 83d2e6860d388..1ba10a5c656d1 100644
--- a/drivers/nvme/host/fault_inject.c
+++ b/drivers/nvme/host/fault_inject.c
@@ -27,7 +27,7 @@ void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
/* create debugfs directory and attribute */
parent = debugfs_create_dir(dev_name, NULL);
- if (!parent) {
+ if (IS_ERR(parent)) {
pr_warn("%s: failed to create debugfs directory\n", dev_name);
return;
}
From 60e445bdfccbb90c1bc13a92e128e50ba4357b3c Mon Sep 17 00:00:00 2001
From: Michael Liang
Date: Fri, 7 Jul 2023 15:21:56 -0600
Subject: [PATCH 06/10] nvme-fc: return non-zero status code when fails to
create association
Return non-zero status code(-EIO) when needed, so re-connecting or
deleting controller will be triggered properly.
Signed-off-by: Michael Liang
Reviewed-by: Caleb Sander
Reviewed-by: James Smart
Signed-off-by: Keith Busch
---
drivers/nvme/host/fc.c | 18 +++++++++++++++---
1 file changed, 15 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 691f2df574ce9..ad9336343e012 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2551,6 +2551,9 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
__nvme_fc_abort_outstanding_ios(ctrl, true);
set_bit(ASSOC_FAILED, &ctrl->flags);
+ dev_warn(ctrl->ctrl.device,
+ "NVME-FC{%d}: transport error during (re)connect\n",
+ ctrl->cnum);
return;
}
@@ -3110,7 +3113,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
*/
ret = nvme_enable_ctrl(&ctrl->ctrl);
- if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
+ if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
+ ret = -EIO;
+ if (ret)
goto out_disconnect_admin_queue;
ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments;
@@ -3120,7 +3125,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
nvme_unquiesce_admin_queue(&ctrl->ctrl);
ret = nvme_init_ctrl_finish(&ctrl->ctrl, false);
- if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
+ if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
+ ret = -EIO;
+ if (ret)
goto out_disconnect_admin_queue;
/* sanity checks */
@@ -3165,7 +3172,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
else
ret = nvme_fc_recreate_io_queues(ctrl);
}
- if (ret || test_bit(ASSOC_FAILED, &ctrl->flags))
+ if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
+ ret = -EIO;
+ if (ret)
goto out_term_aen_ops;
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
@@ -3180,6 +3189,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
out_term_aen_ops:
nvme_fc_term_aen_ops(ctrl);
out_disconnect_admin_queue:
+ dev_warn(ctrl->ctrl.device,
+ "NVME-FC{%d}: create_assoc failed, assoc_id %llx ret %d\n",
+ ctrl->cnum, ctrl->association_id, ret);
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
spin_lock_irqsave(&ctrl->lock, flags);
From ee6fdc5055e916b1dd497f11260d4901c4c1e55e Mon Sep 17 00:00:00 2001
From: Michael Liang
Date: Fri, 7 Jul 2023 15:21:57 -0600
Subject: [PATCH 07/10] nvme-fc: fix race between error recovery and creating
association
There is a small race window between nvme-fc association creation and error
recovery. Fix this race condition by protecting accessing to controller
state and ASSOC_FAILED flag under nvme-fc controller lock.
Signed-off-by: Michael Liang
Reviewed-by: Caleb Sander
Reviewed-by: James Smart
Signed-off-by: Keith Busch
---
drivers/nvme/host/fc.c | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index ad9336343e012..1cd2bf82319a9 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2548,17 +2548,24 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
* the controller. Abort any ios on the association and let the
* create_association error path resolve things.
*/
- if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
- __nvme_fc_abort_outstanding_ios(ctrl, true);
+ enum nvme_ctrl_state state;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctrl->lock, flags);
+ state = ctrl->ctrl.state;
+ if (state == NVME_CTRL_CONNECTING) {
set_bit(ASSOC_FAILED, &ctrl->flags);
+ spin_unlock_irqrestore(&ctrl->lock, flags);
+ __nvme_fc_abort_outstanding_ios(ctrl, true);
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: transport error during (re)connect\n",
ctrl->cnum);
return;
}
+ spin_unlock_irqrestore(&ctrl->lock, flags);
/* Otherwise, only proceed if in LIVE state - e.g. on first error */
- if (ctrl->ctrl.state != NVME_CTRL_LIVE)
+ if (state != NVME_CTRL_LIVE)
return;
dev_warn(ctrl->ctrl.device,
@@ -3172,12 +3179,16 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
else
ret = nvme_fc_recreate_io_queues(ctrl);
}
+
+ spin_lock_irqsave(&ctrl->lock, flags);
if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags))
ret = -EIO;
- if (ret)
+ if (ret) {
+ spin_unlock_irqrestore(&ctrl->lock, flags);
goto out_term_aen_ops;
-
+ }
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
+ spin_unlock_irqrestore(&ctrl->lock, flags);
ctrl->ctrl.nr_reconnects = 0;
From 71a5bb153be104d9175636e95166fd5e37466649 Mon Sep 17 00:00:00 2001
From: Keith Busch
Date: Wed, 12 Jul 2023 08:36:47 -0700
Subject: [PATCH 08/10] nvme: ensure disabling pairs with unquiesce
If any error handling that disables the controller fails to queue the
reset work, like if the state changed to disconnected inbetween, then
the failed teardown needs to unquiesce the queues since it's no longer
paired with reset_work. Just make sure that the controller can be put
into a resetting state prior to starting the disable so that no other
handling can change the queue states while recovery is happening.
Reported-by: Ming Lei
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
drivers/nvme/host/pci.c | 25 +++++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8754b4a5c6844..8e7dbe0ab8904 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1298,9 +1298,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
*/
if (nvme_should_reset(dev, csts)) {
nvme_warn_reset(dev, csts);
- nvme_dev_disable(dev, false);
- nvme_reset_ctrl(&dev->ctrl);
- return BLK_EH_DONE;
+ goto disable;
}
/*
@@ -1351,10 +1349,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
- nvme_dev_disable(dev, false);
- nvme_reset_ctrl(&dev->ctrl);
-
- return BLK_EH_DONE;
+ goto disable;
}
if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
@@ -1391,6 +1386,15 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
* as the device then is in a faulty state.
*/
return BLK_EH_RESET_TIMER;
+
+disable:
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
+ return BLK_EH_DONE;
+
+ nvme_dev_disable(dev, false);
+ if (nvme_try_sched_reset(&dev->ctrl))
+ nvme_unquiesce_io_queues(&dev->ctrl);
+ return BLK_EH_DONE;
}
static void nvme_free_queue(struct nvme_queue *nvmeq)
@@ -3278,6 +3282,10 @@ static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
case pci_channel_io_frozen:
dev_warn(dev->ctrl.device,
"frozen state error detected, reset controller\n");
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
+ nvme_dev_disable(dev, true);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
nvme_dev_disable(dev, false);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
@@ -3294,7 +3302,8 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
- nvme_reset_ctrl(&dev->ctrl);
+ if (!nvme_try_sched_reset(&dev->ctrl))
+ nvme_unquiesce_io_queues(&dev->ctrl);
return PCI_ERS_RESULT_RECOVERED;
}
From ac522fc6c3165fd0daa2f8da7e07d5f800586daa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig
Date: Thu, 13 Jul 2023 15:30:42 +0200
Subject: [PATCH 09/10] nvme: don't reject probe due to duplicate IDs for
single-ported PCIe devices
While duplicate IDs are still very harmful, including the potential to easily
see changing devices in /dev/disk/by-id, it turn out they are extremely
common for cheap end user NVMe devices.
Relax our check for them for so that it doesn't reject the probe on
single-ported PCIe devices, but prints a big warning instead. In doubt
we'd still like to see quirk entries to disable the potential for
changing supposed stable device identifier links, but this will at least
allow users how have two (or more) of these devices to use them without
having to manually add a new PCI ID entry with the quirk through sysfs or
by patching the kernel.
Fixes: 2079f41ec6ff ("nvme: check that EUI/GUID/UUID are globally unique")
Cc: stable@vger.kernel.org # 6.0+
Co-developed-by: Sagi Grimberg
Signed-off-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
drivers/nvme/host/core.c | 36 +++++++++++++++++++++++++++++++++---
1 file changed, 33 insertions(+), 3 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 47d7ba2827ff2..37b6fa7466620 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3431,10 +3431,40 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
ret = nvme_global_check_duplicate_ids(ctrl->subsys, &info->ids);
if (ret) {
- dev_err(ctrl->device,
- "globally duplicate IDs for nsid %d\n", info->nsid);
+ /*
+ * We've found two different namespaces on two different
+ * subsystems that report the same ID. This is pretty nasty
+ * for anything that actually requires unique device
+ * identification. In the kernel we need this for multipathing,
+ * and in user space the /dev/disk/by-id/ links rely on it.
+ *
+ * If the device also claims to be multi-path capable back off
+ * here now and refuse the probe the second device as this is a
+ * recipe for data corruption. If not this is probably a
+ * cheap consumer device if on the PCIe bus, so let the user
+ * proceed and use the shiny toy, but warn that with changing
+ * probing order (which due to our async probing could just be
+ * device taking longer to startup) the other device could show
+ * up at any time.
+ */
nvme_print_device_info(ctrl);
- return ret;
+ if ((ns->ctrl->ops->flags & NVME_F_FABRICS) || /* !PCIe */
+ ((ns->ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) &&
+ info->is_shared)) {
+ dev_err(ctrl->device,
+ "ignoring nsid %d because of duplicate IDs\n",
+ info->nsid);
+ return ret;
+ }
+
+ dev_err(ctrl->device,
+ "clearing duplicate IDs for nsid %d\n", info->nsid);
+ dev_err(ctrl->device,
+ "use of /dev/disk/by-id/ may cause data corruption\n");
+ memset(&info->ids.nguid, 0, sizeof(info->ids.nguid));
+ memset(&info->ids.uuid, 0, sizeof(info->ids.uuid));
+ memset(&info->ids.eui64, 0, sizeof(info->ids.eui64));
+ ctrl->quirks |= NVME_QUIRK_BOGUS_NID;
}
mutex_lock(&ctrl->subsys->lock);
From b8f6446b6853768cb99e7c201bddce69ca60c15e Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Thu, 13 Jul 2023 17:26:20 +0800
Subject: [PATCH 10/10] nvme-pci: fix DMA direction of unmapping integrity data
DMA direction should be taken in dma_unmap_page() for unmapping integrity
data.
Fix this DMA direction, and reported in Guangwu's test.
Reported-by: Guangwu Zhang
Fixes: 4aedb705437f ("nvme-pci: split metadata handling from nvme_map_data / nvme_unmap_data")
Signed-off-by: Ming Lei
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
drivers/nvme/host/pci.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8e7dbe0ab8904..baf69af7ea78e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -967,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_unmap_page(dev->dev, iod->meta_dma,
- rq_integrity_vec(req)->bv_len, rq_data_dir(req));
+ rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
}
if (blk_rq_nr_phys_segments(req))