Skip to content

Commit

Permalink
NVMe: Add pci error handlers
Browse files Browse the repository at this point in the history
Requests enabling pcie aer support. Shuts down the controller on error
detected with io frozen state prior to requesting slot reset; resumes
controller after reset completes.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
  • Loading branch information
Keith Busch authored and Jens Axboe committed Dec 22, 2015
1 parent bbc758e commit a0a3408
Showing 1 changed file with 44 additions and 10 deletions.
54 changes: 44 additions & 10 deletions drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
* more details.
*/

#include <linux/aer.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
Expand Down Expand Up @@ -1670,6 +1671,8 @@ static int nvme_dev_map(struct nvme_dev *dev)
if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2))
dev->cmb = nvme_map_cmb(dev);

pci_enable_pcie_error_reporting(pdev);
pci_save_state(pdev);
return 0;

unmap:
Expand Down Expand Up @@ -1697,8 +1700,10 @@ static void nvme_dev_unmap(struct nvme_dev *dev)
pci_release_regions(pdev);
}

if (pci_is_enabled(pdev))
if (pci_is_enabled(pdev)) {
pci_disable_pcie_error_reporting(pdev);
pci_disable_device(pdev);
}
}

struct nvme_delq_ctx {
Expand Down Expand Up @@ -2225,13 +2230,6 @@ static void nvme_remove(struct pci_dev *pdev)
nvme_put_ctrl(&dev->ctrl);
}

/* These functions are yet to be implemented */
#define nvme_error_detected NULL
#define nvme_dump_registers NULL
#define nvme_link_reset NULL
#define nvme_slot_reset NULL
#define nvme_error_resume NULL

#ifdef CONFIG_PM_SLEEP
static int nvme_suspend(struct device *dev)
{
Expand All @@ -2254,10 +2252,46 @@ static int nvme_resume(struct device *dev)

static SIMPLE_DEV_PM_OPS(nvme_dev_pm_ops, nvme_suspend, nvme_resume);

static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);

/*
* A frozen channel requires a reset. When detected, this method will
* shutdown the controller to quiesce. The controller will be restarted
* after the slot reset through driver's slot_reset callback.
*/
dev_warn(&pdev->dev, "error detected: state:%d\n", state);
switch (state) {
case pci_channel_io_normal:
return PCI_ERS_RESULT_CAN_RECOVER;
case pci_channel_io_frozen:
nvme_dev_shutdown(dev);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
return PCI_ERS_RESULT_DISCONNECT;
}
return PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
{
struct nvme_dev *dev = pci_get_drvdata(pdev);

dev_info(&pdev->dev, "restart after slot reset\n");
pci_restore_state(pdev);
queue_work(nvme_workq, &dev->reset_work);
return PCI_ERS_RESULT_RECOVERED;
}

static void nvme_error_resume(struct pci_dev *pdev)
{
pci_cleanup_aer_uncorrect_error_status(pdev);
}

static const struct pci_error_handlers nvme_err_handler = {
.error_detected = nvme_error_detected,
.mmio_enabled = nvme_dump_registers,
.link_reset = nvme_link_reset,
.slot_reset = nvme_slot_reset,
.resume = nvme_error_resume,
.reset_notify = nvme_reset_notify,
Expand Down

0 comments on commit a0a3408

Please sign in to comment.