Skip to content

Commit

Permalink
sfc: Add AER and EEH support for Siena
Browse files Browse the repository at this point in the history
The Linux side of EEH is triggered by MMIO reads, but this
driver's data path does not issue any MMIO reads (except in
legacy interrupt mode).  Therefore add a monitor function
to poll EEH periodically.

When preparing to reset the device based on our own error
detection, also poll EEH and defer to its recovery mechanism
if appropriate.

[bwh: Use a separate condition for the initial link poll; fix some
 style errors]
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
  • Loading branch information
Alexandre Rames authored and Ben Hutchings committed Mar 7, 2013
1 parent 634ab72 commit 626950d
Show file tree
Hide file tree
Showing 4 changed files with 216 additions and 26 deletions.
207 changes: 185 additions & 22 deletions drivers/net/ethernet/sfc/efx.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@
#include <linux/ethtool.h>
#include <linux/topology.h>
#include <linux/gfp.h>
#include <linux/pci.h>
#include <linux/cpu_rmap.h>
#include <linux/aer.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
Expand Down Expand Up @@ -71,17 +73,19 @@ const char *const efx_loopback_mode_names[] = {

const unsigned int efx_reset_type_max = RESET_TYPE_MAX;
const char *const efx_reset_type_names[] = {
[RESET_TYPE_INVISIBLE] = "INVISIBLE",
[RESET_TYPE_ALL] = "ALL",
[RESET_TYPE_WORLD] = "WORLD",
[RESET_TYPE_DISABLE] = "DISABLE",
[RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
[RESET_TYPE_INT_ERROR] = "INT_ERROR",
[RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY",
[RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH",
[RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH",
[RESET_TYPE_TX_SKIP] = "TX_SKIP",
[RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
[RESET_TYPE_INVISIBLE] = "INVISIBLE",
[RESET_TYPE_ALL] = "ALL",
[RESET_TYPE_RECOVER_OR_ALL] = "RECOVER_OR_ALL",
[RESET_TYPE_WORLD] = "WORLD",
[RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
[RESET_TYPE_DISABLE] = "DISABLE",
[RESET_TYPE_TX_WATCHDOG] = "TX_WATCHDOG",
[RESET_TYPE_INT_ERROR] = "INT_ERROR",
[RESET_TYPE_RX_RECOVERY] = "RX_RECOVERY",
[RESET_TYPE_RX_DESC_FETCH] = "RX_DESC_FETCH",
[RESET_TYPE_TX_DESC_FETCH] = "TX_DESC_FETCH",
[RESET_TYPE_TX_SKIP] = "TX_SKIP",
[RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
};

#define EFX_MAX_MTU (9 * 1024)
Expand Down Expand Up @@ -117,9 +121,12 @@ MODULE_PARM_DESC(separate_tx_channels,
static int napi_weight = 64;

/* This is the time (in jiffies) between invocations of the hardware
* monitor. On Falcon-based NICs, this will:
* monitor.
* On Falcon-based NICs, this will:
* - Check the on-board hardware monitor;
* - Poll the link state and reconfigure the hardware as necessary.
* On Siena-based NICs for power systems with EEH support, this will give EEH a
* chance to start.
*/
static unsigned int efx_monitor_interval = 1 * HZ;

Expand Down Expand Up @@ -203,13 +210,14 @@ static void efx_stop_all(struct efx_nic *efx);
#define EFX_ASSERT_RESET_SERIALISED(efx) \
do { \
if ((efx->state == STATE_READY) || \
(efx->state == STATE_RECOVERY) || \
(efx->state == STATE_DISABLED)) \
ASSERT_RTNL(); \
} while (0)

static int efx_check_disabled(struct efx_nic *efx)
{
if (efx->state == STATE_DISABLED) {
if (efx->state == STATE_DISABLED || efx->state == STATE_RECOVERY) {
netif_err(efx, drv, efx->net_dev,
"device is disabled due to earlier errors\n");
return -EIO;
Expand Down Expand Up @@ -677,7 +685,7 @@ static void efx_stop_datapath(struct efx_nic *efx)
BUG_ON(efx->port_enabled);

/* Only perform flush if dma is enabled */
if (dev->is_busmaster) {
if (dev->is_busmaster && efx->state != STATE_RECOVERY) {
rc = efx_nic_flush_queues(efx);

if (rc && EFX_WORKAROUND_7803(efx)) {
Expand Down Expand Up @@ -1590,13 +1598,15 @@ static void efx_start_all(struct efx_nic *efx)
efx_start_port(efx);
efx_start_datapath(efx);

/* Start the hardware monitor if there is one. Otherwise (we're link
* event driven), we have to poll the PHY because after an event queue
* flush, we could have a missed a link state change */
if (efx->type->monitor != NULL) {
/* Start the hardware monitor if there is one */
if (efx->type->monitor != NULL)
queue_delayed_work(efx->workqueue, &efx->monitor_work,
efx_monitor_interval);
} else {

/* If link state detection is normally event-driven, we have
* to poll now because we could have missed a change
*/
if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
mutex_lock(&efx->mac_lock);
if (efx->phy_op->poll(efx))
efx_link_status_changed(efx);
Expand Down Expand Up @@ -2303,7 +2313,9 @@ int efx_reset(struct efx_nic *efx, enum reset_type method)

out:
/* Leave device stopped if necessary */
disabled = rc || method == RESET_TYPE_DISABLE;
disabled = rc ||
method == RESET_TYPE_DISABLE ||
method == RESET_TYPE_RECOVER_OR_DISABLE;
rc2 = efx_reset_up(efx, method, !disabled);
if (rc2) {
disabled = true;
Expand All @@ -2322,13 +2334,48 @@ int efx_reset(struct efx_nic *efx, enum reset_type method)
return rc;
}

/* Try recovery mechanisms.
* For now only EEH is supported.
* Returns 0 if the recovery mechanisms are unsuccessful.
* Returns a non-zero value otherwise.
*/
static int efx_try_recovery(struct efx_nic *efx)
{
#ifdef CONFIG_EEH
/* A PCI error can occur and not be seen by EEH because nothing
* happens on the PCI bus. In this case the driver may fail and
* schedule a 'recover or reset', leading to this recovery handler.
* Manually call the eeh failure check function.
*/
struct eeh_dev *eehdev =
of_node_to_eeh_dev(pci_device_to_OF_node(efx->pci_dev));

if (eeh_dev_check_failure(eehdev)) {
/* The EEH mechanisms will handle the error and reset the
* device if necessary.
*/
return 1;
}
#endif
return 0;
}

/* The worker thread exists so that code that cannot sleep can
* schedule a reset for later.
*/
static void efx_reset_work(struct work_struct *data)
{
struct efx_nic *efx = container_of(data, struct efx_nic, reset_work);
unsigned long pending = ACCESS_ONCE(efx->reset_pending);
unsigned long pending;
enum reset_type method;

pending = ACCESS_ONCE(efx->reset_pending);
method = fls(pending) - 1;

if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
method == RESET_TYPE_RECOVER_OR_ALL) &&
efx_try_recovery(efx))
return;

if (!pending)
return;
Expand All @@ -2340,7 +2387,7 @@ static void efx_reset_work(struct work_struct *data)
* it cannot change again.
*/
if (efx->state == STATE_READY)
(void)efx_reset(efx, fls(pending) - 1);
(void)efx_reset(efx, method);

rtnl_unlock();
}
Expand All @@ -2349,11 +2396,20 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
{
enum reset_type method;

if (efx->state == STATE_RECOVERY) {
netif_dbg(efx, drv, efx->net_dev,
"recovering: skip scheduling %s reset\n",
RESET_TYPE(type));
return;
}

switch (type) {
case RESET_TYPE_INVISIBLE:
case RESET_TYPE_ALL:
case RESET_TYPE_RECOVER_OR_ALL:
case RESET_TYPE_WORLD:
case RESET_TYPE_DISABLE:
case RESET_TYPE_RECOVER_OR_DISABLE:
method = type;
netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
RESET_TYPE(method));
Expand Down Expand Up @@ -2563,6 +2619,8 @@ static void efx_pci_remove(struct pci_dev *pci_dev)
efx_fini_struct(efx);
pci_set_drvdata(pci_dev, NULL);
free_netdev(efx->net_dev);

pci_disable_pcie_error_reporting(pci_dev);
};

/* NIC VPD information
Expand Down Expand Up @@ -2735,6 +2793,11 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
netif_warn(efx, probe, efx->net_dev,
"failed to create MTDs (%d)\n", rc);

rc = pci_enable_pcie_error_reporting(pci_dev);
if (rc && rc != -EINVAL)
netif_warn(efx, probe, efx->net_dev,
"pci_enable_pcie_error_reporting failed (%d)\n", rc);

return 0;

fail4:
Expand Down Expand Up @@ -2859,12 +2922,112 @@ static const struct dev_pm_ops efx_pm_ops = {
.restore = efx_pm_resume,
};

/* A PCI error affecting this device was detected.
* At this point MMIO and DMA may be disabled.
* Stop the software path and request a slot reset.
*/
pci_ers_result_t efx_io_error_detected(struct pci_dev *pdev,
enum pci_channel_state state)
{
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
struct efx_nic *efx = pci_get_drvdata(pdev);

if (state == pci_channel_io_perm_failure)
return PCI_ERS_RESULT_DISCONNECT;

rtnl_lock();

if (efx->state != STATE_DISABLED) {
efx->state = STATE_RECOVERY;
efx->reset_pending = 0;

efx_device_detach_sync(efx);

efx_stop_all(efx);
efx_stop_interrupts(efx, false);

status = PCI_ERS_RESULT_NEED_RESET;
} else {
/* If the interface is disabled we don't want to do anything
* with it.
*/
status = PCI_ERS_RESULT_RECOVERED;
}

rtnl_unlock();

pci_disable_device(pdev);

return status;
}

/* Fake a successfull reset, which will be performed later in efx_io_resume. */
pci_ers_result_t efx_io_slot_reset(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
int rc;

if (pci_enable_device(pdev)) {
netif_err(efx, hw, efx->net_dev,
"Cannot re-enable PCI device after reset.\n");
status = PCI_ERS_RESULT_DISCONNECT;
}

rc = pci_cleanup_aer_uncorrect_error_status(pdev);
if (rc) {
netif_err(efx, hw, efx->net_dev,
"pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
/* Non-fatal error. Continue. */
}

return status;
}

/* Perform the actual reset and resume I/O operations. */
static void efx_io_resume(struct pci_dev *pdev)
{
struct efx_nic *efx = pci_get_drvdata(pdev);
int rc;

rtnl_lock();

if (efx->state == STATE_DISABLED)
goto out;

rc = efx_reset(efx, RESET_TYPE_ALL);
if (rc) {
netif_err(efx, hw, efx->net_dev,
"efx_reset failed after PCI error (%d)\n", rc);
} else {
efx->state = STATE_READY;
netif_dbg(efx, hw, efx->net_dev,
"Done resetting and resuming IO after PCI error.\n");
}

out:
rtnl_unlock();
}

/* For simplicity and reliability, we always require a slot reset and try to
* reset the hardware when a pci error affecting the device is detected.
* We leave both the link_reset and mmio_enabled callback unimplemented:
* with our request for slot reset the mmio_enabled callback will never be
* called, and the link_reset callback is not used by AER or EEH mechanisms.
*/
static struct pci_error_handlers efx_err_handlers = {
.error_detected = efx_io_error_detected,
.slot_reset = efx_io_slot_reset,
.resume = efx_io_resume,
};

static struct pci_driver efx_pci_driver = {
.name = KBUILD_MODNAME,
.id_table = efx_pci_table,
.probe = efx_pci_probe,
.remove = efx_pci_remove,
.driver.pm = &efx_pm_ops,
.err_handler = &efx_err_handlers,
};

/**************************************************************************
Expand Down
12 changes: 9 additions & 3 deletions drivers/net/ethernet/sfc/enum.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,12 @@ enum efx_loopback_mode {
* Reset methods are numbered in order of increasing scope.
*
* @RESET_TYPE_INVISIBLE: Reset datapath and MAC (Falcon only)
* @RESET_TYPE_RECOVER_OR_ALL: Try to recover. Apply RESET_TYPE_ALL
* if unsuccessful.
* @RESET_TYPE_ALL: Reset datapath, MAC and PHY
* @RESET_TYPE_WORLD: Reset as much as possible
* @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if
* unsuccessful.
* @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled
* @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog
* @RESET_TYPE_INT_ERROR: reset due to internal error
Expand All @@ -150,9 +154,11 @@ enum efx_loopback_mode {
*/
enum reset_type {
RESET_TYPE_INVISIBLE = 0,
RESET_TYPE_ALL = 1,
RESET_TYPE_WORLD = 2,
RESET_TYPE_DISABLE = 3,
RESET_TYPE_RECOVER_OR_ALL = 1,
RESET_TYPE_ALL = 2,
RESET_TYPE_WORLD = 3,
RESET_TYPE_RECOVER_OR_DISABLE = 4,
RESET_TYPE_DISABLE = 5,
RESET_TYPE_MAX_METHOD,
RESET_TYPE_TX_WATCHDOG,
RESET_TYPE_INT_ERROR,
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/sfc/net_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ enum nic_state {
STATE_UNINIT = 0, /* device being probed/removed or is frozen */
STATE_READY = 1, /* hardware ready and netdev registered */
STATE_DISABLED = 2, /* device disabled due to hardware errors */
STATE_RECOVERY = 3, /* device recovering from PCI error */
};

/*
Expand Down
Loading

0 comments on commit 626950d

Please sign in to comment.