Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 17727
b: refs/heads/master
c: b6495c0
h: refs/heads/master
i:
  17725: 7100419
  17723: f71073a
  17719: 74f0731
  17711: dc39038
  17695: db0ec5f
  17663: 5ac8c4b
v: v3
  • Loading branch information
Linas Vepstas authored and Paul Mackerras committed Jan 10, 2006
1 parent dca1509 commit f6a629e
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 41 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 21e464dd7c943c984dcccd9aff8c9f6a5ea920d7
refs/heads/master: b6495c0c8f100b882d85774f44529519befefba9
24 changes: 21 additions & 3 deletions trunk/arch/powerpc/platforms/pseries/eeh.c
Original file line number Diff line number Diff line change
Expand Up @@ -450,11 +450,16 @@ eeh_slot_availability(struct pci_dn *pdn)
if (rc) return rc;

if (rets[1] == 0) return -1; /* EEH is not supported */
if (rets[0] == 0) return 0; /* Oll Korrect */
if (rets[0] == 0) return 0; /* Oll Korrect */
if (rets[0] == 5) {
if (rets[2] == 0) return -1; /* permanently unavailable */
return rets[2]; /* number of millisecs to wait */
}
if (rets[0] == 1)
return 250;

printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n",
rc, rets[0], rets[1], rets[2]);
return -1;
}

Expand Down Expand Up @@ -501,9 +506,11 @@ rtas_pci_slot_reset(struct pci_dn *pdn, int state)

/** rtas_set_slot_reset -- assert the pci #RST line for 1/4 second
* pdn -- device node to be reset.
*
* Return 0 if success, else a non-zero value.
*/

void
int
rtas_set_slot_reset(struct pci_dn *pdn)
{
int i, rc;
Expand Down Expand Up @@ -533,10 +540,21 @@ rtas_set_slot_reset(struct pci_dn *pdn)
* ready to be used; if not, wait for recovery. */
for (i=0; i<10; i++) {
rc = eeh_slot_availability (pdn);
if (rc <= 0) break;
if (rc < 0)
printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", rc, pdn->node->full_name);
if (rc == 0)
return 0;
if (rc < 0)
return -1;

msleep (rc+100);
}

rc = eeh_slot_availability (pdn);
if (rc)
printk (KERN_ERR "EEH: timeout resetting slot %s\n", pdn->node->full_name);

return rc;
}

/* ------------------------------------------------------- */
Expand Down
81 changes: 45 additions & 36 deletions trunk/arch/powerpc/platforms/pseries/eeh_driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,18 @@ static void eeh_report_failure(struct pci_dev *dev, void *userdata)
* bus resets can be performed.
*/

static void eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
static int eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
{
int rc;
if (bus)
pcibios_remove_pci_devices(bus);

/* Reset the pci controller. (Asserts RST#; resets config space).
* Reconfigure bridges and devices */
rtas_set_slot_reset(pe_dn);
* Reconfigure bridges and devices. Don't try to bring the system
* up if the reset failed for some reason. */
rc = rtas_set_slot_reset(pe_dn);
if (rc)
return rc;

/* Walk over all functions on this device */
rtas_configure_bridge(pe_dn);
Expand All @@ -223,6 +227,8 @@ static void eeh_reset_device (struct pci_dn *pe_dn, struct pci_bus *bus)
ssleep (5);
pcibios_add_pci_devices(bus);
}

return 0;
}

/* The longest amount of time to wait for a pci device
Expand All @@ -235,7 +241,7 @@ void handle_eeh_events (struct eeh_event *event)
struct device_node *frozen_dn;
struct pci_dn *frozen_pdn;
struct pci_bus *frozen_bus;
int perm_failure = 0;
int rc = 0;

frozen_dn = find_device_pe(event->dn);
frozen_bus = pcibios_find_pci_bus(frozen_dn);
Expand Down Expand Up @@ -272,42 +278,15 @@ void handle_eeh_events (struct eeh_event *event)
frozen_pdn->eeh_freeze_count++;

if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
perm_failure = 1;
goto hard_fail;

/* If the reset state is a '5' and the time to reset is 0 (infinity)
* or is more than 15 seconds, then mark this as a permanent failure.
*/
if ((event->state == pci_channel_io_perm_failure) &&
((event->time_unavail <= 0) ||
(event->time_unavail > MAX_WAIT_FOR_RECOVERY*1000)))
{
perm_failure = 1;
}

/* Log the error with the rtas logger. */
if (perm_failure) {
/*
* About 90% of all real-life EEH failures in the field
* are due to poorly seated PCI cards. Only 10% or so are
* due to actual, failed cards.
*/
printk(KERN_ERR
"EEH: PCI device %s - %s has failed %d times \n"
"and has been permanently disabled. Please try reseating\n"
"this device or replacing it.\n",
pci_name (frozen_pdn->pcidev),
pcid_name(frozen_pdn->pcidev),
frozen_pdn->eeh_freeze_count);

eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */);

/* Notify all devices that they're about to go down. */
pci_walk_bus(frozen_bus, eeh_report_failure, 0);

/* Shut down the device drivers for good. */
pcibios_remove_pci_devices(frozen_bus);
return;
}
goto hard_fail;

eeh_slot_error_detail(frozen_pdn, 1 /* Temporary Error */);
printk(KERN_WARNING
Expand All @@ -330,24 +309,54 @@ void handle_eeh_events (struct eeh_event *event)
* go down willingly, without panicking the system.
*/
if (result == PCIERR_RESULT_NONE) {
eeh_reset_device(frozen_pdn, frozen_bus);
rc = eeh_reset_device(frozen_pdn, frozen_bus);
if (rc)
goto hard_fail;
}

/* If any device called out for a reset, then reset the slot */
if (result == PCIERR_RESULT_NEED_RESET) {
eeh_reset_device(frozen_pdn, NULL);
rc = eeh_reset_device(frozen_pdn, NULL);
if (rc)
goto hard_fail;
pci_walk_bus(frozen_bus, eeh_report_reset, 0);
}

/* If all devices reported they can proceed, then re-enable PIO */
if (result == PCIERR_RESULT_CAN_RECOVER) {
/* XXX Not supported; we brute-force reset the device */
eeh_reset_device(frozen_pdn, NULL);
rc = eeh_reset_device(frozen_pdn, NULL);
if (rc)
goto hard_fail;
pci_walk_bus(frozen_bus, eeh_report_reset, 0);
}

/* Tell all device drivers that they can resume operations */
pci_walk_bus(frozen_bus, eeh_report_resume, 0);

return;

hard_fail:
/*
* About 90% of all real-life EEH failures in the field
* are due to poorly seated PCI cards. Only 10% or so are
* due to actual, failed cards.
*/
printk(KERN_ERR
"EEH: PCI device %s - %s has failed %d times \n"
"and has been permanently disabled. Please try reseating\n"
"this device or replacing it.\n",
pci_name (frozen_pdn->pcidev),
pcid_name(frozen_pdn->pcidev),
frozen_pdn->eeh_freeze_count);

eeh_slot_error_detail(frozen_pdn, 2 /* Permanent Error */);

/* Notify all devices that they're about to go down. */
pci_walk_bus(frozen_bus, eeh_report_failure, 0);

/* Shut down the device drivers for good. */
pcibios_remove_pci_devices(frozen_bus);
}

/* ---------- end of file ---------- */
4 changes: 3 additions & 1 deletion trunk/include/asm-powerpc/ppc-pci.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@ void eeh_slot_error_detail (struct pci_dn *pdn, int severity);
* does this by asserting the PCI #RST line for 1/8th of
* a second; this routine will sleep while the adapter is
* being reset.
*
* Returns a non-zero value if the reset failed.
*/
void rtas_set_slot_reset (struct pci_dn *);
int rtas_set_slot_reset (struct pci_dn *);

/**
* eeh_restore_bars - Restore device configuration info.
Expand Down

0 comments on commit f6a629e

Please sign in to comment.