Skip to content

Commit

Permalink
sparc64: Commonize large portions of PSYCHO error handling.
Browse files Browse the repository at this point in the history
The IOMMU and streaming cache error interrogation is moved here
as well as the PCI error interrupt handler.

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Sep 11, 2008
1 parent 1c03a55 commit e6e0037
Show file tree
Hide file tree
Showing 4 changed files with 369 additions and 579 deletions.
361 changes: 0 additions & 361 deletions arch/sparc64/kernel/pci_psycho.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,6 @@ static void *psycho_pci_config_mkaddr(struct pci_pbm_info *pbm,
}

/* PSYCHO error handling support. */
/* Identifies the kind of error being reported when
 * psycho_check_iommu_error() is invoked.
 */
enum psycho_error_type {
	UE_ERR,		/* uncorrectable error */
	CE_ERR,		/* presumably "correctable error" -- confirm */
	PCI_ERR		/* PCI bus error */
};

/* Helper function of IOMMU error checking, which checks out
* the state of the streaming buffers. The IOMMU lock is
Expand All @@ -125,112 +122,10 @@ enum psycho_error_type {
#define PSYCHO_STC_DATA_B 0xc000UL
#define PSYCHO_STC_ERR_A 0xb400UL
#define PSYCHO_STC_ERR_B 0xc400UL
/* Bits tested in each streaming cache error status word. */
#define PSYCHO_STCERR_WRITE 0x0000000000000002UL /* Write Error */
#define PSYCHO_STCERR_READ 0x0000000000000001UL /* Read Error */
#define PSYCHO_STC_TAG_A 0xb800UL
#define PSYCHO_STC_TAG_B 0xc800UL
/* Fields decoded from each streaming cache tag word. */
#define PSYCHO_STCTAG_PPN 0x0fffffff00000000UL /* Physical Page Number */
#define PSYCHO_STCTAG_VPN 0x00000000ffffe000UL /* Virtual Page Number */
#define PSYCHO_STCTAG_VALID 0x0000000000000002UL /* Valid */
#define PSYCHO_STCTAG_WRITE 0x0000000000000001UL /* Writable */
#define PSYCHO_STC_LINE_A 0xb900UL
#define PSYCHO_STC_LINE_B 0xc900UL
/* Fields decoded from each streaming cache line word. */
#define PSYCHO_STCLINE_LINDX 0x0000000001e00000UL /* LRU Index */
#define PSYCHO_STCLINE_SPTR 0x00000000001f8000UL /* Dirty Data Start Pointer */
#define PSYCHO_STCLINE_LADDR 0x0000000000007f00UL /* Line Address */
#define PSYCHO_STCLINE_EPTR 0x00000000000000fcUL /* Dirty Data End Pointer */
#define PSYCHO_STCLINE_VALID 0x0000000000000002UL /* Valid */
#define PSYCHO_STCLINE_FOFN 0x0000000000000001UL /* Fetch Outstanding / Flush Necessary */

/* Snapshot of the streaming cache diagnostic state. The buffers are
 * filled in and then printed by psycho_check_stc_error() while it
 * holds stc_buf_lock: 128 error status words plus 16 tag words and
 * 16 line words.
 */
static DEFINE_SPINLOCK(stc_buf_lock);
static unsigned long stc_error_buf[128];
static unsigned long stc_tag_buf[16];
static unsigned long stc_line_buf[16];

/* Capture, clear and report the streaming cache (STC) diagnostic state
 * of @pbm.
 *
 * The error status, tag and line diagnostic registers are copied into
 * the file-scope stc_*_buf arrays and zeroed in the hardware while the
 * streaming buffer is in diagnostic mode.  Afterwards every non-zero
 * error status word is printed, followed by the tag and line words of
 * the STC entry it belongs to (8 error words per entry).  The whole
 * sequence runs under stc_buf_lock.
 */
static void psycho_check_stc_error(struct pci_pbm_info *pbm)
{
	struct strbuf *strbuf = &pbm->stc;
	unsigned long err_regs = strbuf->strbuf_err_stat;
	unsigned long tag_regs = strbuf->strbuf_tag_diag;
	unsigned long line_regs = strbuf->strbuf_line_diag;
	u64 saved_ctrl;
	int ent;

	spin_lock(&stc_buf_lock);

	/* This is __REALLY__ dangerous.  When we put the
	 * streaming buffer into diagnostic mode to probe
	 * its tags and error status, we _must_ clear all
	 * of the line tag valid bits before re-enabling
	 * the streaming buffer.  If any dirty data lives
	 * in the STC when we do this, we will end up
	 * invalidating it before it has a chance to reach
	 * main memory.
	 */
	saved_ctrl = psycho_read(strbuf->strbuf_control);
	psycho_write(strbuf->strbuf_control,
		     (saved_ctrl | PSYCHO_STRBUF_CTRL_DENAB));

	/* Log and zero each of the 128 error status words. */
	for (ent = 0; ent < 128; ent++) {
		unsigned long reg = err_regs + (ent * 8UL);

		stc_error_buf[ent] = psycho_read(reg);
		psycho_write(reg, 0UL);
	}

	/* Log both diagnostic words of an entry, then zero them. */
	for (ent = 0; ent < 16; ent++) {
		unsigned long off = ent * 8UL;

		stc_tag_buf[ent] = psycho_read(tag_regs + off);
		stc_line_buf[ent] = psycho_read(line_regs + off);
		psycho_write(tag_regs + off, 0UL);
		psycho_write(line_regs + off, 0UL);
	}

	/* State is captured, restore the original control value. */
	psycho_write(strbuf->strbuf_control, saved_ctrl);

	for (ent = 0; ent < 16; ent++) {
		unsigned long tagval, lineval;
		int have_error = 0;
		int k;

		for (k = 0; k < 8; k++) {
			int idx = (ent * 8) + k;
			unsigned long errval = stc_error_buf[idx];

			if (errval == 0)
				continue;

			have_error = 1;
			printk("%s: STC_ERR(%d)[wr(%d)rd(%d)]\n",
			       pbm->name,
			       idx,
			       (errval & PSYCHO_STCERR_WRITE) ? 1 : 0,
			       (errval & PSYCHO_STCERR_READ) ? 1 : 0);
		}

		/* Tag/line details are only interesting for bad entries. */
		if (!have_error)
			continue;

		tagval = stc_tag_buf[ent];
		lineval = stc_line_buf[ent];
		printk("%s: STC_TAG(%d)[PA(%016lx)VA(%08lx)V(%d)W(%d)]\n",
		       pbm->name,
		       ent,
		       ((tagval & PSYCHO_STCTAG_PPN) >> 19UL),
		       (tagval & PSYCHO_STCTAG_VPN),
		       ((tagval & PSYCHO_STCTAG_VALID) ? 1 : 0),
		       ((tagval & PSYCHO_STCTAG_WRITE) ? 1 : 0));
		printk("%s: STC_LINE(%d)[LIDX(%lx)SP(%lx)LADDR(%lx)EP(%lx)"
		       "V(%d)FOFN(%d)]\n",
		       pbm->name,
		       ent,
		       ((lineval & PSYCHO_STCLINE_LINDX) >> 21UL),
		       ((lineval & PSYCHO_STCLINE_SPTR) >> 15UL),
		       ((lineval & PSYCHO_STCLINE_LADDR) >> 8UL),
		       ((lineval & PSYCHO_STCLINE_EPTR) >> 2UL),
		       ((lineval & PSYCHO_STCLINE_VALID) ? 1 : 0),
		       ((lineval & PSYCHO_STCLINE_FOFN) ? 1 : 0));
	}

	spin_unlock(&stc_buf_lock);
}

/* When an Uncorrectable Error or a PCI Error happens, we
* interrogate the IOMMU state to see if it is the cause.
Expand All @@ -257,122 +152,7 @@ static void psycho_check_stc_error(struct pci_pbm_info *pbm)
#define PSYCHO_IOMMU_TSBBASE 0x0208UL
#define PSYCHO_IOMMU_FLUSH 0x0210UL
#define PSYCHO_IOMMU_TAG 0xa580UL
#define PSYCHO_IOMMU_TAG_ERRSTS (0x3UL << 23UL)
#define PSYCHO_IOMMU_TAG_ERR (0x1UL << 22UL)
#define PSYCHO_IOMMU_TAG_WRITE (0x1UL << 21UL)
#define PSYCHO_IOMMU_TAG_STREAM (0x1UL << 20UL)
#define PSYCHO_IOMMU_TAG_SIZE (0x1UL << 19UL)
#define PSYCHO_IOMMU_TAG_VPAGE 0x7ffffUL
#define PSYCHO_IOMMU_DATA 0xa600UL
#define PSYCHO_IOMMU_DATA_VALID (1UL << 30UL)
#define PSYCHO_IOMMU_DATA_CACHE (1UL << 28UL)
#define PSYCHO_IOMMU_DATA_PPAGE 0xfffffffUL
/* Translate the 2-bit IOMMU error status code, as found in both the
 * IOMMU control register and the TLB tag words, into a printable name.
 */
static const char *psycho_iommu_errtype_string(unsigned long code)
{
	switch (code) {
	case 0:
		return "Protection Error";
	case 1:
		return "Invalid Error";
	case 2:
		return "TimeOut Error";
	case 3:
	default:
		return "ECC Error";
	}
}

/* Interrogate the IOMMU (and then the streaming cache) of @pbm after
 * an error has been signalled.
 *
 * If the IOMMU control register reports a translation error, the error
 * bit is cleared, the error type is logged, and all 16 TLB tag/data
 * entries are dumped and zeroed in diagnostic mode; entries flagged
 * with an error are then printed.  psycho_check_stc_error() is always
 * called afterwards.  Everything runs under iommu->lock with
 * interrupts disabled.
 *
 * @afsr, @afar and @type are accepted for the callers' benefit but are
 * not used by this function.
 */
static void psycho_check_iommu_error(struct pci_pbm_info *pbm,
				     unsigned long afsr,
				     unsigned long afar,
				     enum psycho_error_type type)
{
	struct iommu *iommu = pbm->iommu;
	unsigned long iommu_tag[16];
	unsigned long iommu_data[16];
	unsigned long flags;
	u64 control;
	int i;

	spin_lock_irqsave(&iommu->lock, flags);
	control = psycho_read(iommu->iommu_control);
	if (control & PSYCHO_IOMMU_CTRL_XLTEERR) {
		const char *type_string;

		/* Clear the error encountered bit. */
		control &= ~PSYCHO_IOMMU_CTRL_XLTEERR;
		psycho_write(iommu->iommu_control, control);

		type_string = psycho_iommu_errtype_string(
			(control & PSYCHO_IOMMU_CTRL_XLTESTAT) >> 25UL);
		printk("%s: IOMMU Error, type[%s]\n",
		       pbm->name, type_string);

		/* Put the IOMMU into diagnostic mode and probe
		 * its TLB for entries with error status.
		 *
		 * It is very possible for another DVMA to occur
		 * while we do this probe, and corrupt the system
		 * further.  But we are so screwed at this point
		 * that we are likely to crash hard anyways, so
		 * get as much diagnostic information to the
		 * console as we can.
		 */
		psycho_write(iommu->iommu_control,
			     control | PSYCHO_IOMMU_CTRL_DENAB);
		for (i = 0; i < 16; i++) {
			unsigned long base = pbm->controller_regs;

			iommu_tag[i] =
				psycho_read(base + PSYCHO_IOMMU_TAG + (i * 8UL));
			iommu_data[i] =
				psycho_read(base + PSYCHO_IOMMU_DATA + (i * 8UL));

			/* Now clear out the entry. */
			psycho_write(base + PSYCHO_IOMMU_TAG + (i * 8UL), 0);
			psycho_write(base + PSYCHO_IOMMU_DATA + (i * 8UL), 0);
		}

		/* Leave diagnostic mode. */
		psycho_write(iommu->iommu_control, control);

		for (i = 0; i < 16; i++) {
			unsigned long tag, data;

			/* Only entries flagged with an error are reported. */
			tag = iommu_tag[i];
			if (!(tag & PSYCHO_IOMMU_TAG_ERR))
				continue;

			data = iommu_data[i];
			type_string = psycho_iommu_errtype_string(
				(tag & PSYCHO_IOMMU_TAG_ERRSTS) >> 23UL);
			printk("%s: IOMMU TAG(%d)[error(%s) wr(%d) str(%d) sz(%dK) vpg(%08lx)]\n",
			       pbm->name, i, type_string,
			       ((tag & PSYCHO_IOMMU_TAG_WRITE) ? 1 : 0),
			       ((tag & PSYCHO_IOMMU_TAG_STREAM) ? 1 : 0),
			       ((tag & PSYCHO_IOMMU_TAG_SIZE) ? 64 : 8),
			       (tag & PSYCHO_IOMMU_TAG_VPAGE) << IOMMU_PAGE_SHIFT);
			printk("%s: IOMMU DATA(%d)[valid(%d) cache(%d) ppg(%016lx)]\n",
			       pbm->name, i,
			       ((data & PSYCHO_IOMMU_DATA_VALID) ? 1 : 0),
			       ((data & PSYCHO_IOMMU_DATA_CACHE) ? 1 : 0),
			       (data & PSYCHO_IOMMU_DATA_PPAGE) << IOMMU_PAGE_SHIFT);
		}
	}
	psycho_check_stc_error(pbm);
	spin_unlock_irqrestore(&iommu->lock, flags);
}

/* Uncorrectable Errors. Cause of the error and the address are
* recorded in the UE_AFSR and UE_AFAR of PSYCHO. They are errors
Expand Down Expand Up @@ -540,150 +320,9 @@ static irqreturn_t psycho_ce_intr(int irq, void *dev_id)
*/
#define PSYCHO_PCI_AFSR_A 0x2010UL
#define PSYCHO_PCI_AFSR_B 0x4010UL
/* Bit fields of the PCI AFSR value decoded by psycho_pcierr_intr(). */
#define PSYCHO_PCIAFSR_PMA 0x8000000000000000UL /* Primary Master Abort Error */
#define PSYCHO_PCIAFSR_PTA 0x4000000000000000UL /* Primary Target Abort Error */
#define PSYCHO_PCIAFSR_PRTRY 0x2000000000000000UL /* Primary Excessive Retries */
#define PSYCHO_PCIAFSR_PPERR 0x1000000000000000UL /* Primary Parity Error */
#define PSYCHO_PCIAFSR_SMA 0x0800000000000000UL /* Secondary Master Abort Error */
#define PSYCHO_PCIAFSR_STA 0x0400000000000000UL /* Secondary Target Abort Error */
#define PSYCHO_PCIAFSR_SRTRY 0x0200000000000000UL /* Secondary Excessive Retries */
#define PSYCHO_PCIAFSR_SPERR 0x0100000000000000UL /* Secondary Parity Error */
#define PSYCHO_PCIAFSR_RESV1 0x00ff000000000000UL /* Reserved */
#define PSYCHO_PCIAFSR_BMSK 0x0000ffff00000000UL /* Bytemask of failed transfer */
#define PSYCHO_PCIAFSR_BLK 0x0000000080000000UL /* Trans was block operation */
#define PSYCHO_PCIAFSR_RESV2 0x0000000040000000UL /* Reserved */
#define PSYCHO_PCIAFSR_MID 0x000000003e000000UL /* MID causing the error */
#define PSYCHO_PCIAFSR_RESV3 0x0000000001ffffffUL /* Reserved */
/* NOTE(review): presumably the register offsets of the PCI AFAR that
 * accompanies each AFSR above -- confirm against the probe code.
 */
#define PSYCHO_PCI_AFAR_A 0x2018UL
#define PSYCHO_PCI_AFAR_B 0x4018UL

/* Fallback for PCI error interrupts whose AFSR carries no primary or
 * secondary error bits.
 *
 * Checks the PBM's PCI control/status register for streaming byte hole
 * and SERR conditions, then the PCI_STATUS config word of the bus'
 * bridge device for abort/parity/system errors.  Whatever is found is
 * logged and cleared.
 *
 * Returns IRQ_HANDLED if either source reported an error, otherwise
 * IRQ_NONE.
 */
static irqreturn_t psycho_pcierr_intr_other(struct pci_pbm_info *pbm)
{
	irqreturn_t handled = IRQ_NONE;
	unsigned long ctrl, pending;
	u16 pci_status;

	ctrl = psycho_read(pbm->pci_csr);
	pending = ctrl & (PSYCHO_PCICTRL_SBH_ERR | PSYCHO_PCICTRL_SERR);
	if (pending != 0) {
		/* Clear the errors by writing the value back. */
		psycho_write(pbm->pci_csr, ctrl);

		/* Report each condition that was pending. */
		if (pending & PSYCHO_PCICTRL_SBH_ERR)
			printk("%s: PCI streaming byte hole error asserted.\n",
			       pbm->name);
		if (pending & PSYCHO_PCICTRL_SERR)
			printk("%s: PCI SERR signal asserted.\n", pbm->name);
		handled = IRQ_HANDLED;
	}

	pci_read_config_word(pbm->pci_bus->self, PCI_STATUS, &pci_status);
	if (!(pci_status & (PCI_STATUS_PARITY |
			    PCI_STATUS_SIG_TARGET_ABORT |
			    PCI_STATUS_REC_TARGET_ABORT |
			    PCI_STATUS_REC_MASTER_ABORT |
			    PCI_STATUS_SIG_SYSTEM_ERROR)))
		return handled;

	printk("%s: PCI bus error, PCI_STATUS[%04x]\n",
	       pbm->name, pci_status);
	pci_write_config_word(pbm->pci_bus->self, PCI_STATUS, 0xffff);
	return IRQ_HANDLED;
}

/* PCI error interrupt handler; @dev_id is the struct pci_pbm_info of
 * the PBM that raised the interrupt.
 *
 * Reads the PCI AFAR and AFSR.  When the AFSR holds no primary or
 * secondary error bits the work is delegated to
 * psycho_pcierr_intr_other().  Otherwise the error bits are written
 * back to the AFSR, the error is logged, and the PCI bus is scanned
 * for devices that recorded the matching condition.
 *
 * Returns IRQ_HANDLED when AFSR error bits were present, else whatever
 * psycho_pcierr_intr_other() returns.
 */
static irqreturn_t psycho_pcierr_intr(int irq, void *dev_id)
{
	struct pci_pbm_info *pbm = dev_id;
	unsigned long status_reg = pbm->pci_afsr;
	unsigned long addr_reg = pbm->pci_afar;
	unsigned long afsr, afar, error_bits;
	const char *primary;
	int have_secondary = 0;

	/* Latch error status, address register first. */
	afar = psycho_read(addr_reg);
	afsr = psycho_read(status_reg);

	error_bits = afsr &
		(PSYCHO_PCIAFSR_PMA | PSYCHO_PCIAFSR_PTA |
		 PSYCHO_PCIAFSR_PRTRY | PSYCHO_PCIAFSR_PPERR |
		 PSYCHO_PCIAFSR_SMA | PSYCHO_PCIAFSR_STA |
		 PSYCHO_PCIAFSR_SRTRY | PSYCHO_PCIAFSR_SPERR);
	if (error_bits == 0)
		return psycho_pcierr_intr_other(pbm);

	/* Clear primary/secondary error status bits. */
	psycho_write(status_reg, error_bits);

	/* Name the primary error; the first matching bit wins. */
	if (error_bits & PSYCHO_PCIAFSR_PMA)
		primary = "Master Abort";
	else if (error_bits & PSYCHO_PCIAFSR_PTA)
		primary = "Target Abort";
	else if (error_bits & PSYCHO_PCIAFSR_PRTRY)
		primary = "Excessive Retries";
	else if (error_bits & PSYCHO_PCIAFSR_PPERR)
		primary = "Parity Error";
	else
		primary = "???";

	printk("%s: PCI Error, primary error type[%s]\n",
	       pbm->name, primary);
	printk("%s: bytemask[%04lx] UPA_MID[%02lx] was_block(%d)\n",
	       pbm->name,
	       (afsr & PSYCHO_PCIAFSR_BMSK) >> 32UL,
	       (afsr & PSYCHO_PCIAFSR_MID) >> 25UL,
	       (afsr & PSYCHO_PCIAFSR_BLK) ? 1 : 0);
	printk("%s: PCI AFAR [%016lx]\n", pbm->name, afar);

	/* List every secondary error on one line, or "(none)". */
	printk("%s: PCI Secondary errors [", pbm->name);
	if (afsr & PSYCHO_PCIAFSR_SMA) {
		have_secondary = 1;
		printk("(Master Abort)");
	}
	if (afsr & PSYCHO_PCIAFSR_STA) {
		have_secondary = 1;
		printk("(Target Abort)");
	}
	if (afsr & PSYCHO_PCIAFSR_SRTRY) {
		have_secondary = 1;
		printk("(Excessive Retries)");
	}
	if (afsr & PSYCHO_PCIAFSR_SPERR) {
		have_secondary = 1;
		printk("(Parity Error)");
	}
	if (!have_secondary)
		printk("(none)");
	printk("]\n");

	/* For the error types shown, scan PBM's PCI bus for devices
	 * which have logged that error type.
	 */

	/* A Target Abort could be the result of an IOMMU translation
	 * error of some sort.  Logging the IOMMU state is extremely
	 * useful, as this usually indicates a bug in the IOMMU support
	 * code or in a PCI device driver.
	 */
	if (error_bits & (PSYCHO_PCIAFSR_PTA | PSYCHO_PCIAFSR_STA)) {
		psycho_check_iommu_error(pbm, afsr, afar, PCI_ERR);
		pci_scan_for_target_abort(pbm, pbm->pci_bus);
	}
	if (error_bits & (PSYCHO_PCIAFSR_PMA | PSYCHO_PCIAFSR_SMA))
		pci_scan_for_master_abort(pbm, pbm->pci_bus);

	/* For excessive retries, PSYCHO/PBM will abort the device
	 * and there is no way to specifically check for excessive
	 * retries in the config space status registers.  So what
	 * we hope is that we'll catch it via the master/target
	 * abort events.
	 */

	if (error_bits & (PSYCHO_PCIAFSR_PPERR | PSYCHO_PCIAFSR_SPERR))
		pci_scan_for_parity_error(pbm, pbm->pci_bus);

	return IRQ_HANDLED;
}

/* XXX What about PowerFail/PowerManagement??? -DaveM */
#define PSYCHO_ECC_CTRL 0x0020
#define PSYCHO_ECCCTRL_EE 0x8000000000000000UL /* Enable ECC Checking */
Expand Down
Loading

0 comments on commit e6e0037

Please sign in to comment.