Skip to content

Commit

Permalink
IB/ipath: Workaround problem of errormask register being overwritten
Browse files Browse the repository at this point in the history
On some system hardware, we are seeing moderately common cases of the
chip errormask register being overwritten due to a chip bug in iba6120
that is triggered by a vendor-specific PCIe broadcast message.  This
patch merely checks periodically, and corrects it if needed (the
overwrite can cause us to not get error and hardware error
interrupts).  Also, make dd->ipath_errormask the one, true canonical
source for kr_errormask, and remove references to ipath_ignorederrs as
it is currently unused.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Dave Olson authored and Roland Dreier committed Jul 30, 2007
1 parent 3810f2a commit 78d1e02
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 29 deletions.
5 changes: 3 additions & 2 deletions drivers/infiniband/hw/ipath/ipath_init_chip.c
Original file line number Diff line number Diff line change
Expand Up @@ -851,13 +851,14 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrmask,
dd->ipath_hwerrmask);

dd->ipath_maskederrs = dd->ipath_ignorederrs;
/* clear all */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
/* enable errors that are masked, at least this first time. */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
/* clear any interrups up to this point (ints still not enabled) */
dd->ipath_errormask = ipath_read_kreg64(dd,
dd->ipath_kregs->kr_errormask);
/* clear any interrupts up to this point (ints still not enabled) */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);

/*
Expand Down
25 changes: 13 additions & 12 deletions drivers/infiniband/hw/ipath/ipath_intr.c
Original file line number Diff line number Diff line change
Expand Up @@ -517,10 +517,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)

supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);

/*
* don't report errors that are masked (includes those always
* ignored)
*/
/* don't report errors that are masked */
errs &= ~dd->ipath_maskederrs;

/* do these first, they are most important */
Expand Down Expand Up @@ -566,19 +563,19 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
* ones on this particular interrupt, which also isn't great
*/
dd->ipath_maskederrs |= dd->ipath_lasterror | errs;
dd->ipath_errormask &= ~dd->ipath_maskederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
dd->ipath_errormask);
s_iserr = ipath_decode_err(msg, sizeof msg,
(dd->ipath_maskederrs & ~dd->
ipath_ignorederrs));
dd->ipath_maskederrs);

if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
if (dd->ipath_maskederrs &
~(INFINIPATH_E_RRCVEGRFULL |
INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
ipath_dev_err(dd, "Temporarily disabling "
"error(s) %llx reporting; too frequent (%s)\n",
(unsigned long long) (dd->ipath_maskederrs &
~dd->ipath_ignorederrs), msg);
(unsigned long long)dd->ipath_maskederrs,
msg);
else {
/*
* rcvegrfull and rcvhdrqfull are "normal",
Expand Down Expand Up @@ -793,6 +790,9 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
/* disable error interrupts, to avoid confusion */
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);

/* also disable interrupts; errormask is sometimes overwriten */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);

/*
* clear all sends, because they have may been
* completed by usercode while in freeze mode, and
Expand All @@ -817,7 +817,7 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
for (i = 0; i < dd->ipath_pioavregs; i++) {
/* deal with 6110 chip bug */
im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64)));
val = ipath_read_kreg64(dd, (0x1000/sizeof(u64))+im);
dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
= le64_to_cpu(val);
}
Expand All @@ -832,7 +832,8 @@ void ipath_clear_freeze(struct ipath_devdata *dd)
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
E_SPKT_ERRS_IGNORE);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
dd->ipath_errormask);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, -1LL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
}

Expand Down
11 changes: 2 additions & 9 deletions drivers/infiniband/hw/ipath/ipath_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -261,18 +261,10 @@ struct ipath_devdata {
* limiting of hwerror reporting
*/
ipath_err_t ipath_lasthwerror;
/*
* errors masked because they occur too fast, also includes errors
* that are always ignored (ipath_ignorederrs)
*/
/* errors masked because they occur too fast */
ipath_err_t ipath_maskederrs;
/* time in jiffies at which to re-enable maskederrs */
unsigned long ipath_unmasktime;
/*
* errors always ignored (masked), at least for a given
* chip/device, because they are wrong or not useful
*/
ipath_err_t ipath_ignorederrs;
/* count of egrfull errors, combined for all ports */
u64 ipath_last_tidfull;
/* for ipath_qcheck() */
Expand Down Expand Up @@ -436,6 +428,7 @@ struct ipath_devdata {
u64 ipath_lastibcstat;
/* hwerrmask shadow */
ipath_err_t ipath_hwerrmask;
ipath_err_t ipath_errormask; /* errormask shadow */
/* interrupt config reg shadow */
u64 ipath_intconfig;
/* kr_sendpiobufbase value */
Expand Down
54 changes: 48 additions & 6 deletions drivers/infiniband/hw/ipath/ipath_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,45 @@ static void ipath_qcheck(struct ipath_devdata *dd)
}
}

static void ipath_chk_errormask(struct ipath_devdata *dd)
{
static u32 fixed;
u32 ctrl;
unsigned long errormask;
unsigned long hwerrs;

if (!dd->ipath_errormask || !(dd->ipath_flags & IPATH_INITTED))
return;

errormask = ipath_read_kreg64(dd, dd->ipath_kregs->kr_errormask);

if (errormask == dd->ipath_errormask)
return;
fixed++;

hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);

ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
dd->ipath_errormask);

if ((hwerrs & dd->ipath_hwerrmask) ||
(ctrl & INFINIPATH_C_FREEZEMODE)) {
/* force re-interrupt of pending events, just in case */
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, 0ULL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
dev_info(&dd->pcidev->dev,
"errormask fixed(%u) %lx -> %lx, ctrl %x hwerr %lx\n",
fixed, errormask, (unsigned long)dd->ipath_errormask,
ctrl, hwerrs);
} else
ipath_dbg("errormask fixed(%u) %lx -> %lx, no freeze\n",
fixed, errormask,
(unsigned long)dd->ipath_errormask);
}


/**
* ipath_get_faststats - get word counters from chip before they overflow
* @opaque - contains a pointer to the infinipath device ipath_devdata
Expand Down Expand Up @@ -251,14 +290,13 @@ void ipath_get_faststats(unsigned long opaque)
dd->ipath_lasterror = 0;
if (dd->ipath_lasthwerror)
dd->ipath_lasthwerror = 0;
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs)
if (dd->ipath_maskederrs
&& time_after(jiffies, dd->ipath_unmasktime)) {
char ebuf[256];
int iserr;
iserr = ipath_decode_err(ebuf, sizeof ebuf,
(dd->ipath_maskederrs & ~dd->
ipath_ignorederrs));
if ((dd->ipath_maskederrs & ~dd->ipath_ignorederrs) &
dd->ipath_maskederrs);
if (dd->ipath_maskederrs &
~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
INFINIPATH_E_PKTERRS ))
ipath_dev_err(dd, "Re-enabling masked errors "
Expand All @@ -278,9 +316,12 @@ void ipath_get_faststats(unsigned long opaque)
ipath_cdbg(ERRPKT, "Re-enabling packet"
" problem interrupt (%s)\n", ebuf);
}
dd->ipath_maskederrs = dd->ipath_ignorederrs;

/* re-enable masked errors */
dd->ipath_errormask |= dd->ipath_maskederrs;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
~dd->ipath_maskederrs);
dd->ipath_errormask);
dd->ipath_maskederrs = 0;
}

/* limit qfull messages to ~one per minute per port */
Expand All @@ -294,6 +335,7 @@ void ipath_get_faststats(unsigned long opaque)
}
}

ipath_chk_errormask(dd);
done:
mod_timer(&dd->ipath_stats_timer, jiffies + HZ * 5);
}

0 comments on commit 78d1e02

Please sign in to comment.