Skip to content

Commit

Permalink
IB/ipath: Log "active" time and some errors to EEPROM
Browse files Browse the repository at this point in the history
We currently track various errors, now we enhance that capability by
logging some of them to EEPROM.  We also now log a cumulative "active"
time defined by traffic though the InfiniPath HCA beyond the normal SM
traffic.

Signed-off-by: Michael Albaugh <michael.albaugh@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Michael Albaugh authored and Roland Dreier committed Jul 10, 2007
1 parent 8e9ab3f commit aecd3b5
Show file tree
Hide file tree
Showing 9 changed files with 370 additions and 8 deletions.
3 changes: 3 additions & 0 deletions drivers/infiniband/hw/ipath/ipath_driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -2005,6 +2005,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);

ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
ipath_update_eeprom_log(dd);
}

/**
Expand Down
233 changes: 227 additions & 6 deletions drivers/infiniband/hw/ipath/ipath_eeprom.c
Original file line number Diff line number Diff line change
Expand Up @@ -367,8 +367,8 @@ static int eeprom_reset(struct ipath_devdata *dd)
* @len: number of bytes to receive
*/

int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
void *buffer, int len)
static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
u8 eeprom_offset, void *buffer, int len)
{
/* compiler complains unless initialized */
u8 single_byte = 0;
Expand Down Expand Up @@ -418,15 +418,16 @@ int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
return ret;
}


/**
* ipath_eeprom_write - writes data to the eeprom via I2C
* @dd: the infinipath device
* @eeprom_offset: where to place data
* @buffer: data to write
* @len: number of bytes to write
*/
int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
const void *buffer, int len)
int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
const void *buffer, int len)
{
u8 single_byte;
int sub_len;
Expand Down Expand Up @@ -500,6 +501,38 @@ int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
return ret;
}

/*
* The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
* are now just wrappers around the internal functions.
*/
int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
void *buff, int len)
{
int ret;

ret = down_interruptible(&dd->ipath_eep_sem);
if (!ret) {
ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
up(&dd->ipath_eep_sem);
}

return ret;
}

int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
const void *buff, int len)
{
int ret;

ret = down_interruptible(&dd->ipath_eep_sem);
if (!ret) {
ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
up(&dd->ipath_eep_sem);
}

return ret;
}

static u8 flash_csum(struct ipath_flash *ifp, int adjust)
{
u8 *ip = (u8 *) ifp;
Expand Down Expand Up @@ -527,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
void *buf;
struct ipath_flash *ifp;
__be64 guid;
int len;
int len, eep_stat;
u8 csum, *bguid;
int t = dd->ipath_unit;
struct ipath_devdata *dd0 = ipath_lookup(0);
Expand Down Expand Up @@ -571,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
goto bail;
}

if (ipath_eeprom_read(dd, 0, buf, len)) {
down(&dd->ipath_eep_sem);
eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
up(&dd->ipath_eep_sem);

if (eep_stat) {
ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
goto done;
}
Expand Down Expand Up @@ -646,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
(unsigned long long) be64_to_cpu(dd->ipath_guid));

memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
/*
* Power-on (actually "active") hours are kept as little-endian value
* in EEPROM, but as seconds in a (possibly as small as 24-bit)
* atomic_t while running.
*/
atomic_set(&dd->ipath_active_time, 0);
dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);

done:
vfree(buf);

bail:;
}

/**
* ipath_update_eeprom_log - copy active-time and error counters to eeprom
* @dd: the infinipath device
*
* Although the time is kept as seconds in the ipath_devdata struct, it is
* rounded to hours for re-write, as we have only 16 bits in EEPROM.
* First-cut code reads whole (expected) struct ipath_flash, modifies,
* re-writes. Future direction: read/write only what we need, assuming
* that the EEPROM had to have been "good enough" for driver init, and
* if not, we aren't making it worse.
*
*/

int ipath_update_eeprom_log(struct ipath_devdata *dd)
{
void *buf;
struct ipath_flash *ifp;
int len, hi_water;
uint32_t new_time, new_hrs;
u8 csum;
int ret, idx;
unsigned long flags;

/* first, check if we actually need to do anything. */
ret = 0;
for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
if (dd->ipath_eep_st_new_errs[idx]) {
ret = 1;
break;
}
}
new_time = atomic_read(&dd->ipath_active_time);

if (ret == 0 && new_time < 3600)
return 0;

/*
* The quick-check above determined that there is something worthy
* of logging, so get current contents and do a more detailed idea.
*/
len = offsetof(struct ipath_flash, if_future);
buf = vmalloc(len);
ret = 1;
if (!buf) {
ipath_dev_err(dd, "Couldn't allocate memory to read %u "
"bytes from eeprom for logging\n", len);
goto bail;
}

/* Grab semaphore and read current EEPROM. If we get an
* error, let go, but if not, keep it until we finish write.
*/
ret = down_interruptible(&dd->ipath_eep_sem);
if (ret) {
ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
goto free_bail;
}
ret = ipath_eeprom_internal_read(dd, 0, buf, len);
if (ret) {
up(&dd->ipath_eep_sem);
ipath_dev_err(dd, "Unable read EEPROM for logging\n");
goto free_bail;
}
ifp = (struct ipath_flash *)buf;

csum = flash_csum(ifp, 0);
if (csum != ifp->if_csum) {
up(&dd->ipath_eep_sem);
ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
csum, ifp->if_csum);
ret = 1;
goto free_bail;
}
hi_water = 0;
spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
int new_val = dd->ipath_eep_st_new_errs[idx];
if (new_val) {
/*
* If we have seen any errors, add to EEPROM values
* We need to saturate at 0xFF (255) and we also
* would need to adjust the checksum if we were
* trying to minimize EEPROM traffic
* Note that we add to actual current count in EEPROM,
* in case it was altered while we were running.
*/
new_val += ifp->if_errcntp[idx];
if (new_val > 0xFF)
new_val = 0xFF;
if (ifp->if_errcntp[idx] != new_val) {
ifp->if_errcntp[idx] = new_val;
hi_water = offsetof(struct ipath_flash,
if_errcntp) + idx;
}
/*
* update our shadow (used to minimize EEPROM
* traffic), to match what we are about to write.
*/
dd->ipath_eep_st_errs[idx] = new_val;
dd->ipath_eep_st_new_errs[idx] = 0;
}
}
/*
* now update active-time. We would like to round to the nearest hour
* but unless atomic_t are sure to be proper signed ints we cannot,
* because we need to account for what we "transfer" to EEPROM and
* if we log an hour at 31 minutes, then we would need to set
* active_time to -29 to accurately count the _next_ hour.
*/
if (new_time > 3600) {
new_hrs = new_time / 3600;
atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
new_hrs += dd->ipath_eep_hrs;
if (new_hrs > 0xFFFF)
new_hrs = 0xFFFF;
dd->ipath_eep_hrs = new_hrs;
if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
ifp->if_powerhour[0] = new_hrs & 0xFF;
hi_water = offsetof(struct ipath_flash, if_powerhour);
}
if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
ifp->if_powerhour[1] = new_hrs >> 8;
hi_water = offsetof(struct ipath_flash, if_powerhour)
+ 1;
}
}
/*
* There is a tiny possibility that we could somehow fail to write
* the EEPROM after updating our shadows, but problems from holding
* the spinlock too long are a much bigger issue.
*/
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
if (hi_water) {
/* we made some change to the data, uopdate cksum and write */
csum = flash_csum(ifp, 1);
ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
}
up(&dd->ipath_eep_sem);
if (ret)
ipath_dev_err(dd, "Failed updating EEPROM\n");

free_bail:
vfree(buf);
bail:
return ret;

}

/**
* ipath_inc_eeprom_err - increment one of the four error counters
* that are logged to EEPROM.
* @dd: the infinipath device
* @eidx: 0..3, the counter to increment
* @incr: how much to add
*
* Each counter is 8-bits, and saturates at 255 (0xFF). They
* are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
* is called, but it can only be called in a context that allows sleep.
* This function can be called even at interrupt level.
*/

void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
{
uint new_val;
unsigned long flags;

spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
if (new_val > 255)
new_val = 255;
dd->ipath_eep_st_new_errs[eidx] = new_val;
spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
return;
}
22 changes: 22 additions & 0 deletions drivers/infiniband/hw/ipath/ipath_iba6110.c
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
u32 bits, ctrl;
int isfatal = 0;
char bitsmsg[64];
int log_idx;

hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);

Expand Down Expand Up @@ -468,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,

hwerrs &= dd->ipath_hwerrmask;

/* We log some errors to EEPROM, check if we have any of those. */
for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
ipath_inc_eeprom_err(dd, log_idx, 1);

/*
* make sure we get this much out, unless told to be quiet,
* it's a parity error we may recover from,
Expand Down Expand Up @@ -1171,6 +1177,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)

dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;

/*
* EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
* 2 is Some Misc, 3 is reserved for future.
*/
dd->ipath_eep_st_masks[0].hwerrs_to_log =
INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;

dd->ipath_eep_st_masks[1].hwerrs_to_log =
INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;

dd->ipath_eep_st_masks[2].errs_to_log =
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;

}

/**
Expand Down
27 changes: 27 additions & 0 deletions drivers/infiniband/hw/ipath/ipath_iba6120.c
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
u32 bits, ctrl;
int isfatal = 0;
char bitsmsg[64];
int log_idx;

hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
if (!hwerrs) {
Expand Down Expand Up @@ -367,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,

hwerrs &= dd->ipath_hwerrmask;

/* We log some errors to EEPROM, check if we have any of those. */
for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
ipath_inc_eeprom_err(dd, log_idx, 1);

/*
* make sure we get this much out, unless told to be quiet,
* or it's occurred within the last 5 seconds
Expand Down Expand Up @@ -950,6 +956,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)

dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;

/*
* EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
* 2 is Some Misc, 3 is reserved for future.
*/
dd->ipath_eep_st_masks[0].hwerrs_to_log =
INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;

/* Ignore errors in PIO/PBC on systems with unordered write-combining */
if (ipath_unordered_wc())
dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;

dd->ipath_eep_st_masks[1].hwerrs_to_log =
INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;

dd->ipath_eep_st_masks[2].errs_to_log =
INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;


}

/* setup the MSI stuff again after a reset. I'd like to just call
Expand Down
2 changes: 2 additions & 0 deletions drivers/infiniband/hw/ipath/ipath_init_chip.c
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,8 @@ static int init_chip_first(struct ipath_devdata *dd,
spin_lock_init(&dd->ipath_tid_lock);

spin_lock_init(&dd->ipath_gpio_lock);
spin_lock_init(&dd->ipath_eep_st_lock);
sema_init(&dd->ipath_eep_sem, 1);

done:
*pdp = pd;
Expand Down
Loading

0 comments on commit aecd3b5

Please sign in to comment.