Skip to content

Commit

Permalink
IB/ipath: Head of Line blocking vs forward progress of user apps
Browse files Browse the repository at this point in the history
There's a conflict between our need to quiesce PSM-based applications
to avoid HoL blocking when the IB link goes down and the apps' desire
to remain running so that their quiescence timout mechanism can keep
running.

The compromise is to STOP the processes for a fixed period of time and
then alternate between CONT and STOP until the link is again active.

If there are poor interactions with subnet manager configuration at a
given site, the interval can be adjusted via a module paramter.

Signed-off-by: John Gregor <john.gregor@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
John Gregor authored and Roland Dreier committed Apr 17, 2008
1 parent 6be979d commit 58411d1
Show file tree
Hide file tree
Showing 7 changed files with 310 additions and 182 deletions.
1 change: 1 addition & 0 deletions drivers/infiniband/hw/ipath/ipath_debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@
#define __IPATH_IPATHERR 0x40000 /* Ethernet (IPATH) errors */
#define __IPATH_IPATHPD 0x80000 /* Ethernet (IPATH) packet dump */
#define __IPATH_IPATHTABLE 0x100000 /* Ethernet (IPATH) table dump */
#define __IPATH_LINKVERBDBG 0x200000 /* very verbose linkchange debug */

#else /* _IPATH_DEBUGGING */

Expand Down
18 changes: 14 additions & 4 deletions drivers/infiniband/hw/ipath/ipath_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
struct ipath_devdata *dd;
ssize_t ret = 0;
u64 val;
u32 l_state, lt_state; /* LinkState, LinkTrainingState */

if (count != sizeof(dp)) {
ret = -EINVAL;
Expand Down Expand Up @@ -396,10 +397,17 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
ret = -ENODEV;
goto bail;
}
/* Check link state, but not if we have custom PBC */
val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT &&
val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) {
/*
* Want to skip check for l_state if using custom PBC,
* because we might be trying to force an SM packet out.
* first-cut, skip _all_ state checking in that case.
*/
val = ipath_ib_state(dd, dd->ipath_lastibcstat);
lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
(val != dd->ib_init && val != dd->ib_arm &&
val != dd->ib_active))) {
ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
dd->ipath_unit, (unsigned long long) val);
ret = -EINVAL;
Expand Down Expand Up @@ -438,6 +446,8 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
ret = -EBUSY;
goto bail;
}
/* disarm it just to be extra sure */
ipath_disarm_piobufs(dd, pbufn, 1);

plen >>= 2; /* in dwords */

Expand Down
129 changes: 121 additions & 8 deletions drivers/infiniband/hw/ipath/ipath_driver.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");

static unsigned ipath_hol_timeout_ms = 13000;
module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
MODULE_PARM_DESC(hol_timeout_ms,
"duration of user app suspension after link failure");

MODULE_LICENSE("GPL");
MODULE_AUTHOR("QLogic <support@pathscale.com>");
MODULE_DESCRIPTION("QLogic InfiniPath driver");
Expand Down Expand Up @@ -1670,11 +1675,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
"is %s\n", dd->ipath_unit,
what[linkcmd],
ipath_ibcstatus_str[
(ipath_read_kreg64
(dd, dd->ipath_kregs->kr_ibcstatus) >>
INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
/* flush all queued sends when going to DOWN to be sure that
* they don't block MAD packets */
if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN)
Expand Down Expand Up @@ -1925,9 +1927,8 @@ static void ipath_run_led_override(unsigned long opaque)
*/
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
INFINIPATH_IBCS_LINKSTATE_MASK;
dd->ibcs_lts_mask;
lstate = (val >> dd->ibcs_ls_shift) & INFINIPATH_IBCS_LINKSTATE_MASK;

dd->ipath_f_setextled(dd, lstate, ltstate);
mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
Expand Down Expand Up @@ -1988,6 +1989,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)

ipath_dbg("Shutting down the device\n");

ipath_hol_up(dd); /* make sure user processes aren't suspended */

dd->ipath_flags |= IPATH_LINKUNK;
dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
IPATH_LINKINIT | IPATH_LINKARMED |
Expand Down Expand Up @@ -2037,6 +2040,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
*/
dd->ipath_f_quiet_serdes(dd);

/* stop all the timers that might still be running */
del_timer_sync(&dd->ipath_hol_timer);
if (dd->ipath_stats_timer_active) {
del_timer_sync(&dd->ipath_stats_timer);
dd->ipath_stats_timer_active = 0;
Expand Down Expand Up @@ -2252,6 +2257,114 @@ int ipath_reset_device(int unit)
return ret;
}

/*
* send a signal to all the processes that have the driver open
* through the normal interfaces (i.e., everything other than diags
* interface). Returns number of signalled processes.
*/
static int ipath_signal_procs(struct ipath_devdata *dd, int sig)
{
int i, sub, any = 0;
pid_t pid;

if (!dd->ipath_pd)
return 0;
for (i = 1; i < dd->ipath_cfgports; i++) {
if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
!dd->ipath_pd[i]->port_pid)
continue;
pid = dd->ipath_pd[i]->port_pid;
dev_info(&dd->pcidev->dev, "context %d in use "
"(PID %u), sending signal %d\n",
i, pid, sig);
kill_proc(pid, sig, 1);
any++;
for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
pid = dd->ipath_pd[i]->port_subpid[sub];
if (!pid)
continue;
dev_info(&dd->pcidev->dev, "sub-context "
"%d:%d in use (PID %u), sending "
"signal %d\n", i, sub, pid, sig);
kill_proc(pid, sig, 1);
any++;
}
}
return any;
}

static void ipath_hol_signal_down(struct ipath_devdata *dd)
{
if (ipath_signal_procs(dd, SIGSTOP))
ipath_dbg("Stopped some processes\n");
ipath_cancel_sends(dd, 1);
}


static void ipath_hol_signal_up(struct ipath_devdata *dd)
{
if (ipath_signal_procs(dd, SIGCONT))
ipath_dbg("Continued some processes\n");
}

/*
* link is down, stop any users processes, and flush pending sends
* to prevent HoL blocking, then start the HoL timer that
* periodically continues, then stop procs, so they can detect
* link down if they want, and do something about it.
* Timer may already be running, so use __mod_timer, not add_timer.
*/
void ipath_hol_down(struct ipath_devdata *dd)
{
dd->ipath_hol_state = IPATH_HOL_DOWN;
ipath_hol_signal_down(dd);
dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
dd->ipath_hol_timer.expires = jiffies +
msecs_to_jiffies(ipath_hol_timeout_ms);
__mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
}

/*
* link is up, continue any user processes, and ensure timer
* is a nop, if running. Let timer keep running, if set; it
* will nop when it sees the link is up
*/
void ipath_hol_up(struct ipath_devdata *dd)
{
ipath_hol_signal_up(dd);
dd->ipath_hol_state = IPATH_HOL_UP;
}

/*
* toggle the running/not running state of user proceses
* to prevent HoL blocking on chip resources, but still allow
* user processes to do link down special case handling.
* Should only be called via the timer
*/
void ipath_hol_event(unsigned long opaque)
{
struct ipath_devdata *dd = (struct ipath_devdata *)opaque;

if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
&& dd->ipath_hol_state != IPATH_HOL_UP) {
dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
ipath_dbg("Stopping processes\n");
ipath_hol_signal_down(dd);
} else { /* may do "extra" if also in ipath_hol_up() */
dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
ipath_dbg("Continuing processes\n");
ipath_hol_signal_up(dd);
}
if (dd->ipath_hol_state == IPATH_HOL_UP)
ipath_dbg("link's up, don't resched timer\n");
else {
dd->ipath_hol_timer.expires = jiffies +
msecs_to_jiffies(ipath_hol_timeout_ms);
__mod_timer(&dd->ipath_hol_timer,
dd->ipath_hol_timer.expires);
}
}

int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
{
u64 val;
Expand Down
6 changes: 6 additions & 0 deletions drivers/infiniband/hw/ipath/ipath_init_chip.c
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
dd->ipath_stats_timer_active = 1;
}

/* Set up HoL state */
init_timer(&dd->ipath_hol_timer);
dd->ipath_hol_timer.function = ipath_hol_event;
dd->ipath_hol_timer.data = (unsigned long)dd;
dd->ipath_hol_state = IPATH_HOL_UP;

done:
if (!ret) {
*dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
Expand Down
Loading

0 comments on commit 58411d1

Please sign in to comment.