Skip to content

Commit

Permalink
RDMA/cxgb4: Reset wait condition atomically
Browse files Browse the repository at this point in the history
The driver was never really waiting for RDMA_WR/FINI completions
because the condition variable used to determine if the completion
happened was never reset, and this condition variable is reused for
both connection setup and teardown.  This causes various driver
crashes under heavy loads due to releasing resources too early.

The fix is to use atomic bits to correctly reset the condition
immediately after the completion is detected.

Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
  • Loading branch information
Steve Wise authored and Roland Dreier committed May 10, 2011
1 parent 85d215b commit d9594d9
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 30 deletions.
30 changes: 7 additions & 23 deletions drivers/infiniband/hw/cxgb4/cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1198,9 +1198,7 @@ static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
}
PDBG("%s ep %p status %d error %d\n", __func__, ep,
rpl->status, status2errno(rpl->status));
ep->com.wr_wait.ret = status2errno(rpl->status);
ep->com.wr_wait.done = 1;
wake_up(&ep->com.wr_wait.wait);
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));

return 0;
}
Expand Down Expand Up @@ -1234,9 +1232,7 @@ static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
struct c4iw_listen_ep *ep = lookup_stid(t, stid);

PDBG("%s ep %p\n", __func__, ep);
ep->com.wr_wait.ret = status2errno(rpl->status);
ep->com.wr_wait.done = 1;
wake_up(&ep->com.wr_wait.wait);
c4iw_wake_up(&ep->com.wr_wait, status2errno(rpl->status));
return 0;
}

Expand Down Expand Up @@ -1492,17 +1488,13 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
* in rdma connection migration (see c4iw_accept_cr()).
*/
__state_set(&ep->com, CLOSING);
ep->com.wr_wait.done = 1;
ep->com.wr_wait.ret = -ECONNRESET;
PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
wake_up(&ep->com.wr_wait.wait);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
break;
case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
ep->com.wr_wait.done = 1;
ep->com.wr_wait.ret = -ECONNRESET;
PDBG("waking up ep %p tid %u\n", ep, ep->hwtid);
wake_up(&ep->com.wr_wait.wait);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
break;
case FPDU_MODE:
start_ep_timer(ep);
Expand Down Expand Up @@ -1579,9 +1571,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
/*
* Wake up any threads in rdma_init() or rdma_fini().
*/
ep->com.wr_wait.done = 1;
ep->com.wr_wait.ret = -ECONNRESET;
wake_up(&ep->com.wr_wait.wait);
c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);

mutex_lock(&ep->com.mutex);
switch (ep->com.state) {
Expand Down Expand Up @@ -2294,14 +2284,8 @@ static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
PDBG("%s wr_waitp %p ret %u\n", __func__, wr_waitp, ret);
if (wr_waitp) {
if (ret)
wr_waitp->ret = -ret;
else
wr_waitp->ret = 0;
wr_waitp->done = 1;
wake_up(&wr_waitp->wait);
}
if (wr_waitp)
c4iw_wake_up(wr_waitp, ret ? -ret : 0);
kfree_skb(skb);
break;
case 2:
Expand Down
26 changes: 19 additions & 7 deletions drivers/infiniband/hw/cxgb4/iw_cxgb4.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,42 +131,54 @@ static inline int c4iw_num_stags(struct c4iw_rdev *rdev)

#define C4IW_WR_TO (10*HZ)

/*
 * Bit numbers within c4iw_wr_wait.status.
 * REPLY_READY is set by c4iw_wake_up() and consumed with
 * test_and_clear_bit() by the waiter in c4iw_wait_for_reply().
 */
enum {
REPLY_READY = 0,
};

struct c4iw_wr_wait {
wait_queue_head_t wait;
int done;
unsigned long status;
int ret;
};

static inline void c4iw_init_wr_wait(struct c4iw_wr_wait *wr_waitp)
{
wr_waitp->ret = 0;
wr_waitp->done = 0;
wr_waitp->status = 0;
init_waitqueue_head(&wr_waitp->wait);
}

/*
 * c4iw_wake_up - publish a completion result and wake any waiter.
 * @wr_waitp: wait object being signalled
 * @ret: result code stored in wr_waitp->ret for the waiter to return
 *
 * Stores @ret, sets the REPLY_READY bit in wr_waitp->status, then wakes
 * the wait queue. The result is written before the bit is set so that a
 * waiter that observes the bit can read the result.
 *
 * NOTE(review): set_bit() by itself may not order the preceding store to
 * ->ret on weakly ordered CPUs -- confirm whether an explicit barrier is
 * needed before the bit is set.
 */
static inline void c4iw_wake_up(struct c4iw_wr_wait *wr_waitp, int ret)
{
wr_waitp->ret = ret;
set_bit(REPLY_READY, &wr_waitp->status);
wake_up(&wr_waitp->wait);
}

static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
struct c4iw_wr_wait *wr_waitp,
u32 hwtid, u32 qpid,
const char *func)
{
unsigned to = C4IW_WR_TO;
do {
int ret;

wait_event_timeout(wr_waitp->wait, wr_waitp->done, to);
if (!wr_waitp->done) {
do {
ret = wait_event_timeout(wr_waitp->wait,
test_and_clear_bit(REPLY_READY, &wr_waitp->status), to);
if (!ret) {
printk(KERN_ERR MOD "%s - Device %s not responding - "
"tid %u qpid %u\n", func,
pci_name(rdev->lldi.pdev), hwtid, qpid);
to = to << 2;
}
} while (!wr_waitp->done);
} while (!ret);
if (wr_waitp->ret)
PDBG("%s: FW reply %d tid %u qpid %u\n",
pci_name(rdev->lldi.pdev), wr_waitp->ret, hwtid, qpid);
return wr_waitp->ret;
}


struct c4iw_dev {
struct ib_device ibdev;
struct c4iw_rdev rdev;
Expand Down

0 comments on commit d9594d9

Please sign in to comment.