Skip to content

Commit

Permalink
IB/mlx4: Fix memory ordering problem when posting LSO sends
Browse files Browse the repository at this point in the history
The current work request posting code writes the LSO segment before
writing any data segments.  This leaves a window where the LSO segment
overwrites the stamping in one cacheline that the HCA prefetches
before the rest of the cacheline is filled with the correct data
segments.  When the HCA processes this work request, a local
protection error may result.

Fix this by saving the LSO header size field off and writing it only
after all data segments are written.  This fix is a cleaned-up version
of a patch from Jack Morgenstein <jackm@dev.mellanox.co.il>.

This fixes <https://bugs.openfabrics.org/show_bug.cgi?id=1383>.

Reported-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Roland Dreier committed Jan 16, 2009
1 parent d3b924d commit 0fd7e1d
Showing 1 changed file with 19 additions and 9 deletions.
28 changes: 19 additions & 9 deletions drivers/infiniband/hw/mlx4/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1462,7 +1462,8 @@ static void __set_data_seg(struct mlx4_wqe_data_seg *dseg, struct ib_sge *sg)
}

static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
struct mlx4_ib_qp *qp, unsigned *lso_seg_len)
struct mlx4_ib_qp *qp, unsigned *lso_seg_len,
__be32 *lso_hdr_sz)
{
unsigned halign = ALIGN(sizeof *wqe + wr->wr.ud.hlen, 16);

Expand All @@ -1479,12 +1480,8 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,

memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);

/* make sure LSO header is written before overwriting stamping */
wmb();

wqe->mss_hdr_size = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
wr->wr.ud.hlen);

*lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
wr->wr.ud.hlen);
*lso_seg_len = halign;
return 0;
}
Expand Down Expand Up @@ -1518,13 +1515,18 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
int uninitialized_var(stamp);
int uninitialized_var(size);
unsigned uninitialized_var(seglen);
__be32 dummy;
__be32 *lso_wqe;
__be32 uninitialized_var(lso_hdr_sz);
int i;

spin_lock_irqsave(&qp->sq.lock, flags);

ind = qp->sq_next_wqe;

for (nreq = 0; wr; ++nreq, wr = wr->next) {
lso_wqe = &dummy;

if (mlx4_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
err = -ENOMEM;
*bad_wr = wr;
Expand Down Expand Up @@ -1606,11 +1608,12 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
size += sizeof (struct mlx4_wqe_datagram_seg) / 16;

if (wr->opcode == IB_WR_LSO) {
err = build_lso_seg(wqe, wr, qp, &seglen);
err = build_lso_seg(wqe, wr, qp, &seglen, &lso_hdr_sz);
if (unlikely(err)) {
*bad_wr = wr;
goto out;
}
lso_wqe = (__be32 *) wqe;
wqe += seglen;
size += seglen / 16;
}
Expand Down Expand Up @@ -1652,6 +1655,14 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
for (i = wr->num_sge - 1; i >= 0; --i, --dseg)
set_data_seg(dseg, wr->sg_list + i);

/*
* Possibly overwrite stamping in cacheline with LSO
* segment only after making sure all data segments
* are written.
*/
wmb();
*lso_wqe = lso_hdr_sz;

ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ?
MLX4_WQE_CTRL_FENCE : 0) | size;

Expand Down Expand Up @@ -1686,7 +1697,6 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
stamp_send_wqe(qp, stamp, size * 16);
ind = pad_wraparound(qp, ind);
}

}

out:
Expand Down

0 comments on commit 0fd7e1d

Please sign in to comment.