Skip to content

Commit

Permalink
IB/ipath: Performance optimization for CPU differences
Browse files Browse the repository at this point in the history
Different processors have different ordering restrictions for write
combining.  By taking advantage of this, we can eliminate some write
barriers when writing to the send buffers.

Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Ralph Campbell authored and Roland Dreier committed Oct 10, 2007
1 parent 327a338 commit 210d6ca
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 35 deletions.
22 changes: 12 additions & 10 deletions drivers/infiniband/hw/ipath/ipath_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -446,19 +446,21 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
dd->ipath_unit, plen - 1, pbufn);

if (dp.pbc_wd == 0)
/* Legacy operation, use computed pbc_wd */
dp.pbc_wd = plen;

/* we have to flush after the PBC for correctness on some cpus
* or WC buffer can be written out of order */
writeq(dp.pbc_wd, piobuf);
ipath_flush_wc();
/* copy all by the trigger word, then flush, so it's written
/*
* Copy all by the trigger word, then flush, so it's written
* to chip before trigger word, then write trigger word, then
* flush again, so packet is sent. */
__iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
ipath_flush_wc();
__raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
* flush again, so packet is sent.
*/
if (dd->ipath_flags & IPATH_PIO_FLUSH_WC) {
ipath_flush_wc();
__iowrite32_copy(piobuf + 2, tmpbuf, clen - 1);
ipath_flush_wc();
__raw_writel(tmpbuf[clen - 1], piobuf + clen + 1);
} else
__iowrite32_copy(piobuf + 2, tmpbuf, clen);

ipath_flush_wc();

ret = sizeof(dp);
Expand Down
2 changes: 2 additions & 0 deletions drivers/infiniband/hw/ipath/ipath_iba6120.c
Original file line number Diff line number Diff line change
Expand Up @@ -1273,6 +1273,8 @@ static void ipath_pe_tidtemplate(struct ipath_devdata *dd)
static int ipath_pe_early_init(struct ipath_devdata *dd)
{
dd->ipath_flags |= IPATH_4BYTE_TID;
if (ipath_unordered_wc())
dd->ipath_flags |= IPATH_PIO_FLUSH_WC;

/*
* For openfabrics, we need to be able to handle an IB header of
Expand Down
2 changes: 2 additions & 0 deletions drivers/infiniband/hw/ipath/ipath_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,8 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
#define IPATH_LINKACTIVE 0x200
/* link current state is unknown */
#define IPATH_LINKUNK 0x400
/* Write combining flush needed for PIO */
#define IPATH_PIO_FLUSH_WC 0x1000
/* no IB cable, or no device on IB cable */
#define IPATH_NOCABLE 0x4000
/* Supports port zero per packet receive interrupts via
Expand Down
62 changes: 37 additions & 25 deletions drivers/infiniband/hw/ipath/ipath_verbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
#endif

static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
u32 length)
u32 length, unsigned flush_wc)
{
u32 extra = 0;
u32 data = 0;
Expand Down Expand Up @@ -757,11 +757,14 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
}
/* Update address before sending packet. */
update_sge(ss, length);
/* must flush early everything before trigger word */
ipath_flush_wc();
__raw_writel(last, piobuf);
/* be sure trigger word is written */
ipath_flush_wc();
if (flush_wc) {
/* must flush early everything before trigger word */
ipath_flush_wc();
__raw_writel(last, piobuf);
/* be sure trigger word is written */
ipath_flush_wc();
} else
__raw_writel(last, piobuf);
}

/**
Expand All @@ -776,6 +779,7 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
u32 *hdr, u32 len, struct ipath_sge_state *ss)
{
u32 __iomem *piobuf;
unsigned flush_wc;
u32 plen;
int ret;

Expand All @@ -799,47 +803,55 @@ int ipath_verbs_send(struct ipath_devdata *dd, u32 hdrwords,
* or WC buffer can be written out of order.
*/
writeq(plen, piobuf);
ipath_flush_wc();
piobuf += 2;

flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
if (len == 0) {
/*
* If there is just the header portion, must flush before
* writing last word of header for correctness, and after
* the last header word (trigger word).
*/
__iowrite32_copy(piobuf, hdr, hdrwords - 1);
ipath_flush_wc();
__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
ipath_flush_wc();
ret = 0;
goto bail;
if (flush_wc) {
ipath_flush_wc();
__iowrite32_copy(piobuf, hdr, hdrwords - 1);
ipath_flush_wc();
__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
ipath_flush_wc();
} else
__iowrite32_copy(piobuf, hdr, hdrwords);
goto done;
}

if (flush_wc)
ipath_flush_wc();
__iowrite32_copy(piobuf, hdr, hdrwords);
piobuf += hdrwords;

/* The common case is aligned and contained in one segment. */
if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
!((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
u32 w;
u32 dwords;
u32 *addr = (u32 *) ss->sge.vaddr;

/* Update address before sending packet. */
update_sge(ss, len);
/* Need to round up for the last dword in the packet. */
w = (len + 3) >> 2;
__iowrite32_copy(piobuf, addr, w - 1);
/* must flush early everything before trigger word */
ipath_flush_wc();
__raw_writel(addr[w - 1], piobuf + w - 1);
/* be sure trigger word is written */
ipath_flush_wc();
ret = 0;
goto bail;
dwords = (len + 3) >> 2;
if (flush_wc) {
__iowrite32_copy(piobuf, addr, dwords - 1);
/* must flush early everything before trigger word */
ipath_flush_wc();
__raw_writel(addr[dwords - 1], piobuf + dwords - 1);
/* be sure trigger word is written */
ipath_flush_wc();
} else
__iowrite32_copy(piobuf, addr, dwords);
goto done;
}
copy_io(piobuf, ss, len);
copy_io(piobuf, ss, len, flush_wc);
done:
ret = 0;

bail:
return ret;
}
Expand Down

0 comments on commit 210d6ca

Please sign in to comment.