Skip to content

Commit

Permalink
net: fec: Add software TSO support
Browse files Browse the repository at this point in the history
Add software TSO support for FEC.
This feature improves outbound throughput.

Tested on an imx6dl sabresd board; iperf TCP tests show:
- 16.2% improvement compared with the FEC SG patch
- 82% improvement compared with no SG and no TSO patch

$ ethtool -K eth0 tso on
$ iperf -c 10.192.242.167 -t 3 &
[  3] local 10.192.242.108 port 35388 connected with 10.192.242.167 port 5001
[ ID] Interval       Transfer     Bandwidth
[  3]  0.0- 3.0 sec   181 MBytes   506 Mbits/sec

During testing, the CPU load was 30%.
Since the imx6dl FEC bandwidth is limited by the SoC system bus bandwidth,
the performance with SW TSO is a milestone.

CC: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Laight <David.Laight@ACULAB.COM>
CC: Li Frank <B20596@freescale.com>
Signed-off-by: Fugang Duan <B38611@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Nimrod Andy authored and David S. Miller committed Jun 12, 2014
1 parent 6e90928 commit 79f3391
Show file tree
Hide file tree
Showing 2 changed files with 238 additions and 23 deletions.
6 changes: 6 additions & 0 deletions drivers/net/ethernet/freescale/fec.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,12 @@ struct fec_enet_private {
unsigned short bufdesc_size;
unsigned short tx_ring_size;
unsigned short rx_ring_size;
unsigned short tx_stop_threshold;
unsigned short tx_wake_threshold;

/* Software TSO */
char *tso_hdrs;
dma_addr_t tso_hdrs_dma;

struct platform_device *pdev;

Expand Down
255 changes: 232 additions & 23 deletions drivers/net/ethernet/freescale/fec_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <net/tso.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
Expand Down Expand Up @@ -228,6 +229,15 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address");
#define FEC_PAUSE_FLAG_AUTONEG 0x1
#define FEC_PAUSE_FLAG_ENABLE 0x2

/* Size in bytes of one slot in the TSO header area (one slot per Tx descriptor) */
#define TSO_HEADER_SIZE 128
/* Max number of allowed TCP segments for software TSO */
#define FEC_MAX_TSO_SEGS 100
/* Descriptor budget derived from the segment limit; used as the Tx stop threshold */
#define FEC_MAX_SKB_DESCS (FEC_MAX_TSO_SEGS * 2 + MAX_SKB_FRAGS)

/*
 * True when DMA address @addr falls inside the TSO header area described by
 * @txq, i.e. within [tso_hdrs_dma, tso_hdrs_dma + tx_ring_size * TSO_HEADER_SIZE).
 *
 * Both arguments are fully parenthesized so that arbitrary expressions
 * (e.g. "&priv" or "a | b") expand correctly. Note that @addr and @txq are
 * evaluated more than once, so they must be free of side effects.
 */
#define IS_TSO_HEADER(txq, addr) \
	(((addr) >= (txq)->tso_hdrs_dma) && \
	 ((addr) < (txq)->tso_hdrs_dma + (txq)->tx_ring_size * TSO_HEADER_SIZE))

static int mii_cnt;

static inline
Expand Down Expand Up @@ -438,8 +448,17 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
unsigned short buflen;
unsigned int estatus = 0;
unsigned int index;
int entries_free;
int ret;

entries_free = fec_enet_get_free_txdesc_num(fep);
if (entries_free < MAX_SKB_FRAGS + 1) {
dev_kfree_skb_any(skb);
if (net_ratelimit())
netdev_err(ndev, "NOT enough BD for SG!\n");
return NETDEV_TX_OK;
}

/* Protocol checksum off-load for TCP and UDP. */
if (fec_enet_clear_csum(skb, ndev)) {
dev_kfree_skb_any(skb);
Expand Down Expand Up @@ -534,35 +553,210 @@ static int fec_enet_txq_submit_skb(struct sk_buff *skb, struct net_device *ndev)
return 0;
}

static netdev_tx_t
fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
/*
 * fec_enet_txq_put_data_tso - fill one Tx buffer descriptor with a chunk of
 * TSO payload.
 *
 * @skb:      skb being segmented; only ip_summed is read here, and the skb is
 *            freed on a DMA mapping failure
 * @ndev:     network device
 * @bdp:      buffer descriptor to fill
 * @index:    ring index of @bdp, used to pick the tx_bounce buffer
 * @data:     start of the payload chunk
 * @size:     number of payload bytes in the chunk
 * @last_tcp: chunk ends the current TCP segment (sets BD_ENET_TX_LAST and
 *            BD_ENET_TX_TC)
 * @is_last:  chunk belongs to the final segment of the skb (requests a Tx
 *            interrupt)
 *
 * Returns 0 on success, NETDEV_TX_BUSY when the DMA mapping fails.
 *
 * NOTE(review): in this diff rendering several lines below (the local
 * bdp/entries_free/ret declarations, one of the two "status" declarations,
 * "bdp = fep->cur_tx", the "Ooops" READY check, the "tx queue full!" message
 * and the fec_enet_txq_submit_skb() call) look like removed lines of the old
 * fec_enet_start_xmit() shown without '-' markers - confirm against the real
 * file before relying on this text.
 */
static int
fec_enet_txq_put_data_tso(struct sk_buff *skb, struct net_device *ndev,
struct bufdesc *bdp, int index, char *data,
int size, bool last_tcp, bool is_last)
{
struct fec_enet_private *fep = netdev_priv(ndev);
struct bufdesc *bdp;
unsigned short status;
int entries_free;
int ret;

/* Fill in a Tx ring entry */
bdp = fep->cur_tx;
const struct platform_device_id *id_entry =
platform_get_device_id(fep->pdev);
struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
unsigned short status;
unsigned int estatus = 0;

/* Start from the descriptor's current control word with the status bits cleared */
status = bdp->cbd_sc;
status &= ~BD_ENET_TX_STATS;

if (status & BD_ENET_TX_READY) {
/* Ooops. All transmit buffers are full. Bail out.
* This should not happen, since ndev->tbusy should be set.
*/
status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);
bdp->cbd_datlen = size;

/* Misaligned data, or a controller with the swap-frame quirk, goes through
 * the per-descriptor bounce buffer instead of being mapped in place.
 */
if (((unsigned long) data) & FEC_ALIGNMENT ||
id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
memcpy(fep->tx_bounce[index], data, size);
data = fep->tx_bounce[index];

if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
swap_buffer(data, size);
}

bdp->cbd_bufaddr = dma_map_single(&fep->pdev->dev, data,
size, DMA_TO_DEVICE);
if (dma_mapping_error(&fep->pdev->dev, bdp->cbd_bufaddr)) {
/* NOTE(review): the skb is freed here and NETDEV_TX_BUSY is returned below;
 * verify this matches the skb ownership the caller expects for that code.
 */
dev_kfree_skb_any(skb);
if (net_ratelimit())
netdev_err(ndev, "tx queue full!\n");
netdev_err(ndev, "Tx DMA memory map failed\n");
return NETDEV_TX_BUSY;
}

ret = fec_enet_txq_submit_skb(skb, ndev);
/* Extended descriptors carry the checksum-insertion request bits */
if (fep->bufdesc_ex) {
if (skb->ip_summed == CHECKSUM_PARTIAL)
estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
ebdp->cbd_bdu = 0;
ebdp->cbd_esc = estatus;
}

/* Handle the last BD specially */
if (last_tcp)
status |= (BD_ENET_TX_LAST | BD_ENET_TX_TC);
if (is_last) {
status |= BD_ENET_TX_INTR;
if (fep->bufdesc_ex)
ebdp->cbd_esc |= BD_ENET_TX_INT;
}

/* Publish the control word only after address, length and flags are set */
bdp->cbd_sc = status;

return 0;
}

/*
 * fec_enet_txq_put_hdr_tso - queue the header descriptor of one TSO segment.
 *
 * @skb:   GSO skb being segmented (used for header length and ip_summed)
 * @ndev:  network device
 * @bdp:   Tx buffer descriptor to fill
 * @index: ring index of @bdp; selects the TSO_HEADER_SIZE slot inside
 *         fep->tso_hdrs and the matching tx_bounce buffer
 *
 * The caller (fec_enet_txq_submit_tso) has already built the per-segment
 * MAC + IP + TCP header in slot @index of fep->tso_hdrs. Normally the
 * descriptor points straight at that slot through fep->tso_hdrs_dma. When
 * the slot is misaligned or the controller has FEC_QUIRK_SWAP_FRAME, the
 * header is copied to the bounce buffer, optionally byte-swapped, and the
 * bounce buffer is DMA-mapped instead.
 *
 * Returns 0 on success. Returns NETDEV_TX_BUSY if the bounce buffer cannot
 * be DMA-mapped; in that case @skb is NOT freed, because NETDEV_TX_BUSY
 * propagates out of ndo_start_xmit and tells the stack it still owns the
 * skb and may requeue it.
 */
static int
fec_enet_txq_put_hdr_tso(struct sk_buff *skb, struct net_device *ndev,
			 struct bufdesc *bdp, int index)
{
	struct fec_enet_private *fep = netdev_priv(ndev);
	const struct platform_device_id *id_entry =
				platform_get_device_id(fep->pdev);
	int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
	struct bufdesc_ex *ebdp = (struct bufdesc_ex *)bdp;
	void *bufaddr;
	/* NOTE(review): holds a DMA address; dma_addr_t (as used for
	 * tso_hdrs_dma) would be the matching type - confirm it cannot be
	 * wider than unsigned long on supported platforms.
	 */
	unsigned long dmabuf;
	unsigned short status;
	unsigned int estatus = 0;

	status = bdp->cbd_sc;
	status &= ~BD_ENET_TX_STATS;
	status |= (BD_ENET_TX_TC | BD_ENET_TX_READY);

	bufaddr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
	dmabuf = fep->tso_hdrs_dma + index * TSO_HEADER_SIZE;
	if (((unsigned long) bufaddr) & FEC_ALIGNMENT ||
	    id_entry->driver_data & FEC_QUIRK_SWAP_FRAME) {
		/* Bounce the header that was built for THIS segment (in the
		 * tso_hdrs slot), not the original unsegmented header at
		 * skb->data, which carries the wrong length/sequence fields.
		 */
		memcpy(fep->tx_bounce[index], bufaddr, hdr_len);
		bufaddr = fep->tx_bounce[index];

		if (id_entry->driver_data & FEC_QUIRK_SWAP_FRAME)
			swap_buffer(bufaddr, hdr_len);

		dmabuf = dma_map_single(&fep->pdev->dev, bufaddr,
					hdr_len, DMA_TO_DEVICE);
		if (dma_mapping_error(&fep->pdev->dev, dmabuf)) {
			/* Leave the skb alone: returning NETDEV_TX_BUSY hands
			 * it back to the stack, so freeing it here would be a
			 * use-after-free on requeue.
			 */
			if (net_ratelimit())
				netdev_err(ndev, "Tx DMA memory map failed\n");
			return NETDEV_TX_BUSY;
		}
	}

	bdp->cbd_bufaddr = dmabuf;
	bdp->cbd_datlen = hdr_len;

	/* Extended descriptors carry the checksum-insertion request bits */
	if (fep->bufdesc_ex) {
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			estatus |= BD_ENET_TX_PINS | BD_ENET_TX_IINS;
		ebdp->cbd_bdu = 0;
		ebdp->cbd_esc = estatus;
	}

	/* Publish the control word last, once address and length are in place */
	bdp->cbd_sc = status;

	return 0;
}

/*
 * fec_enet_txq_submit_tso - segment a GSO skb in software and queue it on the
 * Tx descriptor ring.
 *
 * For every TCP segment (at most gso_size payload bytes) one descriptor is
 * filled with a freshly built MAC + IP + TCP header, followed by one or more
 * descriptors covering that segment's payload. Once all segments are queued
 * the skb is recorded in tx_skbuff[] under the index of the last descriptor
 * used, cur_tx is advanced and the transmitter is kicked.
 *
 * Returns 0 when the skb was queued. Also returns NETDEV_TX_OK after
 * dropping (freeing) the skb when there are not enough free descriptors or
 * fec_enet_clear_csum() fails. If a helper fails, its non-zero return value
 * is passed back unchanged.
 *
 * NOTE(review): a non-zero ret goes back to fec_enet_start_xmit unchanged;
 * confirm that skb ownership on those paths matches what the helpers do.
 */
static int fec_enet_txq_submit_tso(struct sk_buff *skb, struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
int total_len, data_left;
struct bufdesc *bdp = fep->cur_tx;
struct tso_t tso;
unsigned int index = 0;
int ret;

/* Drop the packet up front if the ring cannot hold all of its descriptors */
if (tso_count_descs(skb) >= fec_enet_get_free_txdesc_num(fep)) {
dev_kfree_skb_any(skb);
if (net_ratelimit())
netdev_err(ndev, "NOT enough BD for TSO!\n");
return NETDEV_TX_OK;
}

/* Protocol checksum off-load for TCP and UDP. */
if (fec_enet_clear_csum(skb, ndev)) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}

/* Initialize the TSO handler, and prepare the first payload */
tso_start(skb, &tso);

/* total_len counts the payload bytes that still have to be queued */
total_len = skb->len - hdr_len;
while (total_len > 0) {
char *hdr;

index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len);
total_len -= data_left;

/* prepare packet headers: MAC + IP + TCP */
hdr = fep->tso_hdrs + index * TSO_HEADER_SIZE;
/* total_len == 0 flags the final segment of this skb */
tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0);
ret = fec_enet_txq_put_hdr_tso(skb, ndev, bdp, index);
if (ret)
goto err_release;

/* Queue this segment's payload, one descriptor per chunk of up to tso.size bytes */
while (data_left > 0) {
int size;

size = min_t(int, tso.size, data_left);
bdp = fec_enet_get_nextdesc(bdp, fep);
index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);
/* last_tcp: chunk finishes the segment; is_last: segment is the skb's final one */
ret = fec_enet_txq_put_data_tso(skb, ndev, bdp, index, tso.data,
size, size == data_left,
total_len == 0);
if (ret)
goto err_release;

data_left -= size;
tso_build_data(skb, &tso, size);
}

/* Step past the last data descriptor; the next segment's header goes here */
bdp = fec_enet_get_nextdesc(bdp, fep);
}

/* Save skb pointer */
/* index still refers to the last descriptor that was filled above */
fep->tx_skbuff[index] = skb;

fec_enet_submit_work(bdp, fep);

skb_tx_timestamp(skb);
fep->cur_tx = bdp;

/* Trigger transmission start */
writel(0, fep->hwp + FEC_X_DES_ACTIVE);

return 0;

err_release:
/* TODO: Release all used data descriptors for TSO */
/* Nothing is unwound here yet: descriptors already written (and any DMA
 * mappings made for them) are left as they are, and cur_tx is not advanced.
 */
return ret;
}

static netdev_tx_t
fec_enet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
struct fec_enet_private *fep = netdev_priv(ndev);
int entries_free;
int ret;

if (skb_is_gso(skb))
ret = fec_enet_txq_submit_tso(skb, ndev);
else
ret = fec_enet_txq_submit_skb(skb, ndev);
if (ret)
return ret;

entries_free = fec_enet_get_free_txdesc_num(fep);
if (entries_free < MAX_SKB_FRAGS + 1)
if (entries_free <= fep->tx_stop_threshold)
netif_stop_queue(ndev);

return NETDEV_TX_OK;
Expand Down Expand Up @@ -883,7 +1077,7 @@ fec_enet_tx(struct net_device *ndev)
unsigned short status;
struct sk_buff *skb;
int index = 0;
int entries;
int entries_free;

fep = netdev_priv(ndev);
bdp = fep->dirty_tx;
Expand All @@ -900,8 +1094,9 @@ fec_enet_tx(struct net_device *ndev)
index = fec_enet_get_bd_index(fep->tx_bd_base, bdp, fep);

skb = fep->tx_skbuff[index];
dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr, bdp->cbd_datlen,
DMA_TO_DEVICE);
if (!IS_TSO_HEADER(fep, bdp->cbd_bufaddr))
dma_unmap_single(&fep->pdev->dev, bdp->cbd_bufaddr,
bdp->cbd_datlen, DMA_TO_DEVICE);
bdp->cbd_bufaddr = 0;
if (!skb) {
bdp = fec_enet_get_nextdesc(bdp, fep);
Expand Down Expand Up @@ -962,9 +1157,11 @@ fec_enet_tx(struct net_device *ndev)

/* Since we have freed up a buffer, the ring is no longer full
*/
entries = fec_enet_get_free_txdesc_num(fep);
if (entries >= MAX_SKB_FRAGS + 1 && netif_queue_stopped(ndev))
netif_wake_queue(ndev);
if (netif_queue_stopped(ndev)) {
entries_free = fec_enet_get_free_txdesc_num(fep);
if (entries_free >= fep->tx_wake_threshold)
netif_wake_queue(ndev);
}
}
return;
}
Expand Down Expand Up @@ -2166,6 +2363,9 @@ static int fec_enet_init(struct net_device *ndev)
fep->tx_ring_size = TX_RING_SIZE;
fep->rx_ring_size = RX_RING_SIZE;

fep->tx_stop_threshold = FEC_MAX_SKB_DESCS;
fep->tx_wake_threshold = (fep->tx_ring_size - fep->tx_stop_threshold) / 2;

if (fep->bufdesc_ex)
fep->bufdesc_size = sizeof(struct bufdesc_ex);
else
Expand All @@ -2179,6 +2379,13 @@ static int fec_enet_init(struct net_device *ndev)
if (!cbd_base)
return -ENOMEM;

fep->tso_hdrs = dma_alloc_coherent(NULL, fep->tx_ring_size * TSO_HEADER_SIZE,
&fep->tso_hdrs_dma, GFP_KERNEL);
if (!fep->tso_hdrs) {
dma_free_coherent(NULL, bd_size, cbd_base, fep->bd_dma);
return -ENOMEM;
}

memset(cbd_base, 0, PAGE_SIZE);

fep->netdev = ndev;
Expand Down Expand Up @@ -2209,9 +2416,11 @@ static int fec_enet_init(struct net_device *ndev)
ndev->features |= NETIF_F_HW_VLAN_CTAG_RX;

if (id_entry->driver_data & FEC_QUIRK_HAS_CSUM) {
ndev->gso_max_segs = FEC_MAX_TSO_SEGS;

/* enable hw accelerator */
ndev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
| NETIF_F_RXCSUM | NETIF_F_SG);
| NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_TSO);
fep->csum_flags |= FLAG_RX_CSUM_ENABLED;
}

Expand Down

0 comments on commit 79f3391

Please sign in to comment.