net: socionext: different approach on DMA
The current driver dynamically allocates an skb and maps it as the DMA
Rx buffer. In order to prepare for upcoming XDP changes, let's introduce
a different allocation scheme: buffers are allocated dynamically and
mapped into hardware, and during Rx the driver uses build_skb() to
produce the buffers the network stack needs. This change improves
performance by ~15% on 64-byte packets with the SMMU disabled and by
~5% with the SMMU enabled.

Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
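
For context, the scheme described above pairs a page-frag allocation
with build_skb(). Below is a minimal sketch of that pattern, kept
separate from the driver specifics in the diff (the NETSEC_* constants
mirror the ones the patch adds; the rx_alloc_frag()/rx_build_skb()
helper names are illustrative only, not part of the commit):

    /* Allocate one Rx buffer: headroom + payload + skb_shared_info. */
    static void *rx_alloc_frag(struct device *dev, dma_addr_t *dma)
    {
            size_t total = SKB_DATA_ALIGN(NETSEC_RX_BUF_SZ + NETSEC_SKB_PAD) +
                           SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
            void *buf = napi_alloc_frag(total);

            if (!buf)
                    return NULL;

            /* only the payload area behind the headroom is device-visible */
            *dma = dma_map_single(dev, buf + NETSEC_SKB_PAD,
                                  NETSEC_RX_BUF_SZ, DMA_FROM_DEVICE);
            if (dma_mapping_error(dev, *dma)) {
                    skb_free_frag(buf);
                    return NULL;
            }
            return buf;
    }

    /* On Rx completion, wrap the frag in an skb without copying. */
    static struct sk_buff *rx_build_skb(void *buf, u32 truesize, u16 pkt_len)
    {
            struct sk_buff *skb = build_skb(buf, truesize);

            if (!skb)
                    return NULL;
            skb_reserve(skb, NETSEC_SKB_PAD); /* skip the headroom */
            skb_put(skb, pkt_len);            /* bytes the NIC wrote */
            return skb;
    }

Compared with allocating a full skb up front via netdev_alloc_skb(),
this defers all skb construction until a packet has actually arrived,
which is the main thing the quoted numbers are measuring.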
Ilias Apalodimas authored and David S. Miller committed Nov 9, 2018
1 parent 026b907 commit 4acb20b
Showing 1 changed file with 128 additions and 99 deletions.
227 changes: 128 additions & 99 deletions drivers/net/ethernet/socionext/netsec.c
@@ -234,6 +234,9 @@
 
 #define DESC_NUM 256
 
+#define NETSEC_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN)
+#define NETSEC_RX_BUF_SZ 1536
+
 #define DESC_SZ sizeof(struct netsec_de)
 
 #define NETSEC_F_NETSEC_VER_MAJOR_NUM(x) ((x) & 0xffff0000)
@@ -571,34 +574,10 @@ static const struct ethtool_ops netsec_ethtool_ops = {
 
 /************* NETDEV_OPS FOLLOW *************/
 
-static struct sk_buff *netsec_alloc_skb(struct netsec_priv *priv,
-                                        struct netsec_desc *desc)
-{
-        struct sk_buff *skb;
-
-        if (device_get_dma_attr(priv->dev) == DEV_DMA_COHERENT) {
-                skb = netdev_alloc_skb_ip_align(priv->ndev, desc->len);
-        } else {
-                desc->len = L1_CACHE_ALIGN(desc->len);
-                skb = netdev_alloc_skb(priv->ndev, desc->len);
-        }
-        if (!skb)
-                return NULL;
-
-        desc->addr = skb->data;
-        desc->dma_addr = dma_map_single(priv->dev, desc->addr, desc->len,
-                                        DMA_FROM_DEVICE);
-        if (dma_mapping_error(priv->dev, desc->dma_addr)) {
-                dev_kfree_skb_any(skb);
-                return NULL;
-        }
-        return skb;
-}
-
 static void netsec_set_rx_de(struct netsec_priv *priv,
                              struct netsec_desc_ring *dring, u16 idx,
-                             const struct netsec_desc *desc,
-                             struct sk_buff *skb)
+                             const struct netsec_desc *desc)
 {
         struct netsec_de *de = dring->vaddr + DESC_SZ * idx;
         u32 attr = (1 << NETSEC_RX_PKT_OWN_FIELD) |
@@ -617,59 +596,6 @@ static void netsec_set_rx_de(struct netsec_priv *priv,
         dring->desc[idx].dma_addr = desc->dma_addr;
         dring->desc[idx].addr = desc->addr;
         dring->desc[idx].len = desc->len;
-        dring->desc[idx].skb = skb;
 }
 
-static struct sk_buff *netsec_get_rx_de(struct netsec_priv *priv,
-                                        struct netsec_desc_ring *dring,
-                                        u16 idx,
-                                        struct netsec_rx_pkt_info *rxpi,
-                                        struct netsec_desc *desc, u16 *len)
-{
-        struct netsec_de de = {};
-
-        memcpy(&de, dring->vaddr + DESC_SZ * idx, DESC_SZ);
-
-        *len = de.buf_len_info >> 16;
-
-        rxpi->err_flag = (de.attr >> NETSEC_RX_PKT_ER_FIELD) & 1;
-        rxpi->rx_cksum_result = (de.attr >> NETSEC_RX_PKT_CO_FIELD) & 3;
-        rxpi->err_code = (de.attr >> NETSEC_RX_PKT_ERR_FIELD) &
-                                                NETSEC_RX_PKT_ERR_MASK;
-        *desc = dring->desc[idx];
-        return desc->skb;
-}
-
-static struct sk_buff *netsec_get_rx_pkt_data(struct netsec_priv *priv,
-                                              struct netsec_rx_pkt_info *rxpi,
-                                              struct netsec_desc *desc,
-                                              u16 *len)
-{
-        struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
-        struct sk_buff *tmp_skb, *skb = NULL;
-        struct netsec_desc td;
-        int tail;
-
-        *rxpi = (struct netsec_rx_pkt_info){};
-
-        td.len = priv->ndev->mtu + 22;
-
-        tmp_skb = netsec_alloc_skb(priv, &td);
-
-        tail = dring->tail;
-
-        if (!tmp_skb) {
-                netsec_set_rx_de(priv, dring, tail, &dring->desc[tail],
-                                 dring->desc[tail].skb);
-        } else {
-                skb = netsec_get_rx_de(priv, dring, tail, rxpi, desc, len);
-                netsec_set_rx_de(priv, dring, tail, &td, tmp_skb);
-        }
-
-        /* move tail ahead */
-        dring->tail = (dring->tail + 1) % DESC_NUM;
-
-        return skb;
-}
-
 static int netsec_clean_tx_dring(struct netsec_priv *priv, int budget)
@@ -736,19 +662,65 @@ static int netsec_process_tx(struct netsec_priv *priv, int budget)
         return done;
 }
 
+static void *netsec_alloc_rx_data(struct netsec_priv *priv,
+                                  dma_addr_t *dma_handle, u16 *desc_len)
+{
+        size_t total_len = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+        size_t payload_len = NETSEC_RX_BUF_SZ;
+        dma_addr_t mapping;
+        void *buf;
+
+        total_len += SKB_DATA_ALIGN(payload_len + NETSEC_SKB_PAD);
+
+        buf = napi_alloc_frag(total_len);
+        if (!buf)
+                return NULL;
+
+        mapping = dma_map_single(priv->dev, buf + NETSEC_SKB_PAD, payload_len,
+                                 DMA_FROM_DEVICE);
+        if (unlikely(dma_mapping_error(priv->dev, mapping)))
+                goto err_out;
+
+        *dma_handle = mapping;
+        *desc_len = payload_len;
+
+        return buf;
+
+err_out:
+        skb_free_frag(buf);
+        return NULL;
+}
+
+static void netsec_rx_fill(struct netsec_priv *priv, u16 from, u16 num)
+{
+        struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
+        u16 idx = from;
+
+        while (num) {
+                netsec_set_rx_de(priv, dring, idx, &dring->desc[idx]);
+                idx++;
+                if (idx >= DESC_NUM)
+                        idx = 0;
+                num--;
+        }
+}
+
 static int netsec_process_rx(struct netsec_priv *priv, int budget)
 {
         struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
         struct net_device *ndev = priv->ndev;
         struct netsec_rx_pkt_info rx_info;
-        int done = 0;
-        struct netsec_desc desc;
         struct sk_buff *skb;
-        u16 len;
+        int done = 0;
 
         while (done < budget) {
                 u16 idx = dring->tail;
                 struct netsec_de *de = dring->vaddr + (DESC_SZ * idx);
+                struct netsec_desc *desc = &dring->desc[idx];
+                u16 pkt_len, desc_len;
+                dma_addr_t dma_handle;
+                void *buf_addr;
+                u32 truesize;
 
                 if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) {
                         /* reading the register clears the irq */
@@ -762,18 +734,59 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
                  */
                 dma_rmb();
                 done++;
-                skb = netsec_get_rx_pkt_data(priv, &rx_info, &desc, &len);
-                if (unlikely(!skb) || rx_info.err_flag) {
+
+                pkt_len = de->buf_len_info >> 16;
+                rx_info.err_code = (de->attr >> NETSEC_RX_PKT_ERR_FIELD) &
+                        NETSEC_RX_PKT_ERR_MASK;
+                rx_info.err_flag = (de->attr >> NETSEC_RX_PKT_ER_FIELD) & 1;
+                if (rx_info.err_flag) {
                         netif_err(priv, drv, priv->ndev,
-                                  "%s: rx fail err(%d)\n",
-                                  __func__, rx_info.err_code);
+                                  "%s: rx fail err(%d)\n", __func__,
+                                  rx_info.err_code);
                         ndev->stats.rx_dropped++;
+                        dring->tail = (dring->tail + 1) % DESC_NUM;
+                        /* reuse buffer page frag */
+                        netsec_rx_fill(priv, idx, 1);
                         continue;
                 }
+                rx_info.rx_cksum_result =
+                        (de->attr >> NETSEC_RX_PKT_CO_FIELD) & 3;
 
-                dma_unmap_single(priv->dev, desc.dma_addr, desc.len,
-                                 DMA_FROM_DEVICE);
-                skb_put(skb, len);
+                /* allocate a fresh buffer and map it to the hardware.
+                 * This will eventually replace the old buffer in the hardware
+                 */
+                buf_addr = netsec_alloc_rx_data(priv, &dma_handle, &desc_len);
+                if (unlikely(!buf_addr))
+                        break;
+
+                dma_sync_single_for_cpu(priv->dev, desc->dma_addr, pkt_len,
+                                        DMA_FROM_DEVICE);
+                prefetch(desc->addr);
+
+                truesize = SKB_DATA_ALIGN(desc->len + NETSEC_SKB_PAD) +
+                        SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+                skb = build_skb(desc->addr, truesize);
+                if (unlikely(!skb)) {
+                        /* free the newly allocated buffer, we are not going to
+                         * use it
+                         */
+                        dma_unmap_single(priv->dev, dma_handle, desc_len,
+                                         DMA_FROM_DEVICE);
+                        skb_free_frag(buf_addr);
+                        netif_err(priv, drv, priv->ndev,
+                                  "rx failed to build skb\n");
+                        break;
+                }
+                dma_unmap_single_attrs(priv->dev, desc->dma_addr, desc->len,
+                                       DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+
+                /* Update the descriptor with the new buffer we allocated */
+                desc->len = desc_len;
+                desc->dma_addr = dma_handle;
+                desc->addr = buf_addr;
+
+                skb_reserve(skb, NETSEC_SKB_PAD);
+                skb_put(skb, pkt_len);
                 skb->protocol = eth_type_trans(skb, priv->ndev);
 
                 if (priv->rx_cksum_offload_flag &&
@@ -782,8 +795,11 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
 
                 if (napi_gro_receive(&priv->napi, skb) != GRO_DROP) {
                         ndev->stats.rx_packets++;
-                        ndev->stats.rx_bytes += len;
+                        ndev->stats.rx_bytes += pkt_len;
                 }
+
+                netsec_rx_fill(priv, idx, 1);
+                dring->tail = (dring->tail + 1) % DESC_NUM;
         }
 
         return done;
@@ -946,7 +962,10 @@ static void netsec_uninit_pkt_dring(struct netsec_priv *priv, int id)
                 dma_unmap_single(priv->dev, desc->dma_addr, desc->len,
                                  id == NETSEC_RING_RX ? DMA_FROM_DEVICE :
                                                         DMA_TO_DEVICE);
-                dev_kfree_skb(desc->skb);
+                if (id == NETSEC_RING_RX)
+                        skb_free_frag(desc->addr);
+                else if (id == NETSEC_RING_TX)
+                        dev_kfree_skb(desc->skb);
         }
 
         memset(dring->desc, 0, sizeof(struct netsec_desc) * DESC_NUM);
@@ -1002,22 +1021,30 @@ static int netsec_alloc_dring(struct netsec_priv *priv, enum ring_id id)
 static int netsec_setup_rx_dring(struct netsec_priv *priv)
 {
         struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
-        struct netsec_desc desc;
-        struct sk_buff *skb;
-        int n;
+        int i;
 
-        desc.len = priv->ndev->mtu + 22;
+        for (i = 0; i < DESC_NUM; i++) {
+                struct netsec_desc *desc = &dring->desc[i];
+                dma_addr_t dma_handle;
+                void *buf;
+                u16 len;
 
-        for (n = 0; n < DESC_NUM; n++) {
-                skb = netsec_alloc_skb(priv, &desc);
-                if (!skb) {
+                buf = netsec_alloc_rx_data(priv, &dma_handle, &len);
+                if (!buf) {
                         netsec_uninit_pkt_dring(priv, NETSEC_RING_RX);
-                        return -ENOMEM;
+                        goto err_out;
                 }
-                netsec_set_rx_de(priv, dring, n, &desc, skb);
+                desc->dma_addr = dma_handle;
+                desc->addr = buf;
+                desc->len = len;
         }
 
+        netsec_rx_fill(priv, 0, DESC_NUM);
+
         return 0;
+
+err_out:
+        return -ENOMEM;
 }
 
 static int netsec_netdev_load_ucode_region(struct netsec_priv *priv, u32 reg,
@@ -1377,6 +1404,8 @@ static int netsec_netdev_init(struct net_device *ndev)
         int ret;
         u16 data;
 
+        BUILD_BUG_ON_NOT_POWER_OF_2(DESC_NUM);
+
         ret = netsec_alloc_dring(priv, NETSEC_RING_TX);
         if (ret)
                 return ret;
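
A note on the size arithmetic in netsec_alloc_rx_data() and the
truesize handed to build_skb(): each frag must hold the headroom, the
DMA-visible payload, and the struct skb_shared_info that build_skb()
places at the end. On a hypothetical 64-bit build with 64-byte cache
lines (illustrative values: NET_SKB_PAD = 64, NET_IP_ALIGN = 2, and
sizeof(struct skb_shared_info) around 320 bytes; all of these vary by
architecture and config) the layout works out as:

    NETSEC_SKB_PAD                         = 64 + 2          =   66
    SKB_DATA_ALIGN(1536 + 66)              = align(1602, 64) = 1664
    SKB_DATA_ALIGN(sizeof(shared_info))    = align(320, 64)  =  320
    total requested from napi_alloc_frag() = 1664 + 320      = 1984

build_skb() receives that same truesize, so socket memory accounting
matches what was actually allocated. Note also how the patch splits
the unmap: dma_sync_single_for_cpu() syncs only the pkt_len bytes the
hardware wrote, and dma_unmap_single_attrs(..., DMA_ATTR_SKIP_CPU_SYNC)
then skips a second sync of the full 1536-byte buffer; on systems
without coherent DMA this avoids redundant cache maintenance and
plausibly accounts for part of the quoted speedup.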
