Merge branch 'veth-gro'
Paolo Abeni says:

====================
veth: allow GRO even without XDP

This series allows user space to enable GRO/NAPI on a veth
device even without attaching an XDP program.

It does not change the default veth behavior (no NAPI, no GRO),
except that, with this series applied, the GRO feature bit will
be effectively off by default on veth devices. Note that the GRO
bit is currently on by default, but GRO never takes place in the
absence of XDP.

With this series applied, setting the GRO feature bit enables
NAPI and allows GRO to take place. The TSO features on the peer
device are preserved.

The main goal is improving UDP forwarding performance for
containers in a typical virtual network setup:

(container) veth -> veth peer -> bridge/ovs -> vxlan -> NIC

Enabling the NAPI threaded mode, GRO and the NETIF_F_GRO_UDP_FWD
feature on the veth peer improves the UDP stream performance with
a non-empty netfilter configuration by a factor of 2, with no
measurable overhead for TCP traffic: a heuristic ensures that TCP
will not go through the additional NAPI/GRO layer.

Some self-tests are added to check the expected behavior in
the default configuration, with XDP and with plain GRO enabled.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Apr 11, 2021
2 parents 7dc85b5 + 1c3cadb commit 23cfa4d
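With this series applied, GRO on a veth can be toggled from user space like on any other netdev, typically with "ethtool -K veth0 gro on". As a minimal illustration of that knob (not part of this series; the device name, the choice of the legacy ETHTOOL_SGRO ioctl rather than ETHTOOL_SFEATURES, and the terse error handling are all illustrative):

/* sketch: enable GRO on "veth0" via the legacy ETHTOOL_SGRO ioctl,
 * comparable to what `ethtool -K veth0 gro on` achieves
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

int main(void)
{
	struct ethtool_value eval = { .cmd = ETHTOOL_SGRO, .data = 1 };
	struct ifreq ifr;
	int fd;

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "veth0", IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&eval;

	/* with this series, flipping NETIF_F_GRO on a running veth without
	 * XDP ends up in veth_set_features() -> veth_napi_enable()
	 */
	if (ioctl(fd, SIOCETHTOOL, &ifr) < 0)
		perror("SIOCETHTOOL(ETHTOOL_SGRO)");

	close(fd);
	return 0;
}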
Showing 3 changed files with 316 additions and 14 deletions.
152 changes: 138 additions & 14 deletions drivers/net/veth.c
@@ -57,6 +57,7 @@ struct veth_rq_stats {

struct veth_rq {
struct napi_struct xdp_napi;
struct napi_struct __rcu *napi; /* points to xdp_napi when the latter is initialized */
struct net_device *dev;
struct bpf_prog __rcu *xdp_prog;
struct xdp_mem_info xdp_mem;
@@ -293,13 +294,32 @@ static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
netif_rx(skb);
}

/* return true if the specified skb has chances of GRO aggregation
* Don't strive for accuracy, but try to avoid GRO overhead in the most
* common scenarios.
* When XDP is enabled, all traffic is considered eligible, as the xmit
* device has TSO off.
* When TSO is enabled on the xmit device, we are likely interested only
* in UDP aggregation, explicitly check for that if the skb is suspected
* - the sock_wfree destructor is used by UDP, ICMP and XDP sockets -
* to belong to locally generated UDP traffic.
*/
static bool veth_skb_is_eligible_for_gro(const struct net_device *dev,
const struct net_device *rcv,
const struct sk_buff *skb)
{
return !(dev->features & NETIF_F_ALL_TSO) ||
(skb->destructor == sock_wfree &&
rcv->features & (NETIF_F_GRO_FRAGLIST | NETIF_F_GRO_UDP_FWD));
}

static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
struct veth_rq *rq = NULL;
struct net_device *rcv;
int length = skb->len;
bool rcv_xdp = false;
bool use_napi = false;
int rxq;

rcu_read_lock();
@@ -313,20 +333,26 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
rxq = skb_get_queue_mapping(skb);
if (rxq < rcv->real_num_rx_queues) {
rq = &rcv_priv->rq[rxq];
rcv_xdp = rcu_access_pointer(rq->xdp_prog);

/* The napi pointer is available when an XDP program is
* attached or when GRO is enabled
* Don't bother with napi/GRO if the skb can't be aggregated
*/
use_napi = rcu_access_pointer(rq->napi) &&
veth_skb_is_eligible_for_gro(dev, rcv, skb);
skb_record_rx_queue(skb, rxq);
}

skb_tx_timestamp(skb);
if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
if (!rcv_xdp)
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
if (!use_napi)
dev_lstats_add(dev, length);
} else {
drop:
atomic64_inc(&priv->dropped);
}

if (rcv_xdp)
if (use_napi)
__veth_xdp_flush(rq);

rcu_read_unlock();
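The eligibility check above is the heuristic mentioned in the cover letter: when the transmitting device still has TSO, only traffic that looks like locally generated UDP is worth the extra NAPI/GRO hop, and only if the receiving peer actually asked for UDP aggregation. A stand-alone sketch of that decision (user-space C, invented feature bits, not kernel code):

/* sketch: mirrors the boolean structure of veth_skb_is_eligible_for_gro();
 * the feature bit values below are made up for the demo
 */
#include <stdbool.h>
#include <stdio.h>

#define F_ALL_TSO      (1u << 0)	/* stand-in for NETIF_F_ALL_TSO */
#define F_GRO_UDP_FWD  (1u << 1)	/* stand-in for NETIF_F_GRO_UDP_FWD */
#define F_GRO_FRAGLIST (1u << 2)	/* stand-in for NETIF_F_GRO_FRAGLIST */

static bool eligible_for_gro(unsigned int xmit_features,
			     unsigned int peer_features,
			     bool looks_like_local_udp)
{
	/* no TSO on the xmit device (e.g. XDP attached on the peer):
	 * every packet may benefit from GRO
	 */
	if (!(xmit_features & F_ALL_TSO))
		return true;

	/* otherwise only local UDP, and only if the peer wants UDP GRO */
	return looks_like_local_udp &&
	       (peer_features & (F_GRO_UDP_FWD | F_GRO_FRAGLIST));
}

int main(void)
{
	printf("TSO on, TCP skb        -> %d\n",
	       eligible_for_gro(F_ALL_TSO, F_GRO_UDP_FWD, false));
	printf("TSO on, local UDP skb  -> %d\n",
	       eligible_for_gro(F_ALL_TSO, F_GRO_UDP_FWD, true));
	printf("TSO off (XDP case)     -> %d\n",
	       eligible_for_gro(0, 0, false));
	return 0;
}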
@@ -686,7 +712,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
int mac_len, delta, off;
struct xdp_buff xdp;

skb_orphan(skb);
skb_orphan_partial(skb);

rcu_read_lock();
xdp_prog = rcu_dereference(rq->xdp_prog);
@@ -903,7 +929,7 @@ static int veth_poll(struct napi_struct *napi, int budget)
return done;
}

static int veth_napi_add(struct net_device *dev)
static int __veth_napi_enable(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
int err, i;
@@ -920,6 +946,7 @@ static int veth_napi_add(struct net_device *dev)
struct veth_rq *rq = &priv->rq[i];

napi_enable(&rq->xdp_napi);
rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
}

return 0;
@@ -938,6 +965,7 @@ static void veth_napi_del(struct net_device *dev)
for (i = 0; i < dev->real_num_rx_queues; i++) {
struct veth_rq *rq = &priv->rq[i];

rcu_assign_pointer(priv->rq[i].napi, NULL);
napi_disable(&rq->xdp_napi);
__netif_napi_del(&rq->xdp_napi);
}
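Two details in the hunks above are easy to miss: rq->napi is published (in __veth_napi_enable()) only after napi_enable(), and cleared (here in veth_napi_del()) before napi_disable(), so the transmit path never sees a half-initialized NAPI instance and simply falls back to netif_rx() when it reads NULL. A toy user-space sketch of that publish/clear pattern, with C11 atomics standing in for the kernel's RCU accessors (all names illustrative, not kernel code):

/* sketch: publish a pointer only when its target is ready, clear it before
 * teardown, and let the reader treat NULL as "take the plain path"
 */
#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

struct napi { int id; };

static struct napi xdp_napi = { .id = 1 };	/* like rq->xdp_napi */
static _Atomic(struct napi *) rq_napi;		/* like rq->napi, starts NULL */

static void napi_publish(void)
{
	/* release: NAPI setup is visible before the pointer becomes non-NULL */
	atomic_store_explicit(&rq_napi, &xdp_napi, memory_order_release);
}

static void napi_unpublish(void)
{
	atomic_store_explicit(&rq_napi, NULL, memory_order_release);
}

static void xmit_path(void)
{
	struct napi *napi = atomic_load_explicit(&rq_napi, memory_order_acquire);

	if (napi)
		printf("queue to NAPI %d (GRO possible)\n", napi->id);
	else
		printf("plain netif_rx() path\n");
}

int main(void)
{
	xmit_path();		/* NAPI not enabled yet */
	napi_publish();
	xmit_path();		/* NAPI/GRO path */
	napi_unpublish();
	xmit_path();		/* back to netif_rx() */
	return 0;
}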
@@ -951,16 +979,23 @@ static void veth_napi_del(struct net_device *dev)
}
}

static bool veth_gro_requested(const struct net_device *dev)
{
return !!(dev->wanted_features & NETIF_F_GRO);
}

static int veth_enable_xdp(struct net_device *dev)
{
bool napi_already_on = veth_gro_requested(dev) && (dev->flags & IFF_UP);
struct veth_priv *priv = netdev_priv(dev);
int err, i;

if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
for (i = 0; i < dev->real_num_rx_queues; i++) {
struct veth_rq *rq = &priv->rq[i];

netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
if (!napi_already_on)
netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i, rq->xdp_napi.napi_id);
if (err < 0)
goto err_rxq_reg;
@@ -975,13 +1010,25 @@ static int veth_enable_xdp(struct net_device *dev)
rq->xdp_mem = rq->xdp_rxq.mem;
}

err = veth_napi_add(dev);
if (err)
goto err_rxq_reg;
if (!napi_already_on) {
err = __veth_napi_enable(dev);
if (err)
goto err_rxq_reg;

if (!veth_gro_requested(dev)) {
/* user-space did not require GRO, but adding XDP
* is supposed to get GRO working
*/
dev->features |= NETIF_F_GRO;
netdev_features_change(dev);
}
}
}

for (i = 0; i < dev->real_num_rx_queues; i++)
for (i = 0; i < dev->real_num_rx_queues; i++) {
rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);
rcu_assign_pointer(priv->rq[i].napi, &priv->rq[i].xdp_napi);
}

return 0;
err_reg_mem:
@@ -991,7 +1038,8 @@ static int veth_enable_xdp(struct net_device *dev)
struct veth_rq *rq = &priv->rq[i];

xdp_rxq_info_unreg(&rq->xdp_rxq);
netif_napi_del(&rq->xdp_napi);
if (!napi_already_on)
netif_napi_del(&rq->xdp_napi);
}

return err;
@@ -1004,7 +1052,19 @@ static void veth_disable_xdp(struct net_device *dev)

for (i = 0; i < dev->real_num_rx_queues; i++)
rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
veth_napi_del(dev);

if (!netif_running(dev) || !veth_gro_requested(dev)) {
veth_napi_del(dev);

/* if user-space did not require GRO, since adding XDP
* enabled it, clear it now
*/
if (!veth_gro_requested(dev) && netif_running(dev)) {
dev->features &= ~NETIF_F_GRO;
netdev_features_change(dev);
}
}

for (i = 0; i < dev->real_num_rx_queues; i++) {
struct veth_rq *rq = &priv->rq[i];

@@ -1013,6 +1073,29 @@ static void veth_disable_xdp(struct net_device *dev)
}
}

static int veth_napi_enable(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
int err, i;

for (i = 0; i < dev->real_num_rx_queues; i++) {
struct veth_rq *rq = &priv->rq[i];

netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
}

err = __veth_napi_enable(dev);
if (err) {
for (i = 0; i < dev->real_num_rx_queues; i++) {
struct veth_rq *rq = &priv->rq[i];

netif_napi_del(&rq->xdp_napi);
}
return err;
}
return err;
}

static int veth_open(struct net_device *dev)
{
struct veth_priv *priv = netdev_priv(dev);
@@ -1026,6 +1109,10 @@ static int veth_open(struct net_device *dev)
err = veth_enable_xdp(dev);
if (err)
return err;
} else if (veth_gro_requested(dev)) {
err = veth_napi_enable(dev);
if (err)
return err;
}

if (peer->flags & IFF_UP) {
@@ -1047,6 +1134,8 @@ static int veth_close(struct net_device *dev)

if (priv->_xdp_prog)
veth_disable_xdp(dev);
else if (veth_gro_requested(dev))
veth_napi_del(dev);

return 0;
}
@@ -1145,10 +1234,32 @@ static netdev_features_t veth_fix_features(struct net_device *dev,
if (peer_priv->_xdp_prog)
features &= ~NETIF_F_GSO_SOFTWARE;
}
if (priv->_xdp_prog)
features |= NETIF_F_GRO;

return features;
}

static int veth_set_features(struct net_device *dev,
netdev_features_t features)
{
netdev_features_t changed = features ^ dev->features;
struct veth_priv *priv = netdev_priv(dev);
int err;

if (!(changed & NETIF_F_GRO) || !(dev->flags & IFF_UP) || priv->_xdp_prog)
return 0;

if (features & NETIF_F_GRO) {
err = veth_napi_enable(dev);
if (err)
return err;
} else {
veth_napi_del(dev);
}
return 0;
}

static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
{
struct veth_priv *peer_priv, *priv = netdev_priv(dev);
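The new veth_set_features() above is deliberately conservative: it only acts when the GRO bit itself flipped, the device is up, and no XDP program is attached (XDP already owns the NAPI instances, and veth_fix_features() forces GRO on in that case anyway). A small stand-alone sketch of that early-return logic (user-space C, invented bit values, not kernel code):

/* sketch: mirrors the "changed" mask check that gates the runtime
 * NAPI enable/disable on a GRO toggle
 */
#include <stdbool.h>
#include <stdio.h>

#define F_GRO (1u << 0)		/* stand-in for NETIF_F_GRO */
#define F_TSO (1u << 1)		/* stand-in for an unrelated feature bit */

static bool napi_toggle_needed(unsigned int active, unsigned int wanted,
			       bool dev_up, bool has_xdp)
{
	unsigned int changed = active ^ wanted;

	return (changed & F_GRO) && dev_up && !has_xdp;
}

int main(void)
{
	/* GRO flipped on a running, XDP-free device: toggle NAPI */
	printf("%d\n", napi_toggle_needed(F_TSO, F_TSO | F_GRO, true, false));
	/* XDP already attached: NAPI stays under XDP's control */
	printf("%d\n", napi_toggle_needed(F_TSO, F_TSO | F_GRO, true, true));
	/* unrelated feature changed: nothing to do for NAPI */
	printf("%d\n", napi_toggle_needed(F_GRO, F_GRO | F_TSO, true, false));
	return 0;
}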
@@ -1267,6 +1378,7 @@ static const struct net_device_ops veth_netdev_ops = {
#endif
.ndo_get_iflink = veth_get_iflink,
.ndo_fix_features = veth_fix_features,
.ndo_set_features = veth_set_features,
.ndo_features_check = passthru_features_check,
.ndo_set_rx_headroom = veth_set_rx_headroom,
.ndo_bpf = veth_xdp,
@@ -1329,6 +1441,13 @@ static int veth_validate(struct nlattr *tb[], struct nlattr *data[],

static struct rtnl_link_ops veth_link_ops;

static void veth_disable_gro(struct net_device *dev)
{
dev->features &= ~NETIF_F_GRO;
dev->wanted_features &= ~NETIF_F_GRO;
netdev_update_features(dev);
}

static int veth_newlink(struct net *src_net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
@@ -1401,6 +1520,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
if (err < 0)
goto err_register_peer;

/* keep GRO disabled by default to be consistent with the established
* veth behavior
*/
veth_disable_gro(peer);
netif_carrier_off(peer);

err = rtnl_configure_link(peer, ifmp);
@@ -1438,6 +1561,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
priv = netdev_priv(peer);
rcu_assign_pointer(priv->peer, dev);

veth_disable_gro(dev);
return 0;

err_register_dev:
1 change: 1 addition & 0 deletions tools/testing/selftests/net/Makefile
@@ -24,6 +24,7 @@ TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS += bareudp.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
TEST_PROGS += veth.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
177 changes: 177 additions & 0 deletions tools/testing/selftests/net/veth.sh
