Skip to content

Commit

Permalink
vxlan: Support for PMTU discovery on directly bridged links
Browse files Browse the repository at this point in the history
If the interface is a bridge or Open vSwitch port, and we can't
forward a packet because it exceeds the local PMTU estimate,
trigger an ICMP or ICMPv6 reply to the sender, using the same
interface to forward it back.

If metadata collection is enabled, reverse destination and source
addresses, so that Open vSwitch is able to match this packet against
the existing, reverse flow.

v2: Use netif_is_any_bridge_port() (David Ahern)

Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Stefano Brivio authored and David S. Miller committed Aug 4, 2020
1 parent 4cb47a8 commit fc68c99
Showing 1 changed file with 41 additions and 6 deletions.
47 changes: 41 additions & 6 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -2500,7 +2500,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,

/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
-struct vxlan_dev *dst_vxlan, __be32 vni)
+struct vxlan_dev *dst_vxlan, __be32 vni,
+bool snoop)
{
struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback;
Expand Down Expand Up @@ -2532,7 +2533,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
goto drop;
}

-if (dst_vxlan->cfg.flags & VXLAN_F_LEARN)
+if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);

u64_stats_update_begin(&tx_stats->syncp);
Expand Down Expand Up @@ -2581,7 +2582,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,

return -ENOENT;
}
-vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni);
+vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
return 1;
}

Expand Down Expand Up @@ -2617,7 +2618,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (vxlan_addr_any(dst)) {
if (did_rsc) {
/* short-circuited back to local bridge */
-vxlan_encap_bypass(skb, vxlan, vxlan, default_vni);
+vxlan_encap_bypass(skb, vxlan, vxlan,
+default_vni, true);
return;
}
goto drop;
Expand Down Expand Up @@ -2720,7 +2722,23 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}

ndst = &rt->dst;
-skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM, false);
+err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
+netif_is_any_bridge_port(dev));
+if (err < 0) {
+goto tx_error;
+} else if (err) {
+if (info) {
+struct in_addr src, dst;
+
+src = remote_ip.sin.sin_addr;
+dst = local_ip.sin.sin_addr;
+info->key.u.ipv4.src = src.s_addr;
+info->key.u.ipv4.dst = dst.s_addr;
+}
+vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+dst_release(ndst);
+goto out_unlock;
+}

tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
Expand Down Expand Up @@ -2760,7 +2778,24 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto out_unlock;
}

-skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM, false);
+err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
+netif_is_any_bridge_port(dev));
+if (err < 0) {
+goto tx_error;
+} else if (err) {
+if (info) {
+struct in6_addr src, dst;
+
+src = remote_ip.sin6.sin6_addr;
+dst = local_ip.sin6.sin6_addr;
+info->key.u.ipv6.src = src;
+info->key.u.ipv6.dst = dst;
+}
+
+vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+dst_release(ndst);
+goto out_unlock;
+}

tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
Expand Down

0 comments on commit fc68c99

Please sign in to comment.