Skip to content

Commit

Permalink
Merge branch 'vxlan-headers-and-tx-cleanups'
Browse files Browse the repository at this point in the history
Jiri Benc says:

====================
vxlan: cleanup headers and tx path

This is first part of vxlan cleanup. It makes vxlan.h better organized and
removes code duplication in the tx path. There's no change in functionality.

This is in preparation for VXLAN-GPE support. Other sets will follow with
cleanup of rx path, modifications of vxlan interface setup and
cleanup/modification of fdb handling. The goal of these patchsets is to
consolidate VXLAN extension handling (right now it's scattered all around
the code) and allow vxlan to operate in L3 mode (i.e. without inner Ethernet
headers).

The full picture (all 30 patches) can be seen at:
https://github.com/jbenc/linux-vxlan/commits/master

Changelog:
v2: added patch 5/6, fixing the udp sum problem spotted by Paolo Abeni
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Feb 7, 2016
2 parents e3e17b7 + f491e56 commit 19f76f6
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 210 deletions.
231 changes: 69 additions & 162 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1684,18 +1684,14 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}

#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr, __u8 prio, __u8 ttl,
__be16 src_port, __be16 dst_port, __be32 vni,
struct vxlan_metadata *md, bool xnet, u32 vxflags)
static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
int iphdr_len, __be32 vni,
struct vxlan_metadata *md, u32 vxflags,
bool udp_sum)
{
struct vxlanhdr *vxh;
int min_headroom;
int err;
bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX);
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
u16 hdrlen = sizeof(struct vxlanhdr);

Expand All @@ -1712,93 +1708,8 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
}
}

skb_scrub_packet(skb, xnet);

min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
+ VXLAN_HLEN + sizeof(struct ipv6hdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);

/* Need space for new headers (invalidates iph ptr) */
err = skb_cow_head(skb, min_headroom);
if (unlikely(err)) {
kfree_skb(skb);
goto err;
}

skb = vlan_hwaccel_push_inside(skb);
if (WARN_ON(!skb)) {
err = -ENOMEM;
goto err;
}

skb = iptunnel_handle_offloads(skb, udp_sum, type);
if (IS_ERR(skb)) {
err = -EINVAL;
goto err;
}

vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = vni;

if (type & SKB_GSO_TUNNEL_REMCSUM) {
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
VXLAN_RCO_SHIFT;

if (skb->csum_offset == offsetof(struct udphdr, check))
data |= VXLAN_RCO_UDP;

vxh->vx_vni |= htonl(data);
vxh->vx_flags |= htonl(VXLAN_HF_RCO);

if (!skb_is_gso(skb)) {
skb->ip_summed = CHECKSUM_NONE;
skb->encapsulation = 0;
}
}

if (vxflags & VXLAN_F_GBP)
vxlan_build_gbp_hdr(vxh, vxflags, md);

skb_set_inner_protocol(skb, htons(ETH_P_TEB));

udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio,
ttl, src_port, dst_port,
!!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX));
return 0;
err:
dst_release(dst);
return err;
}
#endif

static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni,
struct vxlan_metadata *md, bool xnet, u32 vxflags)
{
struct vxlanhdr *vxh;
int min_headroom;
int err;
bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM);
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
u16 hdrlen = sizeof(struct vxlanhdr);

if ((vxflags & VXLAN_F_REMCSUM_TX) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
int csum_start = skb_checksum_start_offset(skb);

if (csum_start <= VXLAN_MAX_REMCSUM_START &&
!(csum_start & VXLAN_RCO_SHIFT_MASK) &&
(skb->csum_offset == offsetof(struct udphdr, check) ||
skb->csum_offset == offsetof(struct tcphdr, check))) {
udp_sum = false;
type |= SKB_GSO_TUNNEL_REMCSUM;
}
}

min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ VXLAN_HLEN + sizeof(struct iphdr)
+ VXLAN_HLEN + iphdr_len
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);

/* Need space for new headers (invalidates iph ptr) */
Expand Down Expand Up @@ -1840,13 +1751,30 @@ static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *sk
vxlan_build_gbp_hdr(vxh, vxflags, md);

skb_set_inner_protocol(skb, htons(ETH_P_TEB));

udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df,
src_port, dst_port, xnet,
!(vxflags & VXLAN_F_UDP_CSUM));
return 0;
}

static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif, u8 tos,
__be32 daddr, __be32 *saddr)
{
struct rtable *rt = NULL;
struct flowi4 fl4;

memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_oif = oif;
fl4.flowi4_tos = RT_TOS(tos);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = IPPROTO_UDP;
fl4.daddr = daddr;
fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;

rt = ip_route_output_key(vxlan->net, &fl4);
if (!IS_ERR(rt))
*saddr = fl4.saddr;
return rt;
}

#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
struct sk_buff *skb, int oif,
Expand Down Expand Up @@ -1928,7 +1856,6 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct sock *sk;
struct rtable *rt = NULL;
const struct iphdr *old_iph;
struct flowi4 fl4;
union vxlan_addr *dst;
union vxlan_addr remote_ip;
struct vxlan_metadata _md;
Expand All @@ -1939,6 +1866,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
__u8 tos, ttl;
int err;
u32 flags = vxlan->flags;
bool udp_sum = false;
bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));

info = skb_tunnel_info(skb);

Expand Down Expand Up @@ -1987,6 +1916,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (info) {
ttl = info->key.ttl;
tos = info->key.tos;
udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);

if (info->options_len)
md = ip_tunnel_info_opts(info);
Expand All @@ -1995,29 +1925,22 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}

if (dst->sa.sa_family == AF_INET) {
__be32 saddr;

if (!vxlan->vn4_sock)
goto drop;
sk = vxlan->vn4_sock->sock->sk;

if (info) {
if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
df = htons(IP_DF);

if (info->key.tun_flags & TUNNEL_CSUM)
flags |= VXLAN_F_UDP_CSUM;
else
flags &= ~VXLAN_F_UDP_CSUM;
} else {
udp_sum = !!(flags & VXLAN_F_UDP_CSUM);
}

memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
fl4.flowi4_tos = RT_TOS(tos);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = IPPROTO_UDP;
fl4.daddr = dst->sin.sin_addr.s_addr;
fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;

rt = ip_route_output_key(vxlan->net, &fl4);
rt = vxlan_get_route(vxlan, skb,
rdst ? rdst->remote_ifindex : 0, tos,
dst->sin.sin_addr.s_addr, &saddr);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n",
&dst->sin.sin_addr.s_addr);
Expand Down Expand Up @@ -2049,16 +1972,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr,
dst->sin.sin_addr.s_addr, tos, ttl, df,
src_port, dst_port, htonl(vni << 8), md,
!net_eq(vxlan->net, dev_net(vxlan->dev)),
flags);
if (err < 0) {
/* skb is already freed. */
skb = NULL;
goto rt_tx_error;
}
err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
htonl(vni << 8), md, flags, udp_sum);
if (err < 0)
goto xmit_tx_error;

udp_tunnel_xmit_skb(rt, sk, skb, saddr,
dst->sin.sin_addr.s_addr, tos, ttl, df,
src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct dst_entry *ndst;
Expand Down Expand Up @@ -2103,18 +2024,20 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
return;
}

if (info) {
if (info->key.tun_flags & TUNNEL_CSUM)
flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX;
else
flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
}
if (!info)
udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);

ttl = ttl ? : ip6_dst_hoplimit(ndst);
err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
0, ttl, src_port, dst_port, htonl(vni << 8), md,
!net_eq(vxlan->net, dev_net(vxlan->dev)),
flags);
skb_scrub_packet(skb, xnet);
err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
htonl(vni << 8), md, flags, udp_sum);
if (err < 0) {
dst_release(ndst);
return;
}
udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
&saddr, &dst->sin6.sin6_addr,
0, ttl, src_port, dst_port, !udp_sum);
#endif
}

Expand All @@ -2124,6 +2047,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dev->stats.tx_dropped++;
goto tx_free;

xmit_tx_error:
/* skb is already freed. */
skb = NULL;
rt_tx_error:
ip_rt_put(rt);
tx_error:
Expand Down Expand Up @@ -2390,31 +2316,6 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
return 0;
}

static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
struct ip_tunnel_info *info,
__be16 sport, __be16 dport)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
struct rtable *rt;
struct flowi4 fl4;

memset(&fl4, 0, sizeof(fl4));
fl4.flowi4_tos = RT_TOS(info->key.tos);
fl4.flowi4_mark = skb->mark;
fl4.flowi4_proto = IPPROTO_UDP;
fl4.daddr = info->key.u.ipv4.dst;

rt = ip_route_output_key(vxlan->net, &fl4);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);

info->key.u.ipv4.src = fl4.saddr;
info->key.tp_src = sport;
info->key.tp_dst = dport;
return 0;
}

static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
Expand All @@ -2426,9 +2327,16 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
dport = info->key.tp_dst ? : vxlan->cfg.dst_port;

if (ip_tunnel_info_af(info) == AF_INET) {
struct rtable *rt;

if (!vxlan->vn4_sock)
return -EINVAL;
return egress_ipv4_tun_info(dev, skb, info, sport, dport);
rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
info->key.u.ipv4.dst,
&info->key.u.ipv4.src);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
} else {
#if IS_ENABLED(CONFIG_IPV6)
struct dst_entry *ndst;
Expand All @@ -2441,13 +2349,12 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
if (IS_ERR(ndst))
return PTR_ERR(ndst);
dst_release(ndst);

info->key.tp_src = sport;
info->key.tp_dst = dport;
#else /* !CONFIG_IPV6 */
return -EPFNOSUPPORT;
#endif
}
info->key.tp_src = sport;
info->key.tp_dst = dport;
return 0;
}

Expand Down
Loading

0 comments on commit 19f76f6

Please sign in to comment.