From 64f87d3616a01c53262c6e0e948d62df15923f1c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 23 Feb 2016 18:02:55 +0100 Subject: [PATCH 1/5] vxlan: consolidate GBP handling even more Now when the packet is scrubbed early, skb->mark can be set in the GBP handling code. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 909f7931c297a..656a303c0ac88 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1164,6 +1164,7 @@ static bool vxlan_remcsum(struct vxlanhdr *unparsed, } static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, + struct sk_buff *skb, u32 vxflags, struct vxlan_metadata *md, struct metadata_dst *tun_dst) { @@ -1183,6 +1184,9 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, if (gbp->policy_applied) md->gbp |= VXLAN_GBP_POLICY_APPLIED; + /* In flow-based mode, GBP is carried in dst_metadata */ + if (!(vxflags & VXLAN_F_COLLECT_METADATA)) + skb->mark = md->gbp; out: unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS; } @@ -1228,9 +1232,6 @@ static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs, goto drop; skb_reset_network_header(skb); - /* In flow-based mode, GBP is carried in dst_metadata */ - if (!(vs->flags & VXLAN_F_COLLECT_METADATA)) - skb->mark = md->gbp; if (oip6) err = IP6_ECN_decapsulate(oip6, skb); @@ -1329,7 +1330,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!vxlan_remcsum(&unparsed, skb, vs->flags)) goto drop; if (vs->flags & VXLAN_F_GBP) - vxlan_parse_gbp_hdr(&unparsed, md, tun_dst); + vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md, tun_dst); if (unparsed.vx_flags || unparsed.vx_vni) { /* If there are any unprocessed flags remaining treat From 1ab016e237e7d0734fb7adbbfe6d4cacb3520421 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 23 Feb 2016 18:02:56 +0100 Subject: [PATCH 2/5] vxlan: move inner L2 header processing to a separate function This code will be different for VXLAN-GPE, so move it to a separate function. It will also make the rx path less spaghetti-like. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 49 ++++++++++++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 656a303c0ac88..68a8f9f43e2aa 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1191,15 +1191,11 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS; } -static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs, - struct sk_buff *skb, struct vxlan_metadata *md, - struct metadata_dst *tun_dst) +static bool vxlan_set_mac(struct vxlan_dev *vxlan, + struct vxlan_sock *vs, + struct sk_buff *skb) { - struct iphdr *oip = NULL; - struct ipv6hdr *oip6 = NULL; - struct pcpu_sw_netstats *stats; union vxlan_addr saddr; - int err = 0; skb_reset_mac_header(skb); skb->protocol = eth_type_trans(skb, vxlan->dev); @@ -1207,30 +1203,51 @@ static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs, /* Ignore packet loops (and multicast echo) */ if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) - goto drop; + return false; /* Get data from the outer IP header */ if (vxlan_get_sk_family(vs) == AF_INET) { - oip = ip_hdr(skb); - saddr.sin.sin_addr.s_addr = oip->saddr; + saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; saddr.sa.sa_family = AF_INET; #if IS_ENABLED(CONFIG_IPV6) } else { - oip6 = ipv6_hdr(skb); - saddr.sin6.sin6_addr = oip6->saddr; + saddr.sin6.sin6_addr = ipv6_hdr(skb)->saddr; saddr.sa.sa_family = AF_INET6; #endif } + if ((vxlan->flags & VXLAN_F_LEARN) && + vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) + return false; + + return true; +} + +static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs, + struct sk_buff *skb, struct vxlan_metadata *md, + struct metadata_dst *tun_dst) +{ + struct iphdr *oip = NULL; + struct ipv6hdr *oip6 = NULL; + struct pcpu_sw_netstats *stats; + int err = 0; + + if (!vxlan_set_mac(vxlan, vs, skb)) + goto drop; + + /* Get data from the outer IP header */ + if (vxlan_get_sk_family(vs) == AF_INET) + oip = ip_hdr(skb); +#if IS_ENABLED(CONFIG_IPV6) + else + oip6 = ipv6_hdr(skb); +#endif + if (tun_dst) { skb_dst_set(skb, (struct dst_entry *)tun_dst); tun_dst = NULL; } - if ((vxlan->flags & VXLAN_F_LEARN) && - vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) - goto drop; - skb_reset_network_header(skb); if (oip6) From 760c68054e9ed1c6e43b06f916f6efc2c8a1adcc Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 23 Feb 2016 18:02:57 +0100 Subject: [PATCH 3/5] vxlan: move ECN decapsulation to a separate function It simplifies the vxlan_rcv function. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 62 ++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 68a8f9f43e2aa..382535bc9e59d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1205,7 +1205,7 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr)) return false; - /* Get data from the outer IP header */ + /* Get address from the outer IP header */ if (vxlan_get_sk_family(vs) == AF_INET) { saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; saddr.sa.sa_family = AF_INET; @@ -1223,52 +1223,52 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, return true; } +static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph, + struct sk_buff *skb) +{ + int err = 0; + + if (vxlan_get_sk_family(vs) == AF_INET) + err = IP_ECN_decapsulate(oiph, skb); +#if IS_ENABLED(CONFIG_IPV6) + else + err = IP6_ECN_decapsulate(oiph, skb); +#endif + + if (unlikely(err) && log_ecn_error) { + if (vxlan_get_sk_family(vs) == AF_INET) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &((struct iphdr *)oiph)->saddr, + ((struct iphdr *)oiph)->tos); + else + net_info_ratelimited("non-ECT from %pI6\n", + &((struct ipv6hdr *)oiph)->saddr); + } + return err <= 1; +} + static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs, struct sk_buff *skb, struct vxlan_metadata *md, struct metadata_dst *tun_dst) { - struct iphdr *oip = NULL; - struct ipv6hdr *oip6 = NULL; struct pcpu_sw_netstats *stats; - int err = 0; + void *oiph; if (!vxlan_set_mac(vxlan, vs, skb)) goto drop; - /* Get data from the outer IP header */ - if (vxlan_get_sk_family(vs) == AF_INET) - oip = ip_hdr(skb); -#if IS_ENABLED(CONFIG_IPV6) - else - oip6 = ipv6_hdr(skb); -#endif - if (tun_dst) { skb_dst_set(skb, (struct dst_entry *)tun_dst); tun_dst = NULL; } + oiph = skb_network_header(skb); skb_reset_network_header(skb); - if (oip6) - err = IP6_ECN_decapsulate(oip6, skb); - if (oip) - err = IP_ECN_decapsulate(oip, skb); - - if (unlikely(err)) { - if (log_ecn_error) { - if (oip6) - net_info_ratelimited("non-ECT from %pI6\n", - &oip6->saddr); - if (oip) - net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", - &oip->saddr, oip->tos); - } - if (err > 1) { - ++vxlan->dev->stats.rx_frame_errors; - ++vxlan->dev->stats.rx_errors; - goto drop; - } + if (!vxlan_ecn_decapsulate(vs, oiph, skb)) { + ++vxlan->dev->stats.rx_frame_errors; + ++vxlan->dev->stats.rx_errors; + goto drop; } stats = this_cpu_ptr(vxlan->dev->tstats); From f2d1968ec85e85def98fdea0cf325851433bb60a Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 23 Feb 2016 18:02:58 +0100 Subject: [PATCH 4/5] vxlan: consolidate rx handling to a single function Now when both vxlan_udp_encap_recv and vxlan_rcv are much shorter, combine them into a single function. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 72 ++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 44 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 382535bc9e59d..cfd6deb9f0905 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1247,56 +1247,17 @@ static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph, return err <= 1; } -static void vxlan_rcv(struct vxlan_dev *vxlan, struct vxlan_sock *vs, - struct sk_buff *skb, struct vxlan_metadata *md, - struct metadata_dst *tun_dst) -{ - struct pcpu_sw_netstats *stats; - void *oiph; - - if (!vxlan_set_mac(vxlan, vs, skb)) - goto drop; - - if (tun_dst) { - skb_dst_set(skb, (struct dst_entry *)tun_dst); - tun_dst = NULL; - } - - oiph = skb_network_header(skb); - skb_reset_network_header(skb); - - if (!vxlan_ecn_decapsulate(vs, oiph, skb)) { - ++vxlan->dev->stats.rx_frame_errors; - ++vxlan->dev->stats.rx_errors; - goto drop; - } - - stats = this_cpu_ptr(vxlan->dev->tstats); - u64_stats_update_begin(&stats->syncp); - stats->rx_packets++; - stats->rx_bytes += skb->len; - u64_stats_update_end(&stats->syncp); - - gro_cells_receive(&vxlan->gro_cells, skb); - - return; -drop: - if (tun_dst) - dst_release((struct dst_entry *)tun_dst); - - /* Consume bad packet */ - kfree_skb(skb); -} - /* Callback from net/ipv4/udp.c to receive packets */ -static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) { struct metadata_dst *tun_dst = NULL; + struct pcpu_sw_netstats *stats; struct vxlan_dev *vxlan; struct vxlan_sock *vs; struct vxlanhdr unparsed; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; + void *oiph; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1361,7 +1322,30 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; } - vxlan_rcv(vxlan, vs, skb, md, tun_dst); + if (!vxlan_set_mac(vxlan, vs, skb)) + goto drop; + + if (tun_dst) { + skb_dst_set(skb, (struct dst_entry *)tun_dst); + tun_dst = NULL; + } + + oiph = skb_network_header(skb); + skb_reset_network_header(skb); + + if (!vxlan_ecn_decapsulate(vs, oiph, skb)) { + ++vxlan->dev->stats.rx_frame_errors; + ++vxlan->dev->stats.rx_errors; + goto drop; + } + + stats = this_cpu_ptr(vxlan->dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += skb->len; + u64_stats_update_end(&stats->syncp); + + gro_cells_receive(&vxlan->gro_cells, skb); return 0; drop: @@ -2666,7 +2650,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6, /* Mark socket as an encapsulation socket. */ tunnel_cfg.sk_user_data = vs; tunnel_cfg.encap_type = 1; - tunnel_cfg.encap_rcv = vxlan_udp_encap_recv; + tunnel_cfg.encap_rcv = vxlan_rcv; tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); From 10a5af238cd29b7e43af0dc0690ae9baa0650c36 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Tue, 23 Feb 2016 18:02:59 +0100 Subject: [PATCH 5/5] vxlan: simplify metadata_dst usage in vxlan_rcv Now when the packet is scrubbed early, the metadata_dst can be assigned to the skb as soon as it is allocated. This simplifies the error cleanup path, as the dst will be freed by kfree_skb. It is also not necessary to pass it as a parameter to functions anymore. Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index cfd6deb9f0905..775ddb48388d6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1165,16 +1165,17 @@ static bool vxlan_remcsum(struct vxlanhdr *unparsed, static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed, struct sk_buff *skb, u32 vxflags, - struct vxlan_metadata *md, - struct metadata_dst *tun_dst) + struct vxlan_metadata *md) { struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed; + struct metadata_dst *tun_dst; if (!(unparsed->vx_flags & VXLAN_HF_GBP)) goto out; md->gbp = ntohs(gbp->policy_id); + tun_dst = (struct metadata_dst *)skb_dst(skb); if (tun_dst) tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT; @@ -1250,7 +1251,6 @@ static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph, /* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) { - struct metadata_dst *tun_dst = NULL; struct pcpu_sw_netstats *stats; struct vxlan_dev *vxlan; struct vxlan_sock *vs; @@ -1289,6 +1289,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (vxlan_collect_metadata(vs)) { __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); + struct metadata_dst *tun_dst; tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, vxlan_vni_to_tun_id(vni), sizeof(*md)); @@ -1297,6 +1298,8 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto drop; md = ip_tunnel_info_opts(&tun_dst->u.tun_info); + + skb_dst_set(skb, (struct dst_entry *)tun_dst); } else { memset(md, 0, sizeof(*md)); } @@ -1308,7 +1311,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (!vxlan_remcsum(&unparsed, skb, vs->flags)) goto drop; if (vs->flags & VXLAN_F_GBP) - vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md, tun_dst); + vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md); if (unparsed.vx_flags || unparsed.vx_vni) { /* If there are any unprocessed flags remaining treat @@ -1325,11 +1328,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (!vxlan_set_mac(vxlan, vs, skb)) goto drop; - if (tun_dst) { - skb_dst_set(skb, (struct dst_entry *)tun_dst); - tun_dst = NULL; - } - oiph = skb_network_header(skb); skb_reset_network_header(skb); @@ -1349,9 +1347,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) return 0; drop: - if (tun_dst) - dst_release((struct dst_entry *)tun_dst); - /* Consume bad packet */ kfree_skb(skb); return 0;