Skip to content

Commit

Permalink
Merge branch 'udp-gro-L4'
Browse files Browse the repository at this point in the history
Paolo Abeni says:

====================
udp: GRO L4 improvements

This series improves the UDP L4 - either 'forward' or 'frag_list' -
co-existence with UDP tunnel GRO, allowing the first to take place
correctly even for encapsulated UDP traffic.

The first four patches are mostly bugfixes, addressing some GRO
edge-cases when both tunnels and L4 are present, enabled and in use.

The next 3 patches avoid unneeded segmentation when UDP GRO
traffic traverses UDP tunnels in the receive path.

Finally, some self-tests are included, covering the relevant
GRO scenarios.

Even if most patches are actually bugfixes, this series is
targeting net-next, as overall it makes available a new feature.

v2 -> v3:
 - no code changes, more verbose commit messages and comment in
   patch 1/8

v1 -> v2:
 - restrict post segmentation csum fixup to only the relevant pkts
 - use individual 'accept_gso_type' fields instead of whole gso bitmask
   (Willem)
 - use only ipv6 addresses from test range in self-tests (Willem)
 - hopefully clarified most individual patches commit messages
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Mar 31, 2021
2 parents dc5fa20 + a062260 commit df82e9c
Show file tree
Hide file tree
Showing 11 changed files with 323 additions and 13 deletions.
1 change: 1 addition & 0 deletions drivers/net/bareudp.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ static struct socket *bareudp_create_sock(struct net *net, __be16 port)
if (err < 0)
return ERR_PTR(err);

udp_allow_gso(sock->sk);
return sock;
}

Expand Down
1 change: 1 addition & 0 deletions drivers/net/geneve.c
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6,
if (err < 0)
return ERR_PTR(err);

udp_allow_gso(sock->sk);
return sock;
}

Expand Down
1 change: 1 addition & 0 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -3484,6 +3484,7 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
if (err < 0)
return ERR_PTR(err);

udp_allow_gso(sock->sk);
return sock;
}

Expand Down
22 changes: 19 additions & 3 deletions include/linux/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,9 @@ struct udp_sock {
* different encapsulation layer set
* this
*/
gro_enabled:1; /* Can accept GRO packets */
gro_enabled:1, /* Request GRO aggregation */
accept_udp_l4:1,
accept_udp_fraglist:1;
/*
* Following member retains the information to create a UDP header
* when the socket is uncorked.
Expand Down Expand Up @@ -131,8 +133,22 @@ static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,

/*
 * udp_unexpected_gso - tell whether @skb is a GSO packet of a kind that
 * socket @sk has not opted in to receive.
 *
 * Returns true when @skb is GSO and either carries SKB_GSO_UDP_L4 while
 * accept_udp_l4 is clear, or carries SKB_GSO_FRAGLIST while
 * accept_udp_fraglist is clear; returns false otherwise.
 *
 * NOTE(review): this hunk is rendered without +/- markers. The two-line
 * `return !udp_sk(sk)->gro_enabled ...` statement right after the opening
 * brace appears to be the removed pre-patch body (it matches the deletion
 * count listed for this header); the statements after it are the
 * replacement logic described above.
 */
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
{
/* presumably the old check: any UDP_L4 GSO packet on a socket without gro_enabled */
return !udp_sk(sk)->gro_enabled && skb_is_gso(skb) &&
skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4;
/* a non-GSO packet is never "unexpected" */
if (!skb_is_gso(skb))
return false;

/* UDP_L4 aggregate, but the socket did not set accept_udp_l4 */
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 && !udp_sk(sk)->accept_udp_l4)
return true;

/* frag_list aggregate, but the socket did not set accept_udp_fraglist */
if (skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST && !udp_sk(sk)->accept_udp_fraglist)
return true;

return false;
}

static inline void udp_allow_gso(struct sock *sk)
{
udp_sk(sk)->accept_udp_l4 = 1;
udp_sk(sk)->accept_udp_fraglist = 1;
}

#define udp_portaddr_for_each_entry(__sk, list) \
Expand Down
23 changes: 23 additions & 0 deletions include/net/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,29 @@ static inline struct sk_buff *udp_rcv_segment(struct sock *sk,
return segs;
}

/*
 * udp_post_segment_fix_csum - per-segment fixup applied after a GSO packet
 * has been segmented on the UDP receive path.
 *
 * Refreshes the UDP control block of @skb and, for CHECKSUM_NONE segments
 * that are not already flagged valid, marks the checksum as valid.
 */
static inline void udp_post_segment_fix_csum(struct sk_buff *skb)
{
	/* There is no GRO for UDP-lite, so partial coverage is not expected */
	WARN_ON_ONCE(UDP_SKB_CB(skb)->partial_cov);

	/* UDP CB fixup: checksum coverage spans the whole segment */
	UDP_SKB_CB(skb)->cscov = skb->len;

	/* Why the csum is forced valid here:
	 *
	 * a packet generated with UDP_SEGMENT that goes through
	 *
	 * UDP tunnel(xmit) -> veth (segmentation) -> veth (gro) -> UDP tunnel (rx)
	 *
	 * may hit a UDP socket as CHECKSUM_NONE, since
	 * __iptunnel_pull_header() turns CHECKSUM_PARTIAL into NONE.
	 * Without a UDP tunnel, SKB_GSO_UDP_L4 and SKB_GSO_FRAGLIST packets
	 * already carry a valid checksum: the GRO engine validates the UDP
	 * csum before aggregating and nothing strips that info afterwards.
	 * Forcing a valid csum after segmentation avoids one more check in
	 * the tunnel fastpath.
	 */
	if (skb->ip_summed != CHECKSUM_NONE || skb->csum_valid)
		return;

	skb->csum_valid = 1;
}

#ifdef CONFIG_BPF_SYSCALL
struct sk_psock;
struct proto *udp_bpf_get_proto(struct sock *sk, struct sk_psock *psock);
Expand Down
5 changes: 5 additions & 0 deletions net/ipv4/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2178,6 +2178,8 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
segs = udp_rcv_segment(sk, skb, true);
skb_list_walk_safe(segs, skb, next) {
__skb_pull(skb, skb_transport_offset(skb));

udp_post_segment_fix_csum(skb);
ret = udp_queue_rcv_one_skb(sk, skb);
if (ret > 0)
ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret);
Expand Down Expand Up @@ -2664,9 +2666,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,

case UDP_GRO:
lock_sock(sk);

/* when enabling GRO, accept the related GSO packet type */
if (valbool)
udp_tunnel_encap_enable(sk->sk_socket);
up->gro_enabled = valbool;
up->accept_udp_l4 = valbool;
release_sock(sk);
break;

Expand Down
27 changes: 18 additions & 9 deletions net/ipv4/udp_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -515,21 +515,24 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
unsigned int off = skb_gro_offset(skb);
int flush = 1;

/* we can do L4 aggregation only if the packet can't land in a tunnel
* otherwise we could corrupt the inner stream
*/
NAPI_GRO_CB(skb)->is_flist = 0;
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1;
if (!sk || !udp_sk(sk)->gro_receive) {
if (skb->dev->features & NETIF_F_GRO_FRAGLIST)
NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1;

if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
(sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) {
pp = call_gro_receive(udp_gro_receive_segment, head, skb);
if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) ||
(sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist)
pp = call_gro_receive(udp_gro_receive_segment, head, skb);
return pp;
}

if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
if (NAPI_GRO_CB(skb)->encap_mark ||
(uh->check && skb->ip_summed != CHECKSUM_PARTIAL &&
NAPI_GRO_CB(skb)->csum_cnt == 0 &&
!NAPI_GRO_CB(skb)->csum_valid) ||
!udp_sk(sk)->gro_receive)
!NAPI_GRO_CB(skb)->csum_valid))
goto out;

/* mark that this skb passed once through the tunnel gro layer */
Expand Down Expand Up @@ -639,6 +642,11 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff,
skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM
: SKB_GSO_UDP_TUNNEL;

/* clear the encap mark, so that inner frag_list gro_complete
* can take place
*/
NAPI_GRO_CB(skb)->encap_mark = 0;

/* Set encapsulation before calling into inner gro_complete()
* functions to make them set up the inner offsets.
*/
Expand All @@ -662,7 +670,8 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
const struct iphdr *iph = ip_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

if (NAPI_GRO_CB(skb)->is_flist) {
/* do fraglist only if there is no outer UDP encap (or we already processed it) */
if (NAPI_GRO_CB(skb)->is_flist && !NAPI_GRO_CB(skb)->encap_mark) {
uh->len = htons(skb->len - nhoff);

skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
Expand Down
1 change: 1 addition & 0 deletions net/ipv6/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
skb_list_walk_safe(segs, skb, next) {
__skb_pull(skb, skb_transport_offset(skb));

udp_post_segment_fix_csum(skb);
ret = udpv6_queue_rcv_one_skb(sk, skb);
if (ret > 0)
ip6_protocol_deliver_rcu(dev_net(skb->dev), skb, ret,
Expand Down
3 changes: 2 additions & 1 deletion net/ipv6/udp_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,8 @@ INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);

if (NAPI_GRO_CB(skb)->is_flist) {
/* do fraglist only if there is no outer UDP encap (or we already processed it) */
if (NAPI_GRO_CB(skb)->is_flist && !NAPI_GRO_CB(skb)->encap_mark) {
uh->len = htons(skb->len - nhoff);

skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/net/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS += bareudp.sh
TEST_PROGS += unicast_extensions.sh
TEST_PROGS += udpgro_fwd.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
Expand Down
Loading

0 comments on commit df82e9c

Please sign in to comment.