Skip to content

Commit

Permalink
udp_tunnel: create a fastpath GRO lookup.
Browse files Browse the repository at this point in the history
Most UDP tunnels bind a socket to a local port, with ANY address, no
peer and no interface index specified.
Additionally it's quite common to have a single tunnel device per
namespace.

Track in each namespace the UDP tunnel socket respecting the above.
When only a single one is present, store a reference in the netns.

When such a reference is not NULL, the UDP tunnel GRO lookup just needs to
match the incoming packet destination port against the socket local port.

The tunnel socket never sets the reuse[port] flag[s]. When bound to no
address and interface, no other socket can exist in the same netns
matching the specified local port.

Matching packets with non-local destination addresses will be
aggregated, and eventually segmented as needed - no behavior changes
intended.

Note that the UDP tunnel socket reference is stored into struct
netns_ipv4 for both IPv4 and IPv6 tunnels. That is intentional to keep
all the fastpath-related netns fields in the same struct and allow
cacheline-based optimization. Currently both the IPv4 and IPv6 socket
pointers share the same cacheline as the `udp_table` field.

Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://patch.msgid.link/4d5c319c4471161829f50cb8436841de81a5edae.1741718157.git.pabeni@redhat.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Paolo Abeni committed Mar 18, 2025
1 parent f5825e7 commit 8d4880d
Show file tree
Hide file tree
Showing 9 changed files with 114 additions and 1 deletion.
16 changes: 16 additions & 0 deletions include/linux/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ struct udp_sock {

/* Cache friendly copy of sk->sk_peek_off >= 0 */
bool peeking_with_offset;

/*
* Accounting for the tunnel GRO fastpath.
* Unprotected by compilers guard, as it uses space available in
* the last UDP socket cacheline.
*/
struct hlist_node tunnel_list;
};

#define udp_test_bit(nr, sk) \
Expand Down Expand Up @@ -219,4 +226,13 @@ static inline void udp_allow_gso(struct sock *sk)

#define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE)

/*
 * Fetch the tunnel socket cached in @net for the GRO fastpath lookup.
 *
 * @net:     network namespace to inspect
 * @is_ipv6: selects the IPv6 (true) or IPv4 (false) slot of
 *           net->ipv4.udp_tunnel_gro[]
 *
 * Returns the cached socket, or NULL when no socket is cached or when
 * CONFIG_NET_UDP_TUNNEL is not enabled. The pointer is read with
 * rcu_dereference(), so the caller is expected to be inside an RCU
 * read-side critical section.
 */
static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6)
{
	struct sock *cached = NULL;

#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
	cached = rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk);
#endif
	return cached;
}

#endif /* _LINUX_UDP_H */
11 changes: 11 additions & 0 deletions include/net/netns/ipv4.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ struct sysctl_fib_multipath_hash_seed {
};
#endif

/*
 * Per-netns, per-address-family bookkeeping for the UDP tunnel GRO
 * fastpath lookup.
 */
struct udp_tunnel_gro {
/* RCU-protected socket pointer; set only while @list holds exactly one
 * entry, NULL otherwise.
 */
struct sock __rcu *sk;
/* Tunnel sockets tracked for this family, linked through
 * udp_sock::tunnel_list.
 */
struct hlist_head list;
};

struct netns_ipv4 {
/* Cacheline organization can be found documented in
* Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst.
Expand Down Expand Up @@ -85,6 +90,11 @@ struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
struct udp_table *udp_table;

#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
/* Not in a pernet subsys because need to be available at GRO stage */
struct udp_tunnel_gro udp_tunnel_gro[2];
#endif

#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
struct ctl_table_header *frags_hdr;
Expand Down Expand Up @@ -277,4 +287,5 @@ struct netns_ipv4 {
struct hlist_head *inet_addr_lst;
struct delayed_work addr_chk_work;
};

#endif
1 change: 1 addition & 0 deletions include/net/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ static inline void udp_lib_init_sock(struct sock *sk)
struct udp_sock *up = udp_sk(sk);

skb_queue_head_init(&up->reader_queue);
INIT_HLIST_NODE(&up->tunnel_list);
up->forward_threshold = sk->sk_rcvbuf >> 2;
set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
}
Expand Down
18 changes: 18 additions & 0 deletions include/net/udp_tunnel.h
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,24 @@ static inline void udp_tunnel_encap_enable(struct sock *sk)
udp_encap_enable();
}

#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
/*
 * Add (@add == true) or remove (@add == false) @sk from the per-netns
 * tunnel socket list of @net and refresh the cached GRO lookup socket.
 */
void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add);
#else
/* UDP tunnel support is compiled out: there is nothing to track. */
static inline void
udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add)
{
}
#endif

static inline void udp_tunnel_cleanup_gro(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
struct net *net = sock_net(sk);

if (!up->tunnel_list.pprev)
return;

udp_tunnel_update_gro_lookup(net, sk, false);
}

#define UDP_TUNNEL_NIC_MAX_TABLES 4

enum udp_tunnel_nic_info_flags {
Expand Down
13 changes: 12 additions & 1 deletion net/ipv4/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2891,8 +2891,10 @@ void udp_destroy_sock(struct sock *sk)
if (encap_destroy)
encap_destroy(sk);
}
if (udp_test_bit(ENCAP_ENABLED, sk))
if (udp_test_bit(ENCAP_ENABLED, sk)) {
static_branch_dec(&udp_encap_needed_key);
udp_tunnel_cleanup_gro(sk);
}
}
}

Expand Down Expand Up @@ -3804,6 +3806,15 @@ static void __net_init udp_set_table(struct net *net)

static int __net_init udp_pernet_init(struct net *net)
{
#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
int i;

/* No tunnel is configured */
for (i = 0; i < ARRAY_SIZE(net->ipv4.udp_tunnel_gro); ++i) {
INIT_HLIST_HEAD(&net->ipv4.udp_tunnel_gro[i].list);
RCU_INIT_POINTER(net->ipv4.udp_tunnel_gro[i].sk, NULL);
}
#endif
udp_sysctl_init(net);
udp_set_table(net);

Expand Down
37 changes: 37 additions & 0 deletions net/ipv4/udp_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,38 @@
#include <net/udp.h>
#include <net/protocol.h>
#include <net/inet_common.h>
#include <net/udp_tunnel.h>

#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
/* Serializes every update of the per-netns tunnel lists and cached socket. */
static DEFINE_SPINLOCK(udp_tunnel_gro_lock);

/*
 * Update the tunnel GRO lookup state of @net for the family of @sk.
 *
 * @net: network namespace owning the accounting
 * @sk:  UDP tunnel socket being tracked or untracked
 * @add: true to link @sk into the list, false to unlink it
 *
 * After the list is changed, the cached socket pointer is published only
 * when the list holds exactly one entry; in every other case it is reset
 * to NULL.
 */
void udp_tunnel_update_gro_lookup(struct net *net, struct sock *sk, bool add)
{
	struct udp_sock *self = udp_sk(sk);
	struct udp_tunnel_gro *slot;
	struct hlist_node *head_node;
	struct udp_sock *only;

	spin_lock(&udp_tunnel_gro_lock);

	/* Slot 0 tracks IPv4 sockets, slot 1 tracks IPv6 sockets. */
	slot = &net->ipv4.udp_tunnel_gro[sk->sk_family == AF_INET6];

	if (!add)
		hlist_del_init(&self->tunnel_list);
	else
		hlist_add_head(&self->tunnel_list, &slot->list);

	head_node = slot->list.first;
	if (!head_node || head_node->next) {
		/* Empty list or more than one socket: no fastpath candidate. */
		RCU_INIT_POINTER(slot->sk, NULL);
	} else {
		only = hlist_entry(head_node, struct udp_sock, tunnel_list);
		rcu_assign_pointer(slot->sk, (struct sock *)only);
	}

	spin_unlock(&udp_tunnel_gro_lock);
}
EXPORT_SYMBOL_GPL(udp_tunnel_update_gro_lookup);
#endif

static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
Expand Down Expand Up @@ -635,8 +667,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct iphdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net_rcu(skb->dev);
struct sock *sk;
int iif, sdif;

sk = udp_tunnel_sk(net, false);
if (sk && dport == htons(sk->sk_num))
return sk;

inet_get_iif_sdif(skb, &iif, &sdif);

return __udp4_lib_lookup(net, iph->saddr, sport,
Expand Down
12 changes: 12 additions & 0 deletions net/ipv4/udp_tunnel_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,15 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
}
EXPORT_SYMBOL(udp_sock_create4);

/*
 * Tell whether @sk is bound to the wildcard ("any") local address.
 *
 * The field to test depends on the socket family: AF_INET6 sockets keep
 * the bound address in sk_v6_rcv_saddr, while AF_INET sockets keep it in
 * sk_rcv_saddr. Testing sk_v6_rcv_saddr unconditionally whenever
 * CONFIG_IPV6 is enabled could report an IPv4 socket bound to a specific
 * address as wildcard-bound.
 * NOTE(review): this assumes an AF_INET bind does not update
 * sk_v6_rcv_saddr - confirm against the inet bind path.
 *
 * Returns true when no specific local address is bound.
 */
static bool sk_saddr_any(struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
	if (sk->sk_family == AF_INET6)
		return ipv6_addr_any(&sk->sk_v6_rcv_saddr);
#endif
	return !sk->sk_rcv_saddr;
}

void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
struct udp_tunnel_sock_cfg *cfg)
{
Expand All @@ -80,6 +89,9 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock,
udp_sk(sk)->gro_complete = cfg->gro_complete;

udp_tunnel_encap_enable(sk);

if (!sk->sk_dport && !sk->sk_bound_dev_if && sk_saddr_any(sock->sk))
udp_tunnel_update_gro_lookup(net, sock->sk, true);
}
EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock);

Expand Down
2 changes: 2 additions & 0 deletions net/ipv6/udp.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include <net/tcp_states.h>
#include <net/ip6_checksum.h>
#include <net/ip6_tunnel.h>
#include <net/udp_tunnel.h>
#include <net/xfrm.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
Expand Down Expand Up @@ -1825,6 +1826,7 @@ void udpv6_destroy_sock(struct sock *sk)
if (udp_test_bit(ENCAP_ENABLED, sk)) {
static_branch_dec(&udpv6_encap_needed_key);
udp_encap_disable();
udp_tunnel_cleanup_gro(sk);
}
}
}
Expand Down
5 changes: 5 additions & 0 deletions net/ipv6/udp_offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net_rcu(skb->dev);
struct sock *sk;
int iif, sdif;

sk = udp_tunnel_sk(net, true);
if (sk && dport == htons(sk->sk_num))
return sk;

inet6_get_iif_sdif(skb, &iif, &sdif);

return __udp6_lib_lookup(net, &iph->saddr, sport,
Expand Down

0 comments on commit 8d4880d

Please sign in to comment.