Skip to content

Commit

Permalink
Merge branch 'ICMP-error-handling-for-UDP-tunnels'
Browse files Browse the repository at this point in the history
Stefano Brivio says:

====================
ICMP error handling for UDP tunnels

This series introduces ICMP error handling for UDP tunnels and
encapsulations and related selftests. We need to handle ICMP errors to
support PMTU discovery and route redirection -- this support is entirely
missing right now:

- patch 1/11 adds a socket lookup for UDP tunnels that use, by design,
  the same destination port on both endpoints -- i.e. VXLAN and GENEVE
- patches 2/11 to 7/11 are specific to VXLAN and GENEVE
- patches 8/11 and 9/11 add infrastructure for lookup of encapsulations
  where sent packets cannot be matched via receiving socket lookup, i.e.
  FoU and GUE
- patches 10/11 and 11/11 are specific to FoU and GUE

v2: changes are listed in the single patches
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Nov 9, 2018
2 parents 141b95d + 56fd865 commit 20da4ef
Show file tree
Hide file tree
Showing 40 changed files with 1,083 additions and 166 deletions.
107 changes: 99 additions & 8 deletions drivers/net/geneve.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ struct geneve_dev {
bool collect_md;
bool use_udp6_rx_checksums;
bool ttl_inherit;
enum ifla_geneve_df df;
};

struct geneve_sock {
Expand Down Expand Up @@ -387,6 +388,57 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
return 0;
}

/* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors.
 *
 * @sk:  UDP socket the error was matched to; its sk_user_data is read as the
 *       geneve_sock for this tunnel endpoint
 * @skb: buffer handed over by the UDP error handler
 *
 * Returns 0 if a tunnel matching the packet is found, -EINVAL if the GENEVE
 * header cannot be read or carries an unexpected version or protocol type,
 * -ENOENT if the socket has no user data or no tunnel matches, and
 * -EPFNOSUPPORT if the socket family is neither AF_INET nor (with IPv6
 * enabled) AF_INET6.
 */
static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
{
	struct genevehdr *geneveh;
	struct geneve_sock *gs;
	u8 zero_vni[3] = { 0 };
	u8 *vni = zero_vni;

	/* A bare skb->len comparison does not tell us that GENEVE_BASE_HLEN
	 * bytes are actually available, in the linear area, starting at the
	 * transport header. Make sure they are before geneve_hdr() is
	 * dereferenced below. Header pointers are only fetched after this
	 * call, as pulling may reallocate the skb head.
	 */
	if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN))
		return -EINVAL;

	geneveh = geneve_hdr(skb);
	if (geneveh->ver != GENEVE_VER)
		return -EINVAL;

	if (geneveh->proto_type != htons(ETH_P_TEB))
		return -EINVAL;

	gs = rcu_dereference_sk_user_data(sk);
	if (!gs)
		return -ENOENT;

	if (geneve_get_sk_family(gs) == AF_INET) {
		struct iphdr *iph = ip_hdr(skb);
		__be32 addr4 = 0;

		/* With collect_md the lookup uses a zero address and VNI */
		if (!gs->collect_md) {
			vni = geneveh->vni;
			addr4 = iph->daddr;
		}

		return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
	}

#if IS_ENABLED(CONFIG_IPV6)
	if (geneve_get_sk_family(gs) == AF_INET6) {
		struct ipv6hdr *ip6h = ipv6_hdr(skb);
		struct in6_addr addr6 = { 0 };

		/* With collect_md the lookup uses a zero address and VNI */
		if (!gs->collect_md) {
			vni = geneveh->vni;
			addr6 = ip6h->daddr;
		}

		return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
	}
#endif

	return -EPFNOSUPPORT;
}

static struct socket *geneve_create_sock(struct net *net, bool ipv6,
__be16 port, bool ipv6_rx_csum)
{
Expand Down Expand Up @@ -544,6 +596,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
tunnel_cfg.gro_receive = geneve_gro_receive;
tunnel_cfg.gro_complete = geneve_gro_complete;
tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
tunnel_cfg.encap_destroy = NULL;
setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
list_add(&gs->list, &gn->sock_list);
Expand Down Expand Up @@ -823,8 +876,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct rtable *rt;
struct flowi4 fl4;
__u8 tos, ttl;
__be16 df = 0;
__be16 sport;
__be16 df;
int err;

rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
Expand All @@ -838,15 +891,31 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (geneve->collect_md) {
tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
ttl = key->ttl;

df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
} else {
tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
if (geneve->ttl_inherit)
ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
else
ttl = key->ttl;
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);

if (geneve->df == GENEVE_DF_SET) {
df = htons(IP_DF);
} else if (geneve->df == GENEVE_DF_INHERIT) {
struct ethhdr *eth = eth_hdr(skb);

if (ntohs(eth->h_proto) == ETH_P_IPV6) {
df = htons(IP_DF);
} else if (ntohs(eth->h_proto) == ETH_P_IP) {
struct iphdr *iph = ip_hdr(skb);

if (iph->frag_off & htons(IP_DF))
df = htons(IP_DF);
}
}
}
df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;

err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
if (unlikely(err))
Expand Down Expand Up @@ -1093,6 +1162,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 },
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
[IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
[IFLA_GENEVE_DF] = { .type = NLA_U8 },
};

static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
Expand Down Expand Up @@ -1128,6 +1198,16 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
}
}

if (data[IFLA_GENEVE_DF]) {
enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);

if (df < 0 || df > GENEVE_DF_MAX) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_GENEVE_DF],
"Invalid DF attribute");
return -EINVAL;
}
}

return 0;
}

Expand Down Expand Up @@ -1173,7 +1253,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
struct netlink_ext_ack *extack,
const struct ip_tunnel_info *info,
bool metadata, bool ipv6_rx_csum,
bool ttl_inherit)
bool ttl_inherit, enum ifla_geneve_df df)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *t, *geneve = netdev_priv(dev);
Expand Down Expand Up @@ -1223,6 +1303,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
geneve->collect_md = metadata;
geneve->use_udp6_rx_checksums = ipv6_rx_csum;
geneve->ttl_inherit = ttl_inherit;
geneve->df = df;

err = register_netdevice(dev);
if (err)
Expand All @@ -1242,7 +1323,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack,
struct ip_tunnel_info *info, bool *metadata,
bool *use_udp6_rx_checksums, bool *ttl_inherit,
bool changelink)
enum ifla_geneve_df *df, bool changelink)
{
int attrtype;

Expand Down Expand Up @@ -1330,6 +1411,9 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
if (data[IFLA_GENEVE_TOS])
info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);

if (data[IFLA_GENEVE_DF])
*df = nla_get_u8(data[IFLA_GENEVE_DF]);

if (data[IFLA_GENEVE_LABEL]) {
info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
IPV6_FLOWLABEL_MASK;
Expand Down Expand Up @@ -1448,6 +1532,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
enum ifla_geneve_df df = GENEVE_DF_UNSET;
bool use_udp6_rx_checksums = false;
struct ip_tunnel_info info;
bool ttl_inherit = false;
Expand All @@ -1456,12 +1541,12 @@ static int geneve_newlink(struct net *net, struct net_device *dev,

init_tnl_info(&info, GENEVE_UDP_PORT);
err = geneve_nl2info(tb, data, extack, &info, &metadata,
&use_udp6_rx_checksums, &ttl_inherit, false);
&use_udp6_rx_checksums, &ttl_inherit, &df, false);
if (err)
return err;

err = geneve_configure(net, dev, extack, &info, metadata,
use_udp6_rx_checksums, ttl_inherit);
use_udp6_rx_checksums, ttl_inherit, df);
if (err)
return err;

Expand Down Expand Up @@ -1524,6 +1609,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
struct ip_tunnel_info info;
bool metadata;
bool use_udp6_rx_checksums;
enum ifla_geneve_df df;
bool ttl_inherit;
int err;

Expand All @@ -1539,7 +1625,7 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
ttl_inherit = geneve->ttl_inherit;
err = geneve_nl2info(tb, data, extack, &info, &metadata,
&use_udp6_rx_checksums, &ttl_inherit, true);
&use_udp6_rx_checksums, &ttl_inherit, &df, true);
if (err)
return err;

Expand Down Expand Up @@ -1572,6 +1658,7 @@ static size_t geneve_get_size(const struct net_device *dev)
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TOS */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_DF */
nla_total_size(sizeof(__be32)) + /* IFLA_GENEVE_LABEL */
nla_total_size(sizeof(__be16)) + /* IFLA_GENEVE_PORT */
nla_total_size(0) + /* IFLA_GENEVE_COLLECT_METADATA */
Expand Down Expand Up @@ -1620,6 +1707,9 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
goto nla_put_failure;

if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->df))
goto nla_put_failure;

if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
goto nla_put_failure;

Expand Down Expand Up @@ -1671,7 +1761,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
return dev;

init_tnl_info(&info, dst_port);
err = geneve_configure(net, dev, NULL, &info, true, true, false);
err = geneve_configure(net, dev, NULL, &info,
true, true, false, GENEVE_DF_UNSET);
if (err) {
free_netdev(dev);
return ERR_PTR(err);
Expand Down
58 changes: 57 additions & 1 deletion drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -1552,6 +1552,34 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
}

/* Callback from net/ipv{4,6}/udp.c to check that we have a VNI for errors.
 *
 * @sk:  UDP socket the error was matched to; its sk_user_data is read as the
 *       vxlan_sock for this tunnel endpoint
 * @skb: buffer handed over by the UDP error handler
 *
 * Returns 0 if a VXLAN device is found for the VNI carried in the packet,
 * -EINVAL if the VXLAN header cannot be read or does not have the VNI flag
 * set, and -ENOENT if the socket has no user data or no device matches.
 */
static int vxlan_err_lookup(struct sock *sk, struct sk_buff *skb)
{
	struct vxlan_dev *vxlan;
	struct vxlan_sock *vs;
	struct vxlanhdr *hdr;
	__be32 vni;

	/* A bare skb->len comparison does not tell us that VXLAN_HLEN bytes
	 * are actually available, in the linear area, starting at the
	 * transport header. Make sure they are before vxlan_hdr() is
	 * dereferenced below. The header pointer is only fetched after this
	 * call, as pulling may reallocate the skb head.
	 */
	if (!pskb_may_pull(skb, skb_transport_offset(skb) + VXLAN_HLEN))
		return -EINVAL;

	hdr = vxlan_hdr(skb);

	if (!(hdr->vx_flags & VXLAN_HF_VNI))
		return -EINVAL;

	vs = rcu_dereference_sk_user_data(sk);
	if (!vs)
		return -ENOENT;

	vni = vxlan_vni(hdr->vx_vni);
	vxlan = vxlan_vs_find_vni(vs, skb->dev->ifindex, vni);
	if (!vxlan)
		return -ENOENT;

	return 0;
}

static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
Expand Down Expand Up @@ -2250,13 +2278,24 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}

/* Bypass encapsulation if the destination is local */
if (!info) {
/* Bypass encapsulation if the destination is local */
err = encap_bypass_if_local(skb, dev, vxlan, dst,
dst_port, ifindex, vni,
&rt->dst, rt->rt_flags);
if (err)
goto out_unlock;

if (vxlan->cfg.df == VXLAN_DF_SET) {
df = htons(IP_DF);
} else if (vxlan->cfg.df == VXLAN_DF_INHERIT) {
struct ethhdr *eth = eth_hdr(skb);

if (ntohs(eth->h_proto) == ETH_P_IPV6 ||
(ntohs(eth->h_proto) == ETH_P_IP &&
old_iph->frag_off & htons(IP_DF)))
df = htons(IP_DF);
}
} else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
df = htons(IP_DF);
}
Expand Down Expand Up @@ -2809,6 +2848,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
[IFLA_VXLAN_TTL_INHERIT] = { .type = NLA_FLAG },
[IFLA_VXLAN_DF] = { .type = NLA_U8 },
};

static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
Expand Down Expand Up @@ -2865,6 +2905,16 @@ static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
}
}

if (data[IFLA_VXLAN_DF]) {
enum ifla_vxlan_df df = nla_get_u8(data[IFLA_VXLAN_DF]);

if (df < 0 || df > VXLAN_DF_MAX) {
NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_VXLAN_DF],
"Invalid DF attribute");
return -EINVAL;
}
}

return 0;
}

Expand Down Expand Up @@ -2948,6 +2998,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
tunnel_cfg.sk_user_data = vs;
tunnel_cfg.encap_type = 1;
tunnel_cfg.encap_rcv = vxlan_rcv;
tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
tunnel_cfg.encap_destroy = NULL;
tunnel_cfg.gro_receive = vxlan_gro_receive;
tunnel_cfg.gro_complete = vxlan_gro_complete;
Expand Down Expand Up @@ -3509,6 +3560,9 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
conf->mtu = nla_get_u32(tb[IFLA_MTU]);
}

if (data[IFLA_VXLAN_DF])
conf->df = nla_get_u8(data[IFLA_VXLAN_DF]);

return 0;
}

Expand Down Expand Up @@ -3601,6 +3655,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL_INHERIT */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_DF */
nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
Expand Down Expand Up @@ -3667,6 +3722,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_VXLAN_TTL_INHERIT,
!!(vxlan->cfg.flags & VXLAN_F_TTL_INHERIT)) ||
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
nla_put_u8(skb, IFLA_VXLAN_DF, vxlan->cfg.df) ||
nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
!!(vxlan->cfg.flags & VXLAN_F_LEARN)) ||
Expand Down
1 change: 1 addition & 0 deletions include/linux/udp.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ struct udp_sock {
* For encapsulation sockets.
*/
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
void (*encap_destroy)(struct sock *sk);

/* GRO functions for UDP socket */
Expand Down
2 changes: 1 addition & 1 deletion include/net/icmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ struct net;

void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
int icmp_rcv(struct sk_buff *skb);
void icmp_err(struct sk_buff *skb, u32 info);
int icmp_err(struct sk_buff *skb, u32 info);
int icmp_init(void);
void icmp_out_count(struct net *net, unsigned char type);

Expand Down
2 changes: 2 additions & 0 deletions include/net/ip6_tunnel.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ struct ip6_tnl_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, struct flowi6 *fl6);
int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
u8 type, u8 code, int offset, __be32 info);
};

#ifdef CONFIG_INET
Expand Down
1 change: 1 addition & 0 deletions include/net/ip_tunnels.h
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,7 @@ struct ip_tunnel_encap_ops {
size_t (*encap_hlen)(struct ip_tunnel_encap *e);
int (*build_header)(struct sk_buff *skb, struct ip_tunnel_encap *e,
u8 *protocol, struct flowi4 *fl4);
int (*err_handler)(struct sk_buff *skb, u32 info);
};

#define MAX_IPTUN_ENCAP_OPS 8
Expand Down
Loading

0 comments on commit 20da4ef

Please sign in to comment.