Merge branch 'vxlan-gpe'
Jiri Benc says:

====================
vxlan: implement Generic Protocol Extension (GPE)

v3: just rebased on top of the current net-next, no changes

This patchset implements VXLAN-GPE. It follows the same model as the tun/tap
driver: depending on the chosen mode, the vxlan interface is created either
as ARPHRD_ETHER (non-GPE) or ARPHRD_NONE (GPE).

Note that the internal fdb control plane cannot be used together with
VXLAN-GPE, and any attempt to configure it will be rejected by the driver. In
fact, COLLECT_METADATA is required to be set for now. This can be relaxed in
the future by adding support for static PtP configuration; it will be
backward compatible and won't affect existing users.

The previous version of the patchset supported two GPE modes, L2 and L3. The
L2 mode (now called "ether mode" in the code) was removed from this version.
It can easily be added later if there's demand. The L3 mode is now called
"raw mode" and also supports encapsulated Ethernet headers (via ETH_P_TEB).

The only limitation of not having "ether mode" for GPE concerns ip-route-based
encapsulation: with such a setup, only IP packets can be encapsulated, meaning
no Ethernet encapsulation. There seems to be little use for this, though; if
it turns out to be useful, we'll add it.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Apr 6, 2016
2 parents 8a21ec4 + e1e5314 commit 6f55563
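
Since GPE changes the device type itself, one quick way to check which model a
vxlan interface ended up with is to read its hardware-address family. A minimal
userspace sketch (not part of the patchset; pass the name of a vxlan device you
created):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_arp.h>

int main(int argc, char **argv)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (argc < 2 || fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, argv[1], IFNAMSIZ - 1);	/* e.g. "vxlan0" */
	if (ioctl(fd, SIOCGIFHWADDR, &ifr) < 0)
		return 1;
	/* The kernel reports dev->type as the hardware address family. */
	printf("%s: %s\n", ifr.ifr_name,
	       ifr.ifr_hwaddr.sa_family == ARPHRD_NONE ?
	       "raw/GPE (ARPHRD_NONE)" : "ether (ARPHRD_ETHER)");
	close(fd);
	return 0;
}
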
Showing 5 changed files with 258 additions and 40 deletions.
210 changes: 176 additions & 34 deletions drivers/net/vxlan.c
@@ -1192,6 +1192,45 @@ static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}

static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
__be32 *protocol,
struct sk_buff *skb, u32 vxflags)
{
struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;

/* Need to have Next Protocol set for interfaces in GPE mode. */
if (!gpe->np_applied)
return false;
/* "The initial version is 0. If a receiver does not support the
* version indicated it MUST drop the packet.
*/
if (gpe->version != 0)
return false;
/* "When the O bit is set to 1, the packet is an OAM packet and OAM
* processing MUST occur." However, we don't implement OAM
* processing, thus drop the packet.
*/
if (gpe->oam_flag)
return false;

switch (gpe->next_protocol) {
case VXLAN_GPE_NP_IPV4:
*protocol = htons(ETH_P_IP);
break;
case VXLAN_GPE_NP_IPV6:
*protocol = htons(ETH_P_IPV6);
break;
case VXLAN_GPE_NP_ETHERNET:
*protocol = htons(ETH_P_TEB);
break;
default:
return false;
}

unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
return true;
}
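
/* Editor's note (a sketch, not part of this hunk): struct vxlanhdr_gpe and
 * the VXLAN_GPE_NP_* values used above are added by this series to
 * include/net/vxlan.h, which is not displayed on this page. Following the
 * VXLAN-GPE draft they look roughly like this (little-endian bitfield order
 * shown only):
 */
struct vxlanhdr_gpe {
	u8	oam_flag:1, reserved_flags1:1, np_applied:1,
		instance_applied:1, version:2, reserved_flags2:2;
	u8	reserved_flags3;
	u8	reserved_flags4;
	u8	next_protocol;	/* one of the VXLAN_GPE_NP_* values below */
	__be32	vx_vni;
};
#define VXLAN_GPE_NP_IPV4	0x01
#define VXLAN_GPE_NP_IPV6	0x02
#define VXLAN_GPE_NP_ETHERNET	0x03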

static bool vxlan_set_mac(struct vxlan_dev *vxlan,
struct vxlan_sock *vs,
struct sk_buff *skb)
@@ -1257,9 +1296,11 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
struct vxlanhdr unparsed;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be32 protocol = htons(ETH_P_TEB);
bool raw_proto = false;
void *oiph;

/* Need Vxlan and inner Ethernet header to be present */
/* Need UDP and VXLAN header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
return 1;

@@ -1283,9 +1324,18 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
if (!vxlan)
goto drop;

if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
!net_eq(vxlan->net, dev_net(vxlan->dev))))
goto drop;
/* For backwards compatibility, only allow reserved fields to be
* used by VXLAN extensions if explicitly requested.
*/
if (vs->flags & VXLAN_F_GPE) {
if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
goto drop;
raw_proto = true;
}

if (__iptunnel_pull_header(skb, VXLAN_HLEN, protocol, raw_proto,
!net_eq(vxlan->net, dev_net(vxlan->dev))))
goto drop;

if (vxlan_collect_metadata(vs)) {
__be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
@@ -1304,14 +1354,14 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
memset(md, 0, sizeof(*md));
}

/* For backwards compatibility, only allow reserved fields to be
* used by VXLAN extensions if explicitly requested.
*/
if (vs->flags & VXLAN_F_REMCSUM_RX)
if (!vxlan_remcsum(&unparsed, skb, vs->flags))
goto drop;
if (vs->flags & VXLAN_F_GBP)
vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
/* Note that GBP and GPE can never be active together. This is
* ensured in vxlan_dev_configure.
*/

if (unparsed.vx_flags || unparsed.vx_vni) {
/* If there are any unprocessed flags remaining treat
@@ -1325,8 +1375,13 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
goto drop;
}

if (!vxlan_set_mac(vxlan, vs, skb))
goto drop;
if (!raw_proto) {
if (!vxlan_set_mac(vxlan, vs, skb))
goto drop;
} else {
skb->dev = vxlan->dev;
skb->pkt_type = PACKET_HOST;
}

oiph = skb_network_header(skb);
skb_reset_network_header(skb);
@@ -1685,6 +1740,27 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}

static int vxlan_build_gpe_hdr(struct vxlanhdr *vxh, u32 vxflags,
__be16 protocol)
{
struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)vxh;

gpe->np_applied = 1;

switch (protocol) {
case htons(ETH_P_IP):
gpe->next_protocol = VXLAN_GPE_NP_IPV4;
return 0;
case htons(ETH_P_IPV6):
gpe->next_protocol = VXLAN_GPE_NP_IPV6;
return 0;
case htons(ETH_P_TEB):
gpe->next_protocol = VXLAN_GPE_NP_ETHERNET;
return 0;
}
return -EPFNOSUPPORT;
}

static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
int iphdr_len, __be32 vni,
struct vxlan_metadata *md, u32 vxflags,
@@ -1694,6 +1770,7 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
int min_headroom;
int err;
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
__be16 inner_protocol = htons(ETH_P_TEB);

if ((vxflags & VXLAN_F_REMCSUM_TX) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1712,10 +1789,8 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,

/* Need space for new headers (invalidates iph ptr) */
err = skb_cow_head(skb, min_headroom);
if (unlikely(err)) {
kfree_skb(skb);
return err;
}
if (unlikely(err))
goto out_free;

skb = vlan_hwaccel_push_inside(skb);
if (WARN_ON(!skb))
@@ -1744,9 +1819,19 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,

if (vxflags & VXLAN_F_GBP)
vxlan_build_gbp_hdr(vxh, vxflags, md);
if (vxflags & VXLAN_F_GPE) {
err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
if (err < 0)
goto out_free;
inner_protocol = skb->protocol;
}

skb_set_inner_protocol(skb, htons(ETH_P_TEB));
skb_set_inner_protocol(skb, inner_protocol);
return 0;

out_free:
kfree_skb(skb);
return err;
}

static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
@@ -2106,9 +2191,17 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
info = skb_tunnel_info(skb);

skb_reset_mac_header(skb);
eth = eth_hdr(skb);

if ((vxlan->flags & VXLAN_F_PROXY)) {
if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
if (info && info->mode & IP_TUNNEL_INFO_TX)
vxlan_xmit_one(skb, dev, NULL, false);
else
kfree_skb(skb);
return NETDEV_TX_OK;
}

if (vxlan->flags & VXLAN_F_PROXY) {
eth = eth_hdr(skb);
if (ntohs(eth->h_proto) == ETH_P_ARP)
return arp_reduce(dev, skb);
#if IS_ENABLED(CONFIG_IPV6)
@@ -2123,18 +2216,10 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION)
return neigh_reduce(dev, skb);
}
eth = eth_hdr(skb);
#endif
}

if (vxlan->flags & VXLAN_F_COLLECT_METADATA) {
if (info && info->mode & IP_TUNNEL_INFO_TX)
vxlan_xmit_one(skb, dev, NULL, false);
else
kfree_skb(skb);
return NETDEV_TX_OK;
}

eth = eth_hdr(skb);
f = vxlan_find_mac(vxlan, eth->h_dest);
did_rsc = false;

@@ -2404,7 +2489,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
return 0;
}

static const struct net_device_ops vxlan_netdev_ops = {
static const struct net_device_ops vxlan_netdev_ether_ops = {
.ndo_init = vxlan_init,
.ndo_uninit = vxlan_uninit,
.ndo_open = vxlan_open,
@@ -2421,6 +2506,17 @@ static const struct net_device_ops vxlan_netdev_ops = {
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};

static const struct net_device_ops vxlan_netdev_raw_ops = {
.ndo_init = vxlan_init,
.ndo_uninit = vxlan_uninit,
.ndo_open = vxlan_open,
.ndo_stop = vxlan_stop,
.ndo_start_xmit = vxlan_xmit,
.ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_change_mtu = vxlan_change_mtu,
.ndo_fill_metadata_dst = vxlan_fill_metadata_dst,
};
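
/* Editor's note: compared with vxlan_netdev_ether_ops above, the raw ops
 * intentionally omit ndo_set_mac_address, ndo_validate_addr and the fdb
 * add/del/dump hooks: an ARPHRD_NONE device carries no L2 address, and the
 * cover letter notes that the fdb control plane is rejected for GPE.
 */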

/* Info for udev, that this is a virtual tunnel endpoint */
static struct device_type vxlan_type = {
.name = "vxlan",
@@ -2458,10 +2554,6 @@ static void vxlan_setup(struct net_device *dev)
struct vxlan_dev *vxlan = netdev_priv(dev);
unsigned int h;

eth_hw_addr_random(dev);
ether_setup(dev);

dev->netdev_ops = &vxlan_netdev_ops;
dev->destructor = free_netdev;
SET_NETDEV_DEVTYPE(dev, &vxlan_type);

@@ -2476,8 +2568,7 @@ static void vxlan_setup(struct net_device *dev)
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
netif_keep_dst(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
dev->priv_flags |= IFF_NO_QUEUE;

INIT_LIST_HEAD(&vxlan->next);
spin_lock_init(&vxlan->hash_lock);
@@ -2496,6 +2587,26 @@ static void vxlan_setup(struct net_device *dev)
INIT_HLIST_HEAD(&vxlan->fdb_head[h]);
}

static void vxlan_ether_setup(struct net_device *dev)
{
eth_hw_addr_random(dev);
ether_setup(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
dev->netdev_ops = &vxlan_netdev_ether_ops;
}

static void vxlan_raw_setup(struct net_device *dev)
{
dev->type = ARPHRD_NONE;
dev->hard_header_len = 0;
dev->addr_len = 0;
dev->mtu = ETH_DATA_LEN;
dev->tx_queue_len = 1000;
dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
dev->netdev_ops = &vxlan_netdev_raw_ops;
}
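
/* Editor's note: the defaults above (point-to-point, NOARP, no addresses)
 * closely resemble tun.c's non-ether IFF_TUN setup, consistent with the
 * cover letter's "same model as the tun/tap driver".
 */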

static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_ID] = { .type = NLA_U32 },
[IFLA_VXLAN_GROUP] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
@@ -2522,6 +2633,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
[IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
[IFLA_VXLAN_GPE] = { .type = NLA_FLAG, },
[IFLA_VXLAN_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG },
};

@@ -2722,6 +2834,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
__be16 default_port = vxlan->cfg.dst_port;
struct net_device *lowerdev = NULL;

if (conf->flags & VXLAN_F_GPE) {
if (conf->flags & ~VXLAN_F_ALLOWED_GPE)
return -EINVAL;
/* For now, allow GPE only together with COLLECT_METADATA.
* This can be relaxed later; in such a case, the other side
* of the PtP link will have to be provided.
*/
if (!(conf->flags & VXLAN_F_COLLECT_METADATA))
return -EINVAL;

vxlan_raw_setup(dev);
} else {
vxlan_ether_setup(dev);
}
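
/* Editor's note (a sketch, not displayed on this page): VXLAN_F_ALLOWED_GPE
 * is defined by this series in include/net/vxlan.h and roughly whitelists
 * GPE itself plus the flags that make sense without an inner Ethernet
 * header:
 */
#define VXLAN_F_ALLOWED_GPE	(VXLAN_F_GPE |			\
				 VXLAN_F_IPV6 |			\
				 VXLAN_F_UDP_ZERO_CSUM_TX |	\
				 VXLAN_F_UDP_ZERO_CSUM6_TX |	\
				 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
				 VXLAN_F_COLLECT_METADATA)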

vxlan->net = src_net;

dst->remote_vni = conf->vni;
@@ -2783,8 +2910,12 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
dev->needed_headroom = needed_headroom;

memcpy(&vxlan->cfg, conf, sizeof(*conf));
if (!vxlan->cfg.dst_port)
vxlan->cfg.dst_port = default_port;
if (!vxlan->cfg.dst_port) {
if (conf->flags & VXLAN_F_GPE)
vxlan->cfg.dst_port = 4790; /* IANA assigned VXLAN-GPE port */
else
vxlan->cfg.dst_port = default_port;
}
vxlan->flags |= conf->flags;

if (!vxlan->cfg.age_interval)
@@ -2955,6 +3086,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
if (data[IFLA_VXLAN_GBP])
conf.flags |= VXLAN_F_GBP;

if (data[IFLA_VXLAN_GPE])
conf.flags |= VXLAN_F_GPE;

if (data[IFLA_VXLAN_REMCSUM_NOPARTIAL])
conf.flags |= VXLAN_F_REMCSUM_NOPARTIAL;

@@ -2971,6 +3105,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
case -EEXIST:
pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
break;

case -EINVAL:
pr_info("unsupported combination of extensions\n");
break;
}

return err;
@@ -3098,6 +3236,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_flag(skb, IFLA_VXLAN_GBP))
goto nla_put_failure;

if (vxlan->flags & VXLAN_F_GPE &&
nla_put_flag(skb, IFLA_VXLAN_GPE))
goto nla_put_failure;

if (vxlan->flags & VXLAN_F_REMCSUM_NOPARTIAL &&
nla_put_flag(skb, IFLA_VXLAN_REMCSUM_NOPARTIAL))
goto nla_put_failure;
11 changes: 9 additions & 2 deletions include/net/ip_tunnels.h
@@ -295,8 +295,15 @@ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph,
return INET_ECN_encapsulate(tos, inner);
}

int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto,
bool xnet);
int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
__be16 inner_proto, bool raw_proto, bool xnet);

static inline int iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
__be16 inner_proto, bool xnet)
{
return __iptunnel_pull_header(skb, hdr_len, inner_proto, false, xnet);
}
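
/* Editor's note (a rough sketch): the matching net/ipv4/ip_tunnel_core.c
 * change is one of the files this page leaves collapsed. The idea: when
 * raw_proto is true, the inner-Ethernet (ETH_P_TEB) parsing is skipped and
 * skb->protocol is taken directly from inner_proto, which is what lets
 * VXLAN-GPE hand over raw IP packets.
 */
int __iptunnel_pull_header(struct sk_buff *skb, int hdr_len,
			   __be16 inner_proto, bool raw_proto, bool xnet)
{
	if (unlikely(!pskb_may_pull(skb, hdr_len)))
		return -ENOMEM;

	skb_pull_rcsum(skb, hdr_len);

	if (!raw_proto && inner_proto == htons(ETH_P_TEB)) {
		struct ethhdr *eh;

		if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
			return -ENOMEM;

		eh = (struct ethhdr *)skb->data;
		if (likely(eth_proto_is_802_3(eh->h_proto)))
			skb->protocol = eh->h_proto;
		else
			skb->protocol = htons(ETH_P_802_2);
	} else {
		skb->protocol = inner_proto;
	}

	skb_clear_hash_if_not_l4(skb);
	skb->vlan_tci = 0;
	skb_set_queue_mapping(skb, 0);
	skb_scrub_packet(skb, xnet);
	return 0;
}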

void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, u8 proto,
u8 tos, u8 ttl, __be16 df, bool xnet);
(The remaining changed files were not loaded on this page.)
