Skip to content

Commit

Permalink
vxlan: support both IPv4 and IPv6 sockets in a single vxlan device
Browse files Browse the repository at this point in the history
For a metadata-based vxlan interface, open both an IPv4 and an IPv6 socket.
This is much more user friendly: it's not necessary to create two vxlan
interfaces and pay attention to using the right one in routing rules.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Jiri Benc authored and David S. Miller committed Sep 27, 2015
1 parent 205f356 commit b1be00a
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 42 deletions.
128 changes: 90 additions & 38 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -993,19 +993,30 @@ static bool vxlan_snoop(struct net_device *dev,
static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
{
struct vxlan_dev *vxlan;
unsigned short family = dev->default_dst.remote_ip.sa.sa_family;

/* The vxlan_sock is only used by dev, leaving group has
* no effect on other vxlan devices.
*/
if (atomic_read(&dev->vn_sock->refcnt) == 1)
if (family == AF_INET && dev->vn4_sock &&
atomic_read(&dev->vn4_sock->refcnt) == 1)
return false;
#if IS_ENABLED(CONFIG_IPV6)
if (family == AF_INET6 && dev->vn6_sock &&
atomic_read(&dev->vn6_sock->refcnt) == 1)
return false;
#endif

list_for_each_entry(vxlan, &vn->vxlan_list, next) {
if (!netif_running(vxlan->dev) || vxlan == dev)
continue;

if (vxlan->vn_sock != dev->vn_sock)
if (family == AF_INET && vxlan->vn4_sock != dev->vn4_sock)
continue;
#if IS_ENABLED(CONFIG_IPV6)
if (family == AF_INET6 && vxlan->vn6_sock != dev->vn6_sock)
continue;
#endif

if (!vxlan_addr_equal(&vxlan->default_dst.remote_ip,
&dev->default_dst.remote_ip))
Expand All @@ -1021,16 +1032,16 @@ static bool vxlan_group_used(struct vxlan_net *vn, struct vxlan_dev *dev)
return false;
}

static void vxlan_sock_release(struct vxlan_dev *vxlan)
static void __vxlan_sock_release(struct vxlan_sock *vs)
{
struct vxlan_sock *vs = vxlan->vn_sock;
struct sock *sk = vs->sock->sk;
struct net *net = sock_net(sk);
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_net *vn;

if (!vs)
return;
if (!atomic_dec_and_test(&vs->refcnt))
return;

vn = net_generic(sock_net(vs->sock->sk), vxlan_net_id);
spin_lock(&vn->sock_lock);
hlist_del_rcu(&vs->hlist);
vxlan_notify_del_rx_port(vs);
Expand All @@ -1039,60 +1050,74 @@ static void vxlan_sock_release(struct vxlan_dev *vxlan)
queue_work(vxlan_wq, &vs->del_work);
}

/* Release the listening sockets attached to a vxlan device.
 *
 * Hands the device's IPv4 socket pointer and, when the kernel is built
 * with CONFIG_IPV6, its IPv6 socket pointer to __vxlan_sock_release().
 * The pointers stored in @vxlan are not cleared here.
 */
static void vxlan_sock_release(struct vxlan_dev *vxlan)
{
__vxlan_sock_release(vxlan->vn4_sock);
#if IS_ENABLED(CONFIG_IPV6)
/* vn6_sock only exists in struct vxlan_dev when IPv6 is enabled */
__vxlan_sock_release(vxlan->vn6_sock);
#endif
}

/* Update multicast group membership when first VNI on
* multicast address is brought up
*/
static int vxlan_igmp_join(struct vxlan_dev *vxlan)
{
struct vxlan_sock *vs = vxlan->vn_sock;
struct sock *sk = vs->sock->sk;
struct sock *sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
int ret = -EINVAL;

lock_sock(sk);
if (ip->sa.sa_family == AF_INET) {
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
.imr_ifindex = ifindex,
};

sk = vxlan->vn4_sock->sock->sk;
lock_sock(sk);
ret = ip_mc_join_group(sk, &mreq);
release_sock(sk);
#if IS_ENABLED(CONFIG_IPV6)
} else {
sk = vxlan->vn6_sock->sock->sk;
lock_sock(sk);
ret = ipv6_stub->ipv6_sock_mc_join(sk, ifindex,
&ip->sin6.sin6_addr);
release_sock(sk);
#endif
}
release_sock(sk);

return ret;
}

/* Inverse of vxlan_igmp_join when last VNI is brought down */
static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
{
struct vxlan_sock *vs = vxlan->vn_sock;
struct sock *sk = vs->sock->sk;
struct sock *sk;
union vxlan_addr *ip = &vxlan->default_dst.remote_ip;
int ifindex = vxlan->default_dst.remote_ifindex;
int ret = -EINVAL;

lock_sock(sk);
if (ip->sa.sa_family == AF_INET) {
struct ip_mreqn mreq = {
.imr_multiaddr.s_addr = ip->sin.sin_addr.s_addr,
.imr_ifindex = ifindex,
};

sk = vxlan->vn4_sock->sock->sk;
lock_sock(sk);
ret = ip_mc_leave_group(sk, &mreq);
release_sock(sk);
#if IS_ENABLED(CONFIG_IPV6)
} else {
sk = vxlan->vn6_sock->sock->sk;
lock_sock(sk);
ret = ipv6_stub->ipv6_sock_mc_drop(sk, ifindex,
&ip->sin6.sin6_addr);
release_sock(sk);
#endif
}
release_sock(sk);

return ret;
}
Expand Down Expand Up @@ -1873,8 +1898,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
{
struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev);
struct sock *sk = vxlan->vn_sock->sock->sk;
unsigned short family = vxlan_get_sk_family(vxlan->vn_sock);
struct sock *sk;
struct rtable *rt = NULL;
const struct iphdr *old_iph;
struct flowi4 fl4;
Expand All @@ -1901,13 +1925,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dev->name);
goto drop;
}
if (family != ip_tunnel_info_af(info))
goto drop;

dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
vni = be64_to_cpu(info->key.tun_id);
remote_ip.sa.sa_family = family;
if (family == AF_INET)
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
if (remote_ip.sa.sa_family == AF_INET)
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
else
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
Expand Down Expand Up @@ -1952,6 +1973,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}

if (dst->sa.sa_family == AF_INET) {
if (!vxlan->vn4_sock)
goto drop;
sk = vxlan->vn4_sock->sock->sk;

if (info && (info->key.tun_flags & TUNNEL_DONT_FRAGMENT))
df = htons(IP_DF);

Expand Down Expand Up @@ -2013,6 +2038,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct flowi6 fl6;
u32 rt6i_flags;

if (!vxlan->vn6_sock)
goto drop;
sk = vxlan->vn6_sock->sock->sk;

memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = rdst ? rdst->remote_ifindex : 0;
fl6.daddr = dst->sin6.sin6_addr;
Expand Down Expand Up @@ -2204,7 +2233,6 @@ static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
__u32 vni = vxlan->default_dst.remote_vni;

vxlan->vn_sock = vs;
spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
spin_unlock(&vn->sock_lock);
Expand Down Expand Up @@ -2535,14 +2563,13 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
}

/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
u32 flags)
static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
__be16 port, u32 flags)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
struct socket *sock;
unsigned int h;
bool ipv6 = !!(flags & VXLAN_F_IPV6);
struct udp_tunnel_sock_cfg tunnel_cfg;

vs = kzalloc(sizeof(*vs), GFP_KERNEL);
Expand Down Expand Up @@ -2587,11 +2614,10 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
return vs;
}

static int vxlan_sock_add(struct vxlan_dev *vxlan)
static int __vxlan_sock_add(struct vxlan_dev *vxlan, bool ipv6)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
struct vxlan_sock *vs = NULL;
bool ipv6 = vxlan->flags & VXLAN_F_IPV6;

if (!vxlan->cfg.no_share) {
spin_lock(&vn->sock_lock);
Expand All @@ -2604,20 +2630,46 @@ static int vxlan_sock_add(struct vxlan_dev *vxlan)
spin_unlock(&vn->sock_lock);
}
if (!vs)
vs = vxlan_socket_create(vxlan->net, vxlan->cfg.dst_port,
vxlan->flags);
vs = vxlan_socket_create(vxlan->net, ipv6,
vxlan->cfg.dst_port, vxlan->flags);
if (IS_ERR(vs))
return PTR_ERR(vs);
#if IS_ENABLED(CONFIG_IPV6)
if (ipv6)
vxlan->vn6_sock = vs;
else
#endif
vxlan->vn4_sock = vs;
vxlan_vs_add_dev(vs, vxlan);
return 0;
}

/* Attach the listening socket(s) a vxlan device needs.
 *
 * Both socket pointers are reset first. An IPv6 socket is requested when
 * the device has VXLAN_F_IPV6 or VXLAN_F_COLLECT_METADATA set (only if the
 * kernel is built with CONFIG_IPV6). An IPv4 socket is requested when
 * VXLAN_F_IPV6 is clear or VXLAN_F_COLLECT_METADATA is set, provided the
 * IPv6 step did not report an error.
 *
 * Returns the result of the last __vxlan_sock_add() call (0 if none was
 * needed). On a negative result, whatever was attached so far is handed
 * to vxlan_sock_release() before returning.
 */
static int vxlan_sock_add(struct vxlan_dev *vxlan)
{
	const bool collect_md = vxlan->flags & VXLAN_F_COLLECT_METADATA;
	const bool is_v6 = vxlan->flags & VXLAN_F_IPV6;
	int err = 0;

	vxlan->vn4_sock = NULL;
#if IS_ENABLED(CONFIG_IPV6)
	vxlan->vn6_sock = NULL;
	if (is_v6 || collect_md)
		err = __vxlan_sock_add(vxlan, true);
#endif
	/* IPv4 is skipped only for a pure IPv6 device or after a failure */
	if (err == 0 && (collect_md || !is_v6))
		err = __vxlan_sock_add(vxlan, false);

	if (err >= 0)
		return err;

	/* Undo a partially completed setup */
	vxlan_sock_release(vxlan);
	return err;
}

static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
struct vxlan_config *conf)
{
struct vxlan_net *vn = net_generic(src_net, vxlan_net_id);
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *dst = &vxlan->default_dst;
unsigned short needed_headroom = ETH_HLEN;
int err;
bool use_ipv6 = false;
__be16 default_port = vxlan->cfg.dst_port;
Expand All @@ -2637,6 +2689,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
if (!IS_ENABLED(CONFIG_IPV6))
return -EPFNOSUPPORT;
use_ipv6 = true;
vxlan->flags |= VXLAN_F_IPV6;
}

if (conf->remote_ifindex) {
Expand All @@ -2657,22 +2710,21 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
pr_info("IPv6 is disabled via sysctl\n");
return -EPERM;
}
vxlan->flags |= VXLAN_F_IPV6;
}
#endif

if (!conf->mtu)
dev->mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);

dev->needed_headroom = lowerdev->hard_header_len +
(use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
} else if (use_ipv6) {
vxlan->flags |= VXLAN_F_IPV6;
dev->needed_headroom = ETH_HLEN + VXLAN6_HEADROOM;
} else {
dev->needed_headroom = ETH_HLEN + VXLAN_HEADROOM;
needed_headroom = lowerdev->hard_header_len;
}

if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
needed_headroom += VXLAN6_HEADROOM;
else
needed_headroom += VXLAN_HEADROOM;
dev->needed_headroom = needed_headroom;

memcpy(&vxlan->cfg, conf, sizeof(*conf));
if (!vxlan->cfg.dst_port)
vxlan->cfg.dst_port = default_port;
Expand Down
14 changes: 11 additions & 3 deletions include/net/vxlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,10 @@ struct vxlan_config {
struct vxlan_dev {
struct hlist_node hlist; /* vni hash table */
struct list_head next; /* vxlan's per namespace list */
struct vxlan_sock *vn_sock; /* listening socket */
struct vxlan_sock *vn4_sock; /* listening socket for IPv4 */
#if IS_ENABLED(CONFIG_IPV6)
struct vxlan_sock *vn6_sock; /* listening socket for IPv6 */
#endif
struct net_device *dev;
struct net *net; /* netns for packet i/o */
struct vxlan_rdst default_dst; /* default destination */
Expand Down Expand Up @@ -195,9 +198,14 @@ struct vxlan_dev {
struct net_device *vxlan_dev_create(struct net *net, const char *name,
u8 name_assign_type, struct vxlan_config *conf);

static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan)
static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan,
unsigned short family)
{
return inet_sk(vxlan->vn_sock->sock->sk)->inet_sport;
#if IS_ENABLED(CONFIG_IPV6)
if (family == AF_INET6)
return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport;
#endif
return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport;
}

static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
Expand Down
3 changes: 2 additions & 1 deletion net/openvswitch/vport-vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,8 @@ static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
{
struct vxlan_dev *vxlan = netdev_priv(vport->dev);
struct net *net = ovs_dp_get_net(vport->dp);
__be16 dst_port = vxlan_dev_dst_port(vxlan);
unsigned short family = ip_tunnel_info_af(upcall->egress_tun_info);
__be16 dst_port = vxlan_dev_dst_port(vxlan, family);
__be16 src_port;
int port_min;
int port_max;
Expand Down

0 comments on commit b1be00a

Please sign in to comment.