Skip to content

Commit

Permalink
Merge branch 'vxlan_group_policy_extension'
Browse files Browse the repository at this point in the history
Thomas Graf says:

====================
VXLAN Group Policy Extension

Implements support for the Group Policy VXLAN extension [0] to provide
a lightweight and simple security label mechanism across network peers
based on VXLAN. The security context and associated metadata are mapped
to/from skb->mark. This allows further mapping to a SELinux context
using SECMARK, to implement ACLs directly with nftables, iptables, OVS,
tc, etc.

The extension is disabled by default and should be run on a distinct
port in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs
which ignore unknown reserved bits will be able to receive VXLAN-GBP
frames.

Simple usage example:

10.1.1.1:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
   # iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200

10.1.1.2:
   # ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
   # iptables -I INPUT -m mark --mark 0x200 -j DROP

iproute2 [1] and OVS [2] support will be provided in separate patches.

[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jan 15, 2015
2 parents 3f3558b + 1dd144c commit 2e62fa6
Show file tree
Hide file tree
Showing 11 changed files with 491 additions and 140 deletions.
113 changes: 89 additions & 24 deletions drivers/net/vxlan.c
Original file line number Diff line number Diff line change
Expand Up @@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
}

/* Find VXLAN socket based on network namespace, address family and UDP port */
static struct vxlan_sock *vxlan_find_sock(struct net *net,
sa_family_t family, __be16 port)
/* Find VXLAN socket based on network namespace, address family, UDP port
 * and enabled unshareable flags.
 */
static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
__be16 port, u32 flags)
{
struct vxlan_sock *vs;
u32 match_flags = flags & VXLAN_F_UNSHAREABLE;

hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port &&
inet_sk(vs->sock->sk)->sk.sk_family == family)
inet_sk(vs->sock->sk)->sk.sk_family == family &&
(vs->flags & VXLAN_F_UNSHAREABLE) == match_flags)
return vs;
}
return NULL;
Expand All @@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)

/* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
sa_family_t family, __be16 port)
sa_family_t family, __be16 port,
u32 flags)
{
struct vxlan_sock *vs;

vs = vxlan_find_sock(net, family, port);
vs = vxlan_find_sock(net, family, port, flags);
if (!vs)
return NULL;

Expand Down Expand Up @@ -620,7 +625,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
continue;

vh2 = (struct vxlanhdr *)(p->data + off_vx);
if (vh->vx_vni != vh2->vx_vni) {
if (vh->vx_flags != vh2->vx_flags ||
vh->vx_vni != vh2->vx_vni) {
NAPI_GRO_CB(p)->same_flow = 0;
continue;
}
Expand Down Expand Up @@ -1183,6 +1189,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct vxlan_sock *vs;
struct vxlanhdr *vxh;
u32 flags, vni;
struct vxlan_metadata md = {0};

/* Need Vxlan and inner Ethernet header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
Expand Down Expand Up @@ -1216,6 +1223,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
vni &= VXLAN_VID_MASK;
}

/* For backwards compatibility, only allow reserved fields to be
* used by VXLAN extensions if explicitly requested.
*/
if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
struct vxlanhdr_gbp *gbp;

gbp = (struct vxlanhdr_gbp *)vxh;
md.gbp = ntohs(gbp->policy_id);

if (gbp->dont_learn)
md.gbp |= VXLAN_GBP_DONT_LEARN;

if (gbp->policy_applied)
md.gbp |= VXLAN_GBP_POLICY_APPLIED;

flags &= ~VXLAN_GBP_USED_BITS;
}

if (flags || (vni & ~VXLAN_VID_MASK)) {
/* If there are any unprocessed flags remaining treat
* this as a malformed packet. This behavior diverges from
Expand All @@ -1229,7 +1254,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
goto bad_flags;
}

vs->rcv(vs, skb, vxh->vx_vni);
md.vni = vxh->vx_vni;
vs->rcv(vs, skb, &md);
return 0;

drop:
Expand All @@ -1246,8 +1272,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
return 1;
}

static void vxlan_rcv(struct vxlan_sock *vs,
struct sk_buff *skb, __be32 vx_vni)
static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
struct vxlan_metadata *md)
{
struct iphdr *oip = NULL;
struct ipv6hdr *oip6 = NULL;
Expand All @@ -1258,7 +1284,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
int err = 0;
union vxlan_addr *remote_ip;

vni = ntohl(vx_vni) >> 8;
vni = ntohl(md->vni) >> 8;
/* Is this VNI defined? */
vxlan = vxlan_vs_find_vni(vs, vni);
if (!vxlan)
Expand Down Expand Up @@ -1292,6 +1318,7 @@ static void vxlan_rcv(struct vxlan_sock *vs,
goto drop;

skb_reset_network_header(skb);
skb->mark = md->gbp;

if (oip6)
err = IP6_ECN_decapsulate(oip6, skb);
Expand Down Expand Up @@ -1641,13 +1668,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb)
return false;
}

/* Fill in the Group Policy (GBP) fields of an outgoing VXLAN header.
 *
 * @vxh: VXLAN header to update; it is also accessed through the
 *       struct vxlanhdr_gbp layout to reach the GBP fields.
 * @vs:  VXLAN socket; not referenced by this function.
 * @md:  metadata whose ->gbp value supplies the policy ID and flag bits.
 *
 * VXLAN_HF_GBP is OR-ed into vx_flags.  The dont_learn and policy_applied
 * header bits are only ever set here, never cleared, so when the matching
 * bit is absent from md->gbp they keep whatever value the header already
 * holds.
 */
static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs,
				struct vxlan_metadata *md)
{
	struct vxlanhdr_gbp *gbp_hdr = (struct vxlanhdr_gbp *)vxh;

	/* Mark the header as carrying the group policy extension. */
	vxh->vx_flags |= htonl(VXLAN_HF_GBP);

	/* Copy the two single-bit indicators out of the metadata word. */
	if (md->gbp & VXLAN_GBP_DONT_LEARN) {
		gbp_hdr->dont_learn = 1;
	}
	if (md->gbp & VXLAN_GBP_POLICY_APPLIED) {
		gbp_hdr->policy_applied = 1;
	}

	/* The policy ID is the masked metadata value, stored through htons(). */
	gbp_hdr->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}

#if IS_ENABLED(CONFIG_IPV6)
static int vxlan6_xmit_skb(struct vxlan_sock *vs,
struct dst_entry *dst, struct sk_buff *skb,
struct net_device *dev, struct in6_addr *saddr,
struct in6_addr *daddr, __u8 prio, __u8 ttl,
__be16 src_port, __be16 dst_port, __be32 vni,
bool xnet)
__be16 src_port, __be16 dst_port,
struct vxlan_metadata *md, bool xnet)
{
struct vxlanhdr *vxh;
int min_headroom;
Expand Down Expand Up @@ -1696,7 +1740,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,

vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = vni;
vxh->vx_vni = md->vni;

if (type & SKB_GSO_TUNNEL_REMCSUM) {
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
Expand All @@ -1714,6 +1758,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
}
}

if (vs->flags & VXLAN_F_GBP)
vxlan_build_gbp_hdr(vxh, vs, md);

skb_set_inner_protocol(skb, htons(ETH_P_TEB));

udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
Expand All @@ -1728,7 +1775,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
int vxlan_xmit_skb(struct vxlan_sock *vs,
struct rtable *rt, struct sk_buff *skb,
__be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
__be16 src_port, __be16 dst_port, __be32 vni, bool xnet)
__be16 src_port, __be16 dst_port,
struct vxlan_metadata *md, bool xnet)
{
struct vxlanhdr *vxh;
int min_headroom;
Expand Down Expand Up @@ -1771,7 +1819,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,

vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
vxh->vx_flags = htonl(VXLAN_HF_VNI);
vxh->vx_vni = vni;
vxh->vx_vni = md->vni;

if (type & SKB_GSO_TUNNEL_REMCSUM) {
u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
Expand All @@ -1789,6 +1837,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
}
}

if (vs->flags & VXLAN_F_GBP)
vxlan_build_gbp_hdr(vxh, vs, md);

skb_set_inner_protocol(skb, htons(ETH_P_TEB));

return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
Expand Down Expand Up @@ -1849,6 +1900,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *old_iph;
struct flowi4 fl4;
union vxlan_addr *dst;
struct vxlan_metadata md;
__be16 src_port = 0, dst_port;
u32 vni;
__be16 df = 0;
Expand Down Expand Up @@ -1910,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

ip_rt_put(rt);
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
dst->sa.sa_family, dst_port);
dst->sa.sa_family, dst_port,
vxlan->flags);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
Expand All @@ -1919,11 +1972,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
md.vni = htonl(vni << 8);
md.gbp = skb->mark;

err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb,
fl4.saddr, dst->sin.sin_addr.s_addr,
tos, ttl, df, src_port, dst_port,
htonl(vni << 8),
tos, ttl, df, src_port, dst_port, &md,
!net_eq(vxlan->net, dev_net(vxlan->dev)));
if (err < 0) {
/* skb is already freed. */
Expand Down Expand Up @@ -1968,18 +2022,21 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,

dst_release(ndst);
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
dst->sa.sa_family, dst_port);
dst->sa.sa_family, dst_port,
vxlan->flags);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
return;
}

ttl = ttl ? : ip6_dst_hoplimit(ndst);
md.vni = htonl(vni << 8);
md.gbp = skb->mark;

err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb,
dev, &fl6.saddr, &fl6.daddr, 0, ttl,
src_port, dst_port, htonl(vni << 8),
src_port, dst_port, &md,
!net_eq(vxlan->net, dev_net(vxlan->dev)));
#endif
}
Expand Down Expand Up @@ -2136,7 +2193,7 @@ static int vxlan_init(struct net_device *dev)

spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
vxlan->dst_port);
vxlan->dst_port, vxlan->flags);
if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
/* If we have a socket with same port already, reuse it */
vxlan_vs_add_dev(vs, vxlan);
Expand Down Expand Up @@ -2382,6 +2439,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
[IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 },
[IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 },
[IFLA_VXLAN_GBP] = { .type = NLA_FLAG, },
};

static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[])
Expand Down Expand Up @@ -2542,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
return vs;

spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags);
if (vs && ((vs->rcv != rcv) ||
!atomic_add_unless(&vs->refcnt, 1, 0)))
vs = ERR_PTR(-EBUSY);
Expand Down Expand Up @@ -2706,8 +2764,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX]))
vxlan->flags |= VXLAN_F_REMCSUM_RX;

if (data[IFLA_VXLAN_GBP])
vxlan->flags |= VXLAN_F_GBP;

if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
vxlan->dst_port)) {
vxlan->dst_port, vxlan->flags)) {
pr_info("duplicate VNI %u\n", vni);
return -EEXIST;
}
Expand Down Expand Up @@ -2851,6 +2912,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
goto nla_put_failure;

if (vxlan->flags & VXLAN_F_GBP &&
nla_put_flag(skb, IFLA_VXLAN_GBP))
goto nla_put_failure;

return 0;

nla_put_failure:
Expand Down
5 changes: 4 additions & 1 deletion include/net/ip_tunnels.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ struct ip_tunnel {
#define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100)
#define TUNNEL_OAM __cpu_to_be16(0x0200)
#define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400)
#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800)
#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800)
#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000)

#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT)

struct tnl_ptk_info {
__be16 flags;
Expand Down
Loading

0 comments on commit 2e62fa6

Please sign in to comment.