Skip to content

Commit

Permalink
ipvs: allow tunneling with gue encapsulation
Browse files Browse the repository at this point in the history
IPIP packets are blocked in some public cloud environments. This patch
allows GUE encapsulation to be used with the tunneling forwarding method,
which makes tunneling work in those environments.

Signed-off-by: Jacky Hu <hengqing.hu@gmail.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
  • Loading branch information
Jacky Hu authored and Pablo Neira Ayuso committed Apr 8, 2019
1 parent 227e1e4 commit 84c0d5e
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 5 deletions.
5 changes: 5 additions & 0 deletions include/net/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,9 @@ struct ip_vs_dest_user_kern {

/* Address family of addr */
u16 af;

u16 tun_type; /* tunnel type */
__be16 tun_port; /* tunnel port */
};


Expand Down Expand Up @@ -660,6 +663,8 @@ struct ip_vs_dest {
atomic_t conn_flags; /* flags to copy to conn */
atomic_t weight; /* server weight */
atomic_t last_weight; /* server latest weight */
__u16 tun_type; /* tunnel type */
__be16 tun_port; /* tunnel port */

refcount_t refcnt; /* reference counter */
struct ip_vs_stats stats; /* statistics */
Expand Down
11 changes: 11 additions & 0 deletions include/uapi/linux/ip_vs.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,13 @@

#define IP_VS_PEDATA_MAXLEN 255

/*
 * Tunnel types.
 *
 * IP_VS_CONN_F_TUNNEL_TYPE_MAX is one past the last defined type and is
 * not itself a tunnel type.
 */
enum {
	IP_VS_CONN_F_TUNNEL_TYPE_IPIP	= 0,	/* IPIP */
	IP_VS_CONN_F_TUNNEL_TYPE_GUE	= 1,	/* GUE */
	IP_VS_CONN_F_TUNNEL_TYPE_MAX	= 2,	/* number of types above */
};

/*
* The struct ip_vs_service_user and struct ip_vs_dest_user are
* used to set IPVS rules through setsockopt.
Expand Down Expand Up @@ -392,6 +399,10 @@ enum {

IPVS_DEST_ATTR_STATS64, /* nested attribute for dest stats */

IPVS_DEST_ATTR_TUN_TYPE, /* tunnel type */

IPVS_DEST_ATTR_TUN_PORT, /* tunnel port */

__IPVS_DEST_ATTR_MAX,
};

Expand Down
35 changes: 34 additions & 1 deletion net/netfilter/ipvs/ip_vs_ctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
conn_flags |= IP_VS_CONN_F_INACTIVE;

/* set the tunnel info */
dest->tun_type = udest->tun_type;
dest->tun_port = udest->tun_port;

/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
Expand Down Expand Up @@ -987,6 +991,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return -ERANGE;
}

if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
if (udest->tun_port == 0) {
pr_err("%s(): tunnel port is zero\n", __func__);
return -EINVAL;
}
}

ip_vs_addr_copy(udest->af, &daddr, &udest->addr);

/* We use function that requires RCU lock */
Expand Down Expand Up @@ -1051,6 +1062,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
return -ERANGE;
}

if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
if (udest->tun_port == 0) {
pr_err("%s(): tunnel port is zero\n", __func__);
return -EINVAL;
}
}

ip_vs_addr_copy(udest->af, &daddr, &udest->addr);

/* We use function that requires RCU lock */
Expand Down Expand Up @@ -2333,6 +2351,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
udest->u_threshold = udest_compat->u_threshold;
udest->l_threshold = udest_compat->l_threshold;
udest->af = AF_INET;
udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP;
}

static int
Expand Down Expand Up @@ -2890,6 +2909,8 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
[IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
[IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
[IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 },
[IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 },
[IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 },
};

static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
Expand Down Expand Up @@ -3193,6 +3214,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
IP_VS_CONN_F_FWD_MASK)) ||
nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT,
atomic_read(&dest->weight)) ||
nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE,
dest->tun_type) ||
nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT,
dest->tun_port) ||
nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) ||
nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
Expand Down Expand Up @@ -3315,12 +3340,14 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
*nla_l_thresh;
*nla_l_thresh, *nla_tun_type, *nla_tun_port;

nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE];
nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT];

if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
return -EINVAL;
Expand All @@ -3330,6 +3357,12 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
udest->weight = nla_get_u32(nla_weight);
udest->u_threshold = nla_get_u32(nla_u_thresh);
udest->l_threshold = nla_get_u32(nla_l_thresh);

if (nla_tun_type)
udest->tun_type = nla_get_u8(nla_tun_type);

if (nla_tun_port)
udest->tun_port = nla_get_be16(nla_tun_port);
}

return 0;
Expand Down
84 changes: 80 additions & 4 deletions net/netfilter/ipvs/ip_vs_xmit.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <linux/slab.h>
#include <linux/tcp.h> /* for tcphdr */
#include <net/ip.h>
#include <net/gue.h>
#include <net/tcp.h> /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h> /* for icmp_send */
Expand Down Expand Up @@ -382,6 +383,10 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst);
} else {
mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
if (!dest)
goto err_put;
if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
if (mtu < 68) {
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto err_put;
Expand Down Expand Up @@ -533,6 +538,10 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb,
mtu = dst_mtu(&rt->dst);
else {
mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
if (!dest)
goto err_put;
if (dest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
mtu -= sizeof(struct udphdr) + sizeof(struct guehdr);
if (mtu < IPV6_MIN_MTU) {
IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__,
IPV6_MIN_MTU);
Expand Down Expand Up @@ -989,6 +998,41 @@ static inline int __tun_gso_type_mask(int encaps_af, int orig_af)
}
}

/*
 * Prepend a GUE header and a UDP header to @skb.
 *
 * @net:           namespace handed to udp_flow_src_port() to pick the
 *                 UDP source port
 * @skb:           packet to encapsulate; both headers are pushed in front
 *                 of its current data, so the caller is expected to have
 *                 reserved room for them
 * @cp:            connection whose destination supplies the UDP
 *                 destination port (cp->dest->tun_port)
 * @next_protocol: in: protocol number stored in the GUE header;
 *                 out: overwritten with IPPROTO_UDP
 *
 * The skb transport header is reset to point at the new UDP header.
 * Always returns 0.
 */
static int
ipvs_gue_encap(struct net *net, struct sk_buff *skb,
	       struct ip_vs_conn *cp, __u8 *next_protocol)
{
	struct guehdr *gue;	/* GUE header being built */
	struct udphdr *uh;	/* UDP header being built */
	__be16 src_port;

	/* Pick the source port before any header is pushed onto the skb */
	src_port = udp_flow_src_port(net, skb, 0, 0, false);

	/* GUE header: every field zero except the carried protocol */
	skb_push(skb, sizeof(*gue));
	gue = (struct guehdr *)skb->data;
	gue->proto_ctype = *next_protocol;
	gue->flags = 0;
	gue->hlen = 0;
	gue->version = 0;
	gue->control = 0;

	/* UDP header goes in front of the GUE header */
	skb_push(skb, sizeof(*uh));
	skb_reset_transport_header(skb);
	uh = udp_hdr(skb);

	uh->source = src_port;
	uh->dest = cp->dest->tun_port;
	/* skb->len now includes the UDP and GUE headers pushed above */
	uh->len = htons(skb->len);
	uh->check = 0;

	/* Tell the caller what the outer IP header must now carry */
	*next_protocol = IPPROTO_UDP;

	return 0;
}

/*
* IP Tunneling transmitter
*
Expand Down Expand Up @@ -1025,6 +1069,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
int tun_type, gso_type;

EnterFunction(10);

Expand All @@ -1046,6 +1091,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

tun_type = cp->dest->tun_type;

if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);

/* We only care about the df field if sysctl_pmtu_disc(ipvs) is set */
dfp = sysctl_pmtu_disc(ipvs) ? &df : NULL;
skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
Expand All @@ -1054,11 +1104,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
if (IS_ERR(skb))
goto tx_error;

if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET, cp->af)))
gso_type = __tun_gso_type_mask(AF_INET, cp->af);
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
gso_type |= SKB_GSO_UDP_TUNNEL;

if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;

skb->transport_header = skb->network_header;

skb_set_inner_ipproto(skb, next_protocol);

if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
ipvs_gue_encap(net, skb, cp, &next_protocol);

skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Expand Down Expand Up @@ -1102,6 +1161,8 @@ int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ip_vs_protocol *pp, struct ip_vs_iphdr *ipvsh)
{
struct netns_ipvs *ipvs = cp->ipvs;
struct net *net = ipvs->net;
struct rt6_info *rt; /* Route to the other host */
struct in6_addr saddr; /* Source for tunnel */
struct net_device *tdev; /* Device to other host */
Expand All @@ -1112,10 +1173,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
struct ipv6hdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int ret, local;
int tun_type, gso_type;

EnterFunction(10);

local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest,
local = __ip_vs_get_out_rt_v6(ipvs, cp->af, skb, cp->dest,
&cp->daddr.in6,
&saddr, ipvsh, 1,
IP_VS_RT_MODE_LOCAL |
Expand All @@ -1134,17 +1196,31 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
*/
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

tun_type = cp->dest->tun_type;

if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
max_headroom += sizeof(struct udphdr) + sizeof(struct guehdr);

skb = ip_vs_prepare_tunneled_skb(skb, cp->af, max_headroom,
&next_protocol, &payload_len,
&dsfield, &ttl, NULL);
if (IS_ERR(skb))
goto tx_error;

if (iptunnel_handle_offloads(skb, __tun_gso_type_mask(AF_INET6, cp->af)))
gso_type = __tun_gso_type_mask(AF_INET6, cp->af);
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
gso_type |= SKB_GSO_UDP_TUNNEL;

if (iptunnel_handle_offloads(skb, gso_type))
goto tx_error;

skb->transport_header = skb->network_header;

skb_set_inner_ipproto(skb, next_protocol);

if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE)
ipvs_gue_encap(net, skb, cp, &next_protocol);

skb_push(skb, sizeof(struct ipv6hdr));
skb_reset_network_header(skb);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
Expand All @@ -1167,7 +1243,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,

ret = ip_vs_tunnel_xmit_prepare(skb, cp);
if (ret == NF_ACCEPT)
ip6_local_out(cp->ipvs->net, skb->sk, skb);
ip6_local_out(net, skb->sk, skb);
else if (ret == NF_DROP)
kfree_skb(skb);

Expand Down

0 comments on commit 84c0d5e

Please sign in to comment.