Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 327577
b: refs/heads/master
c: f2edb9f
h: refs/heads/master
i:
  327575: 1c20952
v: v3
  • Loading branch information
Julian Anastasov authored and Simon Horman committed Aug 10, 2012
1 parent 285c04c commit 60bd144
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 28 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 2b2d280817bd576e97ccd243b9b3a344d11ddd11
refs/heads/master: f2edb9f7706dcb2c0d9a362b2ba849efe3a97f5e
76 changes: 73 additions & 3 deletions trunk/net/netfilter/ipvs/ip_vs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -1303,7 +1303,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
unsigned int offset, ihl, verdict;
unsigned int offset, offset2, ihl, verdict;
bool ipip;

*related = 1;

Expand Down Expand Up @@ -1345,6 +1346,21 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)

net = skb_net(skb);

/* Special case for errors for IPIP packets */
ipip = false;
if (cih->protocol == IPPROTO_IPIP) {
if (unlikely(cih->frag_off & htons(IP_OFFSET)))
return NF_ACCEPT;
/* Error for our IPIP must arrive at LOCAL_IN */
if (!(skb_rtable(skb)->rt_flags & RTCF_LOCAL))
return NF_ACCEPT;
offset += cih->ihl * 4;
cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph);
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
ipip = true;
}

pd = ip_vs_proto_data_get(net, cih->protocol);
if (!pd)
return NF_ACCEPT;
Expand All @@ -1358,11 +1374,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset,
"Checking incoming ICMP for");

offset2 = offset;
offset += cih->ihl * 4;

ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
/* The embedded headers contain source and dest in reverse order.
* For IPIP this is error for request, not for reply.
*/
cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, ipip ? 0 : 1);
if (!cp)
return NF_ACCEPT;

Expand All @@ -1376,6 +1395,57 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
goto out;
}

if (ipip) {
__be32 info = ic->un.gateway;

/* Update the MTU */
if (ic->type == ICMP_DEST_UNREACH &&
ic->code == ICMP_FRAG_NEEDED) {
struct ip_vs_dest *dest = cp->dest;
u32 mtu = ntohs(ic->un.frag.mtu);

/* Strip outer IP and ICMP, go to IPIP header */
__skb_pull(skb, ihl + sizeof(_icmph));
offset2 -= ihl + sizeof(_icmph);
skb_reset_network_header(skb);
IP_VS_DBG(12, "ICMP for IPIP %pI4->%pI4: mtu=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, mtu);
rcu_read_lock();
ipv4_update_pmtu(skb, dev_net(skb->dev),
mtu, 0, 0, 0, 0);
rcu_read_unlock();
/* Client uses PMTUD? */
if (!(cih->frag_off & htons(IP_DF)))
goto ignore_ipip;
/* Prefer the resulting PMTU */
if (dest) {
spin_lock(&dest->dst_lock);
if (dest->dst_cache)
mtu = dst_mtu(dest->dst_cache);
spin_unlock(&dest->dst_lock);
}
if (mtu > 68 + sizeof(struct iphdr))
mtu -= sizeof(struct iphdr);
info = htonl(mtu);
}
/* Strip outer IP, ICMP and IPIP, go to IP header of
* original request.
*/
__skb_pull(skb, offset2);
skb_reset_network_header(skb);
IP_VS_DBG(12, "Sending ICMP for %pI4->%pI4: t=%u, c=%u, i=%u\n",
&ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
ic->type, ic->code, ntohl(info));
icmp_send(skb, ic->type, ic->code, info);
/* ICMP can be shorter but anyways, account it */
ip_vs_out_stats(cp, skb);

ignore_ipip:
consume_skb(skb);
verdict = NF_STOLEN;
goto out;
}

/* do the statistics and put it back */
ip_vs_in_stats(cp, skb);
if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
Expand Down
79 changes: 55 additions & 24 deletions trunk/net/netfilter/ipvs/ip_vs_xmit.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ enum {
IP_VS_RT_MODE_RDR = 4, /* Allow redirect from remote daddr to
* local
*/
IP_VS_RT_MODE_CONNECT = 8, /* Always bind route to saddr */
};

/*
Expand Down Expand Up @@ -84,6 +85,42 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos)
return dst;
}

/* Get route to daddr, update *saddr, optionally bind route to saddr */
static struct rtable *do_output_route4(struct net *net, __be32 daddr,
u32 rtos, int rt_mode, __be32 *saddr)
{
struct flowi4 fl4;
struct rtable *rt;
int loop = 0;

memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
fl4.flowi4_tos = rtos;

retry:
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
/* Invalid saddr ? */
if (PTR_ERR(rt) == -EINVAL && *saddr &&
rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
*saddr = 0;
flowi4_update_output(&fl4, 0, rtos, daddr, 0);
goto retry;
}
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
return NULL;
} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
ip_rt_put(rt);
*saddr = fl4.saddr;
flowi4_update_output(&fl4, 0, rtos, daddr, fl4.saddr);
loop++;
goto retry;
}
*saddr = fl4.saddr;
return rt;
}

/* Get route to destination or remote server */
static struct rtable *
__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
Expand All @@ -98,20 +135,13 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
spin_lock(&dest->dst_lock);
if (!(rt = (struct rtable *)
__ip_vs_dst_check(dest, rtos))) {
struct flowi4 fl4;

memset(&fl4, 0, sizeof(fl4));
fl4.daddr = dest->addr.ip;
fl4.flowi4_tos = rtos;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
rt = do_output_route4(net, dest->addr.ip, rtos,
rt_mode, &dest->dst_saddr.ip);
if (!rt) {
spin_unlock(&dest->dst_lock);
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
&dest->addr.ip);
return NULL;
}
__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0);
dest->dst_saddr.ip = fl4.saddr;
IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, "
"rtos=%X\n",
&dest->addr.ip, &dest->dst_saddr.ip,
Expand All @@ -122,19 +152,17 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest,
*ret_saddr = dest->dst_saddr.ip;
spin_unlock(&dest->dst_lock);
} else {
struct flowi4 fl4;
__be32 saddr = htonl(INADDR_ANY);

memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
fl4.flowi4_tos = rtos;
rt = ip_route_output_key(net, &fl4);
if (IS_ERR(rt)) {
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
&daddr);
/* For such unconfigured boxes avoid many route lookups
* for performance reasons because we do not remember saddr
*/
rt_mode &= ~IP_VS_RT_MODE_CONNECT;
rt = do_output_route4(net, daddr, rtos, rt_mode, &saddr);
if (!rt)
return NULL;
}
if (ret_saddr)
*ret_saddr = fl4.saddr;
*ret_saddr = saddr;
}

local = rt->rt_flags & RTCF_LOCAL;
Expand Down Expand Up @@ -331,6 +359,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
old_dst = dest->dst_cache;
dest->dst_cache = NULL;
dst_release(old_dst);
dest->dst_saddr.ip = 0;
}

#define IP_VS_XMIT_TUNNEL(skb, cp) \
Expand Down Expand Up @@ -771,7 +800,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
struct net_device *tdev; /* Device to other host */
struct iphdr *old_iph = ip_hdr(skb);
u8 tos = old_iph->tos;
__be16 df = old_iph->frag_off;
__be16 df;
struct iphdr *iph; /* Our new IP header */
unsigned int max_headroom; /* The extra header space needed */
int mtu;
Expand All @@ -781,7 +810,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,

if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip,
RT_TOS(tos), IP_VS_RT_MODE_LOCAL |
IP_VS_RT_MODE_NON_LOCAL,
IP_VS_RT_MODE_NON_LOCAL |
IP_VS_RT_MODE_CONNECT,
&saddr)))
goto tx_error_icmp;
if (rt->rt_flags & RTCF_LOCAL) {
Expand All @@ -796,10 +826,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
goto tx_error_put;
}
if (skb_dst(skb))
if (rt_is_output_route(skb_rtable(skb)))
skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

df |= (old_iph->frag_off & htons(IP_DF));
/* Copy DF, reset fragment offset and MF */
df = old_iph->frag_off & htons(IP_DF);

if ((old_iph->frag_off & htons(IP_DF) &&
mtu < ntohs(old_iph->tot_len) && !skb_is_gso(skb))) {
Expand Down

0 comments on commit 60bd144

Please sign in to comment.