Skip to content

Commit

Permalink
ipv4: ICMP packet inspection for multipath
Browse files Browse the repository at this point in the history
ICMP packets are inspected to let them route together with the flow they
belong to, minimizing the chance that a problematic path will affect flows
on other paths, and so that anycast environments can work with ECMP.

Signed-off-by: Peter Nørlund <pch@ordbogen.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Peter Nørlund authored and David S. Miller committed Oct 5, 2015
1 parent 0e884c7 commit 79a1315
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 9 deletions.
11 changes: 10 additions & 1 deletion include/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <net/inetpeer.h>
#include <net/flow.h>
#include <net/inet_sock.h>
#include <net/ip_fib.h>
#include <net/l3mdev.h>
#include <linux/in_route.h>
#include <linux/rtnetlink.h>
Expand Down Expand Up @@ -113,7 +114,15 @@ struct in_device;
int ip_rt_init(void);
void rt_cache_flush(struct net *net);
void rt_flush_dev(struct net_device *dev);
struct rtable *__ip_route_output_key(struct net *, struct flowi4 *flp);
struct rtable *__ip_route_output_key_hash(struct net *, struct flowi4 *flp,
int mp_hash);

/* Output route lookup without a caller-supplied multipath hash: forwards
 * to __ip_route_output_key_hash() with a negative hash value.
 */
static inline struct rtable *__ip_route_output_key(struct net *net,
						   struct flowi4 *flp)
{
	const int no_mp_hash = -1;

	return __ip_route_output_key_hash(net, flp, no_mp_hash);
}

struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp,
const struct sock *sk);
struct dst_entry *ipv4_blackhole_route(struct net *net,
Expand Down
19 changes: 18 additions & 1 deletion net/ipv4/icmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,22 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
icmp_xmit_unlock(sk);
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* Multipath hash derived from the IP header of @skb, with the addresses
 * passed in swapped order (destination first, then source).
 * See ip_multipath_icmp_hash.
 */
static int icmp_multipath_hash_skb(const struct sk_buff *skb)
{
	const struct iphdr *orig_hdr = ip_hdr(skb);
	int hash;

	hash = fib_multipath_hash(orig_hdr->daddr, orig_hdr->saddr);
	return hash;
}

#else

/* Built without CONFIG_IP_ROUTE_MULTIPATH: no hash is computed from the
 * packet; the stub always evaluates to -1 and does not touch @skb.
 */
#define icmp_multipath_hash_skb(skb) (-1)

#endif

static struct rtable *icmp_route_lookup(struct net *net,
struct flowi4 *fl4,
struct sk_buff *skb_in,
Expand All @@ -464,7 +480,8 @@ static struct rtable *icmp_route_lookup(struct net *net,
fl4->flowi4_oif = l3mdev_master_ifindex(skb_in->dev);

security_skb_classify_flow(skb_in, flowi4_to_flowi(fl4));
rt = __ip_route_output_key(net, fl4);
rt = __ip_route_output_key_hash(net, fl4,
icmp_multipath_hash_skb(skb_in));
if (IS_ERR(rt))
return rt;

Expand Down
59 changes: 52 additions & 7 deletions net/ipv4/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -1651,6 +1651,48 @@ static int __mkroute_input(struct sk_buff *skb,
return err;
}

#ifdef CONFIG_IP_ROUTE_MULTIPATH

/* To make ICMP packets follow the right flow, the multipath hash is
* calculated from the inner IP addresses in reverse order.
*/
static int ip_multipath_icmp_hash(struct sk_buff *skb)
{
const struct iphdr *outer_iph = ip_hdr(skb);
struct icmphdr _icmph;
const struct icmphdr *icmph;
struct iphdr _inner_iph;
const struct iphdr *inner_iph;

if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0))
goto standard_hash;

icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph),
&_icmph);
if (!icmph)
goto standard_hash;

if (icmph->type != ICMP_DEST_UNREACH &&
icmph->type != ICMP_REDIRECT &&
icmph->type != ICMP_TIME_EXCEEDED &&
icmph->type != ICMP_PARAMETERPROB) {
goto standard_hash;
}

inner_iph = skb_header_pointer(skb,
outer_iph->ihl * 4 + sizeof(_icmph),
sizeof(_inner_iph), &_inner_iph);
if (!inner_iph)
goto standard_hash;

return fib_multipath_hash(inner_iph->daddr, inner_iph->saddr);

standard_hash:
return fib_multipath_hash(outer_iph->saddr, outer_iph->daddr);
}

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

static int ip_mkroute_input(struct sk_buff *skb,
struct fib_result *res,
const struct flowi4 *fl4,
Expand All @@ -1661,7 +1703,10 @@ static int ip_mkroute_input(struct sk_buff *skb,
if (res->fi && res->fi->fib_nhs > 1) {
int h;

h = fib_multipath_hash(saddr, daddr);
if (unlikely(ip_hdr(skb)->protocol == IPPROTO_ICMP))
h = ip_multipath_icmp_hash(skb);
else
h = fib_multipath_hash(saddr, daddr);
fib_select_multipath(res, h);
}
#endif
Expand Down Expand Up @@ -2030,7 +2075,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
* Major route resolver routine.
*/

struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4,
int mp_hash)
{
struct net_device *dev_out = NULL;
__u8 tos = RT_FL_TOS(fl4);
Expand Down Expand Up @@ -2194,10 +2240,9 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)

#ifdef CONFIG_IP_ROUTE_MULTIPATH
if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0) {
int h;

h = fib_multipath_hash(fl4->saddr, fl4->daddr);
fib_select_multipath(&res, h);
if (mp_hash < 0)
mp_hash = fib_multipath_hash(fl4->saddr, fl4->daddr);
fib_select_multipath(&res, mp_hash);
}
else
#endif
Expand All @@ -2220,7 +2265,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
rcu_read_unlock();
return rth;
}
EXPORT_SYMBOL_GPL(__ip_route_output_key);
EXPORT_SYMBOL_GPL(__ip_route_output_key_hash);

static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
Expand Down

0 comments on commit 79a1315

Please sign in to comment.