Skip to content

Commit

Permalink
ipv4: tcp: dont cache unconfirmed intput dst
Browse files Browse the repository at this point in the history
DDOS synflood attacks hit badly IP route cache.

On typical machines, this cache is allowed to hold up to 8 Millions dst
entries, 256 bytes for each, for a total of 2GB of memory.

rt_garbage_collect() triggers and tries to cleanup things.

Eventually route cache is disabled but machine is under fire and might
OOM and crash.

This patch exploits the new TCP early demux, to set a nocache
boolean in case incoming TCP frame is for a not yet ESTABLISHED or
TIMEWAIT socket.

This 'nocache' boolean is then used in case dst entry is not found in
route cache, to create an unhashed dst entry (DST_NOCACHE)

SYN-cookie-ACK sent use a similar mechanism (ipv4: tcp: dont cache
output dst for syncookies), so after this patch, a machine is able to
absorb a DDOS synflood attack without polluting its IP route cache.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Hans Schillstrom <hans.schillstrom@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eric Dumazet authored and David S. Miller committed Jun 27, 2012
1 parent 93040ae commit c074da2
Show file tree
Hide file tree
Showing 9 changed files with 20 additions and 15 deletions.
2 changes: 1 addition & 1 deletion include/net/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@

/* This is used to register protocols. */
struct net_protocol {
int (*early_demux)(struct sk_buff *skb);
int (*early_demux)(struct sk_buff *skb, bool *nocache);
int (*handler)(struct sk_buff *skb);
void (*err_handler)(struct sk_buff *skb, u32 info);
int (*gso_send_check)(struct sk_buff *skb);
Expand Down
8 changes: 4 additions & 4 deletions include/net/route.h
Original file line number Diff line number Diff line change
Expand Up @@ -201,18 +201,18 @@ static inline struct rtable *ip_route_output_gre(struct net *net, struct flowi4
}

extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin, bool noref);
u8 tos, struct net_device *devin, bool noref, bool nocache);

static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin)
{
return ip_route_input_common(skb, dst, src, tos, devin, false);
return ip_route_input_common(skb, dst, src, tos, devin, false, false);
}

static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
u8 tos, struct net_device *devin)
u8 tos, struct net_device *devin, bool nocache)
{
return ip_route_input_common(skb, dst, src, tos, devin, true);
return ip_route_input_common(skb, dst, src, tos, devin, true, nocache);
}

extern void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
Expand Down
2 changes: 1 addition & 1 deletion include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ extern void tcp_v4_err(struct sk_buff *skb, u32);

extern void tcp_shutdown (struct sock *sk, int how);

extern int tcp_v4_early_demux(struct sk_buff *skb);
extern int tcp_v4_early_demux(struct sk_buff *skb, bool *nocache);
extern int tcp_v4_rcv(struct sk_buff *skb);

extern struct inet_peer *tcp_v4_get_peer(struct sock *sk);
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/arp.c
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ static int arp_process(struct sk_buff *skb)
}

if (arp->ar_op == htons(ARPOP_REQUEST) &&
ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
ip_route_input_noref(skb, tip, sip, 0, dev, false) == 0) {

rt = skb_rtable(skb);
addr_type = rt->rt_type;
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/ip_fragment.c
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,7 @@ static void ip_expire(unsigned long arg)
skb_dst_drop(head);
iph = ip_hdr(head);
err = ip_route_input_noref(head, iph->daddr, iph->saddr,
iph->tos, head->dev);
iph->tos, head->dev, false);
if (err)
goto out_rcu_unlock;

Expand Down
5 changes: 3 additions & 2 deletions net/ipv4/ip_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
*/
if (skb_dst(skb) == NULL) {
int err = -ENOENT;
bool nocache = false;

if (sysctl_ip_early_demux) {
const struct net_protocol *ipprot;
Expand All @@ -334,13 +335,13 @@ static int ip_rcv_finish(struct sk_buff *skb)
rcu_read_lock();
ipprot = rcu_dereference(inet_protos[protocol]);
if (ipprot && ipprot->early_demux)
err = ipprot->early_demux(skb);
err = ipprot->early_demux(skb, &nocache);
rcu_read_unlock();
}

if (err) {
err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
iph->tos, skb->dev);
iph->tos, skb->dev, nocache);
if (unlikely(err)) {
if (err == -EXDEV)
NET_INC_STATS_BH(dev_net(skb->dev),
Expand Down
8 changes: 5 additions & 3 deletions net/ipv4/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -2214,7 +2214,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
*/

static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev)
u8 tos, struct net_device *dev, bool nocache)
{
struct fib_result res;
struct in_device *in_dev = __in_dev_get_rcu(dev);
Expand Down Expand Up @@ -2353,6 +2353,8 @@ out: return err;
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
if (nocache)
rth->dst.flags |= DST_NOCACHE;
hash = rt_hash(daddr, saddr, fl4.flowi4_iif, rt_genid(net));
rth = rt_intern_hash(hash, rth, skb, fl4.flowi4_iif);
err = 0;
Expand Down Expand Up @@ -2395,7 +2397,7 @@ out: return err;
}

int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
u8 tos, struct net_device *dev, bool noref)
u8 tos, struct net_device *dev, bool noref, bool nocache)
{
struct rtable *rth;
unsigned int hash;
Expand Down Expand Up @@ -2471,7 +2473,7 @@ int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
rcu_read_unlock();
return -EINVAL;
}
res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
res = ip_route_input_slow(skb, daddr, saddr, tos, dev, nocache);
rcu_read_unlock();
return res;
}
Expand Down
4 changes: 3 additions & 1 deletion net/ipv4/tcp_ipv4.c
Original file line number Diff line number Diff line change
Expand Up @@ -1673,7 +1673,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_v4_do_rcv);

int tcp_v4_early_demux(struct sk_buff *skb)
int tcp_v4_early_demux(struct sk_buff *skb, bool *no_dst_cache)
{
struct net *net = dev_net(skb->dev);
const struct iphdr *iph;
Expand Down Expand Up @@ -1719,6 +1719,8 @@ int tcp_v4_early_demux(struct sk_buff *skb)
}
}
}
} else {
*no_dst_cache = true;
}

out_err:
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/xfrm4_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);

if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
iph->tos, skb->dev))
iph->tos, skb->dev, false))
goto drop;
}
return dst_input(skb);
Expand Down

0 comments on commit c074da2

Please sign in to comment.