Skip to content

Commit

Permalink
ipv6: prevent fib6_run_gc() contention
Browse files Browse the repository at this point in the history
On a high-traffic router with many processors and many IPv6 dst
entries, a soft lockup in fib6_run_gc() can occur when the number of
entries reaches gc_thresh.

This happens because fib6_run_gc() uses fib6_gc_lock to allow
only one thread to run the garbage collector, but ip6_dst_gc()
doesn't update net->ipv6.ip6_rt_last_gc until fib6_run_gc()
returns. On a system with many entries, this can take some time,
so in the meantime other threads pass the tests in
ip6_dst_gc() (ip6_rt_last_gc is still not updated) and wait for
the lock. They then have to run the garbage collector one after
another, which blocks them for quite a long time.

Resolve this by replacing the special value ~0UL of the "expires"
parameter of fib6_run_gc() with an explicit "force" parameter that
chooses between spin_lock_bh() and spin_trylock_bh(), and call
fib6_run_gc() with force=false if gc_thresh is reached but max_size
is not.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Michal Kubeček authored and David S. Miller committed Aug 1, 2013
1 parent 1f1059f commit 2ac3ac8
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 16 deletions.
2 changes: 1 addition & 1 deletion include/net/ip6_fib.h
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ extern void inet6_rt_notify(int event, struct rt6_info *rt,
struct nl_info *info);

extern void fib6_run_gc(unsigned long expires,
struct net *net);
struct net *net, bool force);

extern void fib6_gc_cleanup(void);

Expand Down
19 changes: 8 additions & 11 deletions net/ipv6/ip6_fib.c
Original file line number Diff line number Diff line change
Expand Up @@ -1632,19 +1632,16 @@ static int fib6_age(struct rt6_info *rt, void *arg)

static DEFINE_SPINLOCK(fib6_gc_lock);

void fib6_run_gc(unsigned long expires, struct net *net)
void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
if (expires != ~0UL) {
if (force) {
spin_lock_bh(&fib6_gc_lock);
gc_args.timeout = expires ? (int)expires :
net->ipv6.sysctl.ip6_rt_gc_interval;
} else {
if (!spin_trylock_bh(&fib6_gc_lock)) {
mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
return;
}
gc_args.timeout = net->ipv6.sysctl.ip6_rt_gc_interval;
} else if (!spin_trylock_bh(&fib6_gc_lock)) {
mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
return;
}
gc_args.timeout = expires ? (int)expires :
net->ipv6.sysctl.ip6_rt_gc_interval;

gc_args.more = icmp6_dst_gc();

Expand All @@ -1661,7 +1658,7 @@ void fib6_run_gc(unsigned long expires, struct net *net)

static void fib6_gc_timer_cb(unsigned long arg)
{
fib6_run_gc(0, (struct net *)arg);
fib6_run_gc(0, (struct net *)arg, true);
}

static int __net_init fib6_net_init(struct net *net)
Expand Down
4 changes: 2 additions & 2 deletions net/ipv6/ndisc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1576,7 +1576,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
switch (event) {
case NETDEV_CHANGEADDR:
neigh_changeaddr(&nd_tbl, dev);
fib6_run_gc(~0UL, net);
fib6_run_gc(0, net, false);
idev = in6_dev_get(dev);
if (!idev)
break;
Expand All @@ -1586,7 +1586,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event,
break;
case NETDEV_DOWN:
neigh_ifdown(&nd_tbl, dev);
fib6_run_gc(~0UL, net);
fib6_run_gc(0, net, false);
break;
case NETDEV_NOTIFY_PEERS:
ndisc_send_unsol_na(dev);
Expand Down
4 changes: 2 additions & 2 deletions net/ipv6/route.c
Original file line number Diff line number Diff line change
Expand Up @@ -1326,7 +1326,7 @@ static int ip6_dst_gc(struct dst_ops *ops)
goto out;

net->ipv6.ip6_rt_gc_expire++;
fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net);
fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size);
net->ipv6.ip6_rt_last_gc = now;
entries = dst_entries_get_slow(ops);
if (entries < ops->gc_thresh)
Expand Down Expand Up @@ -2827,7 +2827,7 @@ int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
net = (struct net *)ctl->extra1;
delay = net->ipv6.sysctl.flush_delay;
proc_dointvec(ctl, write, buffer, lenp, ppos);
fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
fib6_run_gc(delay <= 0 ? 0 : (unsigned long)delay, net, delay > 0);
return 0;
}

Expand Down

0 comments on commit 2ac3ac8

Please sign in to comment.