Skip to content

Commit

Permalink
Merge branch 'ipv6-per-netns-gc'
Browse files Browse the repository at this point in the history
Michal Kubecek says:

====================
ipv6: per netns FIB6 walkers and garbage collector

Commit 2ac3ac8 ("ipv6: prevent fib6_run_gc() contention") reduced
the risk of contention on the FIB6 garbage collector lock on systems
with many CPUs. However, one of our customers still observes heavy
contention on fib6_gc_lock, which can even trigger the soft lockup
detector.

This is caused by the garbage collector running in forced mode from a
timer. While there is one timer per network namespace, the instances of
fib6_run_gc() running from them are protected by one global spinlock, so
only one garbage collector can run at any moment and other namespaces
have to wait. As most relevant data structures are separated per netns,
there is little reason for garbage collectors to block each other.

A similar problem exists for walkers: changes in one tree do not need to
adjust (and block) walkers traversing FIB trees in other namespaces.

This series separates both the walker infrastructure and the garbage
collector so that they work independently in each network namespace.

v2: get rid of the ifdef in ipv6_route_seq_setup_walk(); pass net from
the callers instead
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Mar 8, 2016
2 parents 02daec7 + 3dc94f9 commit 8aba8b8
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 43 deletions.
3 changes: 3 additions & 0 deletions include/net/netns/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ struct netns_ipv6 {
struct timer_list ip6_fib_timer;
struct hlist_head *fib_table_hash;
struct fib6_table *fib6_main_tbl;
struct list_head fib6_walkers;
struct dst_ops ip6_dst_ops;
rwlock_t fib6_walker_lock;
spinlock_t fib6_gc_lock;
unsigned int ip6_rt_gc_expire;
unsigned long ip6_rt_last_gc;
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
Expand Down
91 changes: 48 additions & 43 deletions net/ipv6/ip6_fib.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,6 @@ struct fib6_cleaner {
void *arg;
};

static DEFINE_RWLOCK(fib6_walker_lock);

#ifdef CONFIG_IPV6_SUBTREES
#define FWS_INIT FWS_S
#else
Expand All @@ -66,7 +64,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);
static void fib6_prune_clones(struct net *net, struct fib6_node *fn);
static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);
static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);
static int fib6_walk(struct fib6_walker *w);
static int fib6_walk(struct net *net, struct fib6_walker *w);
static int fib6_walk_continue(struct fib6_walker *w);

/*
Expand All @@ -78,21 +76,21 @@ static int fib6_walk_continue(struct fib6_walker *w);

static void fib6_gc_timer_cb(unsigned long arg);

static LIST_HEAD(fib6_walkers);
#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh)
#define FOR_WALKERS(net, w) \
list_for_each_entry(w, &(net)->ipv6.fib6_walkers, lh)

static void fib6_walker_link(struct fib6_walker *w)
static void fib6_walker_link(struct net *net, struct fib6_walker *w)
{
write_lock_bh(&fib6_walker_lock);
list_add(&w->lh, &fib6_walkers);
write_unlock_bh(&fib6_walker_lock);
write_lock_bh(&net->ipv6.fib6_walker_lock);
list_add(&w->lh, &net->ipv6.fib6_walkers);
write_unlock_bh(&net->ipv6.fib6_walker_lock);
}

static void fib6_walker_unlink(struct fib6_walker *w)
static void fib6_walker_unlink(struct net *net, struct fib6_walker *w)
{
write_lock_bh(&fib6_walker_lock);
write_lock_bh(&net->ipv6.fib6_walker_lock);
list_del(&w->lh);
write_unlock_bh(&fib6_walker_lock);
write_unlock_bh(&net->ipv6.fib6_walker_lock);
}

static int fib6_new_sernum(struct net *net)
Expand Down Expand Up @@ -325,12 +323,13 @@ static int fib6_dump_node(struct fib6_walker *w)

static void fib6_dump_end(struct netlink_callback *cb)
{
struct net *net = sock_net(cb->skb->sk);
struct fib6_walker *w = (void *)cb->args[2];

if (w) {
if (cb->args[4]) {
cb->args[4] = 0;
fib6_walker_unlink(w);
fib6_walker_unlink(net, w);
}
cb->args[2] = 0;
kfree(w);
Expand All @@ -348,6 +347,7 @@ static int fib6_dump_done(struct netlink_callback *cb)
static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct fib6_walker *w;
int res;

Expand All @@ -359,7 +359,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
w->skip = 0;

read_lock_bh(&table->tb6_lock);
res = fib6_walk(w);
res = fib6_walk(net, w);
read_unlock_bh(&table->tb6_lock);
if (res > 0) {
cb->args[4] = 1;
Expand All @@ -379,7 +379,7 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb,
res = fib6_walk_continue(w);
read_unlock_bh(&table->tb6_lock);
if (res <= 0) {
fib6_walker_unlink(w);
fib6_walker_unlink(net, w);
cb->args[4] = 0;
}
}
Expand Down Expand Up @@ -1340,8 +1340,8 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
}
#endif

read_lock(&fib6_walker_lock);
FOR_WALKERS(w) {
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (!child) {
if (w->root == fn) {
w->root = w->node = NULL;
Expand All @@ -1368,7 +1368,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net,
}
}
}
read_unlock(&fib6_walker_lock);
read_unlock(&net->ipv6.fib6_walker_lock);

node_free(fn);
if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn))
Expand Down Expand Up @@ -1411,16 +1411,16 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
}

/* Adjust walkers */
read_lock(&fib6_walker_lock);
FOR_WALKERS(w) {
read_lock(&net->ipv6.fib6_walker_lock);
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
w->leaf = rt->dst.rt6_next;
if (!w->leaf)
w->state = FWS_U;
}
}
read_unlock(&fib6_walker_lock);
read_unlock(&net->ipv6.fib6_walker_lock);

rt->dst.rt6_next = NULL;

Expand Down Expand Up @@ -1588,17 +1588,17 @@ static int fib6_walk_continue(struct fib6_walker *w)
}
}

static int fib6_walk(struct fib6_walker *w)
static int fib6_walk(struct net *net, struct fib6_walker *w)
{
int res;

w->state = FWS_INIT;
w->node = w->root;

fib6_walker_link(w);
fib6_walker_link(net, w);
res = fib6_walk_continue(w);
if (res <= 0)
fib6_walker_unlink(w);
fib6_walker_unlink(net, w);
return res;
}

Expand Down Expand Up @@ -1668,7 +1668,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,
c.arg = arg;
c.net = net;

fib6_walk(&c.w);
fib6_walk(net, &c.w);
}

static void __fib6_clean_all(struct net *net,
Expand Down Expand Up @@ -1725,14 +1725,15 @@ static void fib6_flush_trees(struct net *net)
* Garbage collection
*/

static struct fib6_gc_args
struct fib6_gc_args
{
int timeout;
int more;
} gc_args;
};

static int fib6_age(struct rt6_info *rt, void *arg)
{
struct fib6_gc_args *gc_args = arg;
unsigned long now = jiffies;

/*
Expand All @@ -1748,10 +1749,10 @@ static int fib6_age(struct rt6_info *rt, void *arg)
RT6_TRACE("expiring %p\n", rt);
return -1;
}
gc_args.more++;
gc_args->more++;
} else if (rt->rt6i_flags & RTF_CACHE) {
if (atomic_read(&rt->dst.__refcnt) == 0 &&
time_after_eq(now, rt->dst.lastuse + gc_args.timeout)) {
time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
RT6_TRACE("aging clone %p\n", rt);
return -1;
} else if (rt->rt6i_flags & RTF_GATEWAY) {
Expand All @@ -1769,21 +1770,20 @@ static int fib6_age(struct rt6_info *rt, void *arg)
return -1;
}
}
gc_args.more++;
gc_args->more++;
}

return 0;
}

static DEFINE_SPINLOCK(fib6_gc_lock);

void fib6_run_gc(unsigned long expires, struct net *net, bool force)
{
struct fib6_gc_args gc_args;
unsigned long now;

if (force) {
spin_lock_bh(&fib6_gc_lock);
} else if (!spin_trylock_bh(&fib6_gc_lock)) {
spin_lock_bh(&net->ipv6.fib6_gc_lock);
} else if (!spin_trylock_bh(&net->ipv6.fib6_gc_lock)) {
mod_timer(&net->ipv6.ip6_fib_timer, jiffies + HZ);
return;
}
Expand All @@ -1792,7 +1792,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)

gc_args.more = icmp6_dst_gc();

fib6_clean_all(net, fib6_age, NULL);
fib6_clean_all(net, fib6_age, &gc_args);
now = jiffies;
net->ipv6.ip6_rt_last_gc = now;

Expand All @@ -1802,7 +1802,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)
+ net->ipv6.sysctl.ip6_rt_gc_interval));
else
del_timer(&net->ipv6.ip6_fib_timer);
spin_unlock_bh(&fib6_gc_lock);
spin_unlock_bh(&net->ipv6.fib6_gc_lock);
}

static void fib6_gc_timer_cb(unsigned long arg)
Expand All @@ -1814,6 +1814,9 @@ static int __net_init fib6_net_init(struct net *net)
{
size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ;

spin_lock_init(&net->ipv6.fib6_gc_lock);
rwlock_init(&net->ipv6.fib6_walker_lock);
INIT_LIST_HEAD(&net->ipv6.fib6_walkers);
setup_timer(&net->ipv6.ip6_fib_timer, fib6_gc_timer_cb, (unsigned long)net);

net->ipv6.rt6_stats = kzalloc(sizeof(*net->ipv6.rt6_stats), GFP_KERNEL);
Expand Down Expand Up @@ -1974,7 +1977,8 @@ static int ipv6_route_yield(struct fib6_walker *w)
return 0;
}

static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter,
struct net *net)
{
memset(&iter->w, 0, sizeof(iter->w));
iter->w.func = ipv6_route_yield;
Expand All @@ -1984,7 +1988,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter)
iter->w.args = iter;
iter->sernum = iter->w.root->fn_sernum;
INIT_LIST_HEAD(&iter->w.lh);
fib6_walker_link(&iter->w);
fib6_walker_link(net, &iter->w);
}

static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl,
Expand Down Expand Up @@ -2045,16 +2049,16 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++*pos;
return iter->w.leaf;
} else if (r < 0) {
fib6_walker_unlink(&iter->w);
fib6_walker_unlink(net, &iter->w);
return NULL;
}
fib6_walker_unlink(&iter->w);
fib6_walker_unlink(net, &iter->w);

iter->tbl = ipv6_route_seq_next_table(iter->tbl, net);
if (!iter->tbl)
return NULL;

ipv6_route_seq_setup_walk(iter);
ipv6_route_seq_setup_walk(iter, net);
goto iter_table;
}

Expand All @@ -2069,7 +2073,7 @@ static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos)
iter->skip = *pos;

if (iter->tbl) {
ipv6_route_seq_setup_walk(iter);
ipv6_route_seq_setup_walk(iter, net);
return ipv6_route_seq_next(seq, NULL, pos);
} else {
return NULL;
Expand All @@ -2085,10 +2089,11 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
__releases(RCU_BH)
{
struct net *net = seq_file_net(seq);
struct ipv6_route_iter *iter = seq->private;

if (ipv6_route_iter_active(iter))
fib6_walker_unlink(&iter->w);
fib6_walker_unlink(net, &iter->w);

rcu_read_unlock_bh();
}
Expand Down

0 comments on commit 8aba8b8

Please sign in to comment.