Skip to content

Commit

Permalink
Merge branch 'net-speedup-netns-dismantles'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
net: speedup netns dismantles

From: Eric Dumazet <edumazet@google.com>

In this series, I made network namespace deletions more scalable,
by 4x on the little benchmark described in this cover letter.

- Remove bottleneck on ipv6 addrconf, by replacing a global
  hash table to a per netns one.

- Rework many (struct pernet_operations)->exit() handlers to
  exit_batch() ones. This removes many rtnl acquisitions,
  and gives to cleanup_net() kind of a priority over rtnl
  ownership.

Tested on a host with 24 cpus (48 HT)

Test script:

for nr in {1..10}
do
  (for i in {1..10000}; do unshare -n /bin/bash -c "ifconfig lo up"; done) &
done
wait

for i in {1..10}
do
  sleep 1
  echo 3 >/proc/sys/vm/drop_caches
  grep net_namespace /proc/slabinfo
done

Before: We can see host struggles to clean the netns, even after there are no new creations.
Memory cost is high, because each netns consumes a good amount of memory.

time ./unshare10.sh
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      82634  82634   3968    1    1 : tunables   24   12    8 : slabdata  82634  82634      0
net_namespace      37214  37792   3968    1    1 : tunables   24   12    8 : slabdata  37214  37792    192

real	6m57.766s
user	3m37.277s
sys	40m4.826s

After: We can see the script completes much faster,
the kernel thread doing the cleanup_net() keeps up just fine.
Memory cost is not too big.

time ./unshare10.sh
net_namespace       9945   9945   4096    1    1 : tunables   24   12    8 : slabdata   9945   9945      0
net_namespace       4087   4665   4096    1    1 : tunables   24   12    8 : slabdata   4087   4665    192
net_namespace       4082   4607   4096    1    1 : tunables   24   12    8 : slabdata   4082   4607    192
net_namespace        234    761   4096    1    1 : tunables   24   12    8 : slabdata    234    761    192
net_namespace        224    751   4096    1    1 : tunables   24   12    8 : slabdata    224    751    192
net_namespace        218    745   4096    1    1 : tunables   24   12    8 : slabdata    218    745    192
net_namespace        193    667   4096    1    1 : tunables   24   12    8 : slabdata    193    667    172
net_namespace        167    609   4096    1    1 : tunables   24   12    8 : slabdata    167    609    152
net_namespace        167    609   4096    1    1 : tunables   24   12    8 : slabdata    167    609    152
net_namespace        157    609   4096    1    1 : tunables   24   12    8 : slabdata    157    609    152

real    1m43.876s
user    3m39.728s
sys 7m36.342s
====================

Link: https://lore.kernel.org/r/20220208045038.2635826-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Feb 9, 2022
2 parents b2309a7 + ee40324 commit 4caaf75
Show file tree
Hide file tree
Showing 11 changed files with 172 additions and 113 deletions.
27 changes: 19 additions & 8 deletions drivers/net/bonding/bond_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6048,27 +6048,38 @@ static int __net_init bond_net_init(struct net *net)
return 0;
}

static void __net_exit bond_net_exit(struct net *net)
static void __net_exit bond_net_exit_batch(struct list_head *net_list)
{
struct bond_net *bn = net_generic(net, bond_net_id);
struct bonding *bond, *tmp_bond;
struct bond_net *bn;
struct net *net;
LIST_HEAD(list);

bond_destroy_sysfs(bn);
list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
bond_destroy_sysfs(bn);
}

/* Kill off any bonds created after unregistering bond rtnl ops */
rtnl_lock();
list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
unregister_netdevice_queue(bond->dev, &list);
list_for_each_entry(net, net_list, exit_list) {
struct bonding *bond, *tmp_bond;

bn = net_generic(net, bond_net_id);
list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
unregister_netdevice_queue(bond->dev, &list);
}
unregister_netdevice_many(&list);
rtnl_unlock();

bond_destroy_proc_dir(bn);
list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
bond_destroy_proc_dir(bn);
}
}

static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
.exit = bond_net_exit,
.exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
};
Expand Down
1 change: 0 additions & 1 deletion drivers/net/bonding/bond_procfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,6 @@ void __net_init bond_create_proc_dir(struct bond_net *bn)
}

/* Destroy the bonding directory under /proc/net, if empty.
* Caller must hold rtnl_lock.
*/
void __net_exit bond_destroy_proc_dir(struct bond_net *bn)
{
Expand Down
5 changes: 5 additions & 0 deletions include/net/netns/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ struct netns_ipv6 {
struct sock *tcp_sk;
struct sock *igmp_sk;
struct sock *mc_autojoin_sk;

struct hlist_head *inet6_addr_lst;
spinlock_t addrconf_hash_lock;
struct delayed_work addr_chk_work;

#ifdef CONFIG_IPV6_MROUTE
#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
struct mr_table *mrt6;
Expand Down
9 changes: 6 additions & 3 deletions net/can/gw.c
Original file line number Diff line number Diff line change
Expand Up @@ -1239,16 +1239,19 @@ static int __net_init cangw_pernet_init(struct net *net)
return 0;
}

static void __net_exit cangw_pernet_exit(struct net *net)
static void __net_exit cangw_pernet_exit_batch(struct list_head *net_list)
{
struct net *net;

rtnl_lock();
cgw_remove_all_jobs(net);
list_for_each_entry(net, net_list, exit_list)
cgw_remove_all_jobs(net);
rtnl_unlock();
}

static struct pernet_operations cangw_pernet_ops = {
.init = cangw_pernet_init,
.exit = cangw_pernet_exit,
.exit_batch = cangw_pernet_exit_batch,
};

static __init int cgw_module_init(void)
Expand Down
22 changes: 14 additions & 8 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -10850,14 +10850,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
static void __net_exit default_device_exit_net(struct net *net)
{
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
* initial network namespace
*/
rtnl_lock();
ASSERT_RTNL();
for_each_netdev_safe(net, dev, aux) {
int err;
char fb_name[IFNAMSIZ];
Expand All @@ -10881,22 +10881,22 @@ static void __net_exit default_device_exit(struct net *net)
BUG();
}
}
rtnl_unlock();
}

static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
{
/* Return with the rtnl_lock held when there are no network
/* Return (with the rtnl_lock held) when there are no network
* devices unregistering in any network namespace in net_list.
*/
struct net *net;
bool unregistering;
DEFINE_WAIT_FUNC(wait, woken_wake_function);
bool unregistering;
struct net *net;

ASSERT_RTNL();
add_wait_queue(&netdev_unregistering_wq, &wait);
for (;;) {
unregistering = false;
rtnl_lock();

list_for_each_entry(net, net_list, exit_list) {
if (net->dev_unreg_count > 0) {
unregistering = true;
Expand All @@ -10908,6 +10908,7 @@ static void __net_exit rtnl_lock_unregistering(struct list_head *net_list)
__rtnl_unlock();

wait_woken(&wait, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
rtnl_lock();
}
remove_wait_queue(&netdev_unregistering_wq, &wait);
}
Expand All @@ -10923,6 +10924,11 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
struct net *net;
LIST_HEAD(dev_kill_list);

rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
default_device_exit_net(net);
cond_resched();
}
/* To prevent network device cleanup code from dereferencing
* loopback devices or network devices that have been freed
* wait here for all pending unregistrations to complete,
Expand All @@ -10935,6 +10941,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
* default_device_exit_batch.
*/
rtnl_lock_unregistering(net_list);

list_for_each_entry(net, net_list, exit_list) {
for_each_netdev_reverse(net, dev) {
if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink)
Expand All @@ -10948,7 +10955,6 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
}

static struct pernet_operations __net_initdata default_device_ops = {
.exit = default_device_exit,
.exit_batch = default_device_exit_batch,
};

Expand Down
19 changes: 16 additions & 3 deletions net/ipv4/fib_frontend.c
Original file line number Diff line number Diff line change
Expand Up @@ -1556,7 +1556,7 @@ static void ip_fib_net_exit(struct net *net)
{
int i;

rtnl_lock();
ASSERT_RTNL();
#ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL);
Expand All @@ -1581,7 +1581,7 @@ static void ip_fib_net_exit(struct net *net)
#ifdef CONFIG_IP_MULTIPLE_TABLES
fib4_rules_exit(net);
#endif
rtnl_unlock();

kfree(net->ipv4.fib_table_hash);
fib4_notifier_exit(net);
}
Expand All @@ -1608,20 +1608,33 @@ static int __net_init fib_net_init(struct net *net)
out_proc:
nl_fib_lookup_exit(net);
out_nlfl:
rtnl_lock();
ip_fib_net_exit(net);
rtnl_unlock();
goto out;
}

static void __net_exit fib_net_exit(struct net *net)
{
fib_proc_exit(net);
nl_fib_lookup_exit(net);
ip_fib_net_exit(net);
}

static void __net_exit fib_net_exit_batch(struct list_head *net_list)
{
struct net *net;

rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
ip_fib_net_exit(net);

rtnl_unlock();
}

static struct pernet_operations fib_net_ops = {
.init = fib_net_init,
.exit = fib_net_exit,
.exit_batch = fib_net_exit_batch,
};

void __init ip_fib_init(void)
Expand Down
20 changes: 15 additions & 5 deletions net/ipv4/ipmr.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,13 +266,12 @@ static void __net_exit ipmr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;

rtnl_lock();
ASSERT_RTNL();
list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
list_del(&mrt->list);
ipmr_free_table(mrt);
}
fib_rules_unregister(net->ipv4.mr_rules_ops);
rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
Expand Down Expand Up @@ -328,10 +327,9 @@ static int __net_init ipmr_rules_init(struct net *net)

static void __net_exit ipmr_rules_exit(struct net *net)
{
rtnl_lock();
ASSERT_RTNL();
ipmr_free_table(net->ipv4.mrt);
net->ipv4.mrt = NULL;
rtnl_unlock();
}

static int ipmr_rules_dump(struct net *net, struct notifier_block *nb,
Expand Down Expand Up @@ -3075,7 +3073,9 @@ static int __net_init ipmr_net_init(struct net *net)
proc_cache_fail:
remove_proc_entry("ip_mr_vif", net->proc_net);
proc_vif_fail:
rtnl_lock();
ipmr_rules_exit(net);
rtnl_unlock();
#endif
ipmr_rules_fail:
ipmr_notifier_exit(net);
Expand All @@ -3090,12 +3090,22 @@ static void __net_exit ipmr_net_exit(struct net *net)
remove_proc_entry("ip_mr_vif", net->proc_net);
#endif
ipmr_notifier_exit(net);
ipmr_rules_exit(net);
}

static void __net_exit ipmr_net_exit_batch(struct list_head *net_list)
{
struct net *net;

rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
ipmr_rules_exit(net);
rtnl_unlock();
}

static struct pernet_operations ipmr_net_ops = {
.init = ipmr_net_init,
.exit = ipmr_net_exit,
.exit_batch = ipmr_net_exit_batch,
};

int __init ip_mr_init(void)
Expand Down
12 changes: 8 additions & 4 deletions net/ipv4/nexthop.c
Original file line number Diff line number Diff line change
Expand Up @@ -3733,12 +3733,16 @@ void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets,
}
EXPORT_SYMBOL(nexthop_res_grp_activity_update);

static void __net_exit nexthop_net_exit(struct net *net)
static void __net_exit nexthop_net_exit_batch(struct list_head *net_list)
{
struct net *net;

rtnl_lock();
flush_all_nexthops(net);
list_for_each_entry(net, net_list, exit_list) {
flush_all_nexthops(net);
kfree(net->nexthop.devhash);
}
rtnl_unlock();
kfree(net->nexthop.devhash);
}

static int __net_init nexthop_net_init(struct net *net)
Expand All @@ -3756,7 +3760,7 @@ static int __net_init nexthop_net_init(struct net *net)

static struct pernet_operations nexthop_net_ops = {
.init = nexthop_net_init,
.exit = nexthop_net_exit,
.exit_batch = nexthop_net_exit_batch,
};

static int __init nexthop_init(void)
Expand Down
Loading

0 comments on commit 4caaf75

Please sign in to comment.