Skip to content

Commit

Permalink
Merge branch 'net-more-factorization-in-cleanup_net-paths'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
net: more factorization in cleanup_net() paths

This series is inspired by recent syzbot reports hinting at RTNL and
workqueue abuses.

rtnl_lock() is unfair to (single-threaded) cleanup_net(), because
many threads can cause contention on it.

This series adds a new (struct pernet_operations) method,
so that cleanup_net() can hold RTNL longer once it finally
acquires it.

It also factorizes unregister_netdevice_many(), to further
reduce stalls in cleanup_net().

Link: https://lore.kernel.org/netdev/CANn89iLJrrJs+6Vc==Un4rVKcpV0Eof4F_4w1_wQGxUCE2FWAg@mail.gmail.com/T/#u
https://lore.kernel.org/netdev/170688415193.5216.10499830272732622816@kwain/
====================

Link: https://lore.kernel.org/r/20240206144313.2050392-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Feb 8, 2024
2 parents a1e55f5 + 8962dac commit b6b6145
Show file tree
Hide file tree
Showing 20 changed files with 190 additions and 157 deletions.
13 changes: 4 additions & 9 deletions drivers/net/bareudp.c
Original file line number Diff line number Diff line change
Expand Up @@ -760,23 +760,18 @@ static void bareudp_destroy_tunnels(struct net *net, struct list_head *head)
unregister_netdevice_queue(bareudp->dev, head);
}

static void __net_exit bareudp_exit_batch_net(struct list_head *net_list)
static void __net_exit bareudp_exit_batch_rtnl(struct list_head *net_list,
struct list_head *dev_kill_list)
{
struct net *net;
LIST_HEAD(list);

rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
bareudp_destroy_tunnels(net, &list);

/* unregister the devices gathered above */
unregister_netdevice_many(&list);
rtnl_unlock();
bareudp_destroy_tunnels(net, dev_kill_list);
}

static struct pernet_operations bareudp_net_ops = {
.init = bareudp_init_net,
.exit_batch = bareudp_exit_batch_net,
.exit_batch_rtnl = bareudp_exit_batch_rtnl,
.id = &bareudp_net_id,
.size = sizeof(struct bareudp_net),
};
Expand Down
37 changes: 26 additions & 11 deletions drivers/net/bonding/bond_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6416,28 +6416,41 @@ static int __net_init bond_net_init(struct net *net)
return 0;
}

static void __net_exit bond_net_exit_batch(struct list_head *net_list)
/* According to commit 69b0216ac255 ("bonding: fix bonding_masters
* race condition in bond unloading") we need to remove sysfs files
* before we remove our devices (done later in bond_net_exit_batch_rtnl())
*/
static void __net_exit bond_net_pre_exit(struct net *net)
{
	/* Look up this namespace's bonding state and tear down its sysfs
	 * entries.
	 */
	bond_destroy_sysfs(net_generic(net, bond_net_id));
}

static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list,
struct list_head *dev_kill_list)
{
struct bond_net *bn;
struct net *net;
LIST_HEAD(list);

list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
bond_destroy_sysfs(bn);
}

/* Kill off any bonds created after unregistering bond rtnl ops */
rtnl_lock();
list_for_each_entry(net, net_list, exit_list) {
struct bonding *bond, *tmp_bond;

bn = net_generic(net, bond_net_id);
list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
unregister_netdevice_queue(bond->dev, &list);
unregister_netdevice_queue(bond->dev, dev_kill_list);
}
unregister_netdevice_many(&list);
rtnl_unlock();
}

/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory
* only after all bonds are gone") bond_destroy_proc_dir() is called
* after bond_net_exit_batch_rtnl() has completed.
*/
static void __net_exit bond_net_exit_batch(struct list_head *net_list)
{
struct bond_net *bn;
struct net *net;

list_for_each_entry(net, net_list, exit_list) {
bn = net_generic(net, bond_net_id);
Expand All @@ -6447,6 +6460,8 @@ static void __net_exit bond_net_exit_batch(struct list_head *net_list)

static struct pernet_operations bond_net_ops = {
.init = bond_net_init,
.pre_exit = bond_net_pre_exit,
.exit_batch_rtnl = bond_net_exit_batch_rtnl,
.exit_batch = bond_net_exit_batch,
.id = &bond_net_id,
.size = sizeof(struct bond_net),
Expand Down
23 changes: 10 additions & 13 deletions drivers/net/geneve.c
Original file line number Diff line number Diff line change
Expand Up @@ -1900,29 +1900,26 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
}
}

static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list,
struct list_head *dev_to_kill)
{
struct net *net;
LIST_HEAD(list);

rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
geneve_destroy_tunnels(net, &list);

/* unregister the devices gathered above */
unregister_netdevice_many(&list);
rtnl_unlock();
geneve_destroy_tunnels(net, dev_to_kill);
}

list_for_each_entry(net, net_list, exit_list) {
const struct geneve_net *gn = net_generic(net, geneve_net_id);
static void __net_exit geneve_exit_net(struct net *net)
{
const struct geneve_net *gn = net_generic(net, geneve_net_id);

WARN_ON_ONCE(!list_empty(&gn->sock_list));
}
WARN_ON_ONCE(!list_empty(&gn->sock_list));
}

static struct pernet_operations geneve_net_ops = {
.init = geneve_init_net,
.exit_batch = geneve_exit_batch_net,
.exit_batch_rtnl = geneve_exit_batch_rtnl,
.exit = geneve_exit_net,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
};
Expand Down
20 changes: 10 additions & 10 deletions drivers/net/gtp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1876,23 +1876,23 @@ static int __net_init gtp_net_init(struct net *net)
return 0;
}

static void __net_exit gtp_net_exit(struct net *net)
static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
struct list_head *dev_to_kill)
{
struct gtp_net *gn = net_generic(net, gtp_net_id);
struct gtp_dev *gtp;
LIST_HEAD(list);
struct net *net;

rtnl_lock();
list_for_each_entry(gtp, &gn->gtp_dev_list, list)
gtp_dellink(gtp->dev, &list);
list_for_each_entry(net, net_list, exit_list) {
struct gtp_net *gn = net_generic(net, gtp_net_id);
struct gtp_dev *gtp;

unregister_netdevice_many(&list);
rtnl_unlock();
list_for_each_entry(gtp, &gn->gtp_dev_list, list)
gtp_dellink(gtp->dev, dev_to_kill);
}
}

static struct pernet_operations gtp_net_ops = {
.init = gtp_net_init,
.exit = gtp_net_exit,
.exit_batch_rtnl = gtp_net_exit_batch_rtnl,
.id = &gtp_net_id,
.size = sizeof(struct gtp_net),
};
Expand Down
50 changes: 19 additions & 31 deletions drivers/net/vxlan/vxlan_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -4826,55 +4826,43 @@ static __net_init int vxlan_init_net(struct net *net)
NULL);
}

static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
static void __net_exit vxlan_destroy_tunnels(struct vxlan_net *vn,
struct list_head *dev_to_kill)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_dev *vxlan, *next;
struct net_device *dev, *aux;

for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &vxlan_link_ops)
unregister_netdevice_queue(dev, head);

list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) {
/* If vxlan->dev is in the same netns, it has already been added
* to the list by the previous loop.
*/
if (!net_eq(dev_net(vxlan->dev), net))
unregister_netdevice_queue(vxlan->dev, head);
}

list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next)
vxlan_dellink(vxlan->dev, dev_to_kill);
}

static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
static void __net_exit vxlan_exit_batch_rtnl(struct list_head *net_list,
struct list_head *dev_to_kill)
{
struct net *net;
LIST_HEAD(list);
unsigned int h;

ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list) {
struct vxlan_net *vn = net_generic(net, vxlan_net_id);

unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);
}
rtnl_lock();
list_for_each_entry(net, net_list, exit_list)
vxlan_destroy_tunnels(net, &list);
__unregister_nexthop_notifier(net, &vn->nexthop_notifier_block);

unregister_netdevice_many(&list);
rtnl_unlock();
vxlan_destroy_tunnels(vn, dev_to_kill);
}
}

list_for_each_entry(net, net_list, exit_list) {
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
static void __net_exit vxlan_exit_net(struct net *net)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
unsigned int h;

for (h = 0; h < PORT_HASH_SIZE; ++h)
WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
}
for (h = 0; h < PORT_HASH_SIZE; ++h)
WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
}

static struct pernet_operations vxlan_net_ops = {
.init = vxlan_init_net,
.exit_batch = vxlan_exit_batch_net,
.exit_batch_rtnl = vxlan_exit_batch_rtnl,
.exit = vxlan_exit_net,
.id = &vxlan_net_id,
.size = sizeof(struct vxlan_net),
};
Expand Down
3 changes: 2 additions & 1 deletion include/net/ip_tunnels.h
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,8 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname);

void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id,
struct rtnl_link_ops *ops);
struct rtnl_link_ops *ops,
struct list_head *dev_to_kill);

void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, const u8 protocol);
Expand Down
3 changes: 3 additions & 0 deletions include/net/net_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,9 @@ struct pernet_operations {
void (*pre_exit)(struct net *net);
void (*exit)(struct net *net);
void (*exit_batch)(struct list_head *net_exit_list);
/* Following method is called with RTNL held. */
void (*exit_batch_rtnl)(struct list_head *net_exit_list,
struct list_head *dev_kill_list);
unsigned int *id;
size_t size;
};
Expand Down
1 change: 1 addition & 0 deletions include/net/nexthop.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ struct nh_notifier_info {

int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
struct netlink_ext_ack *extack);
int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap);
void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index,
Expand Down
15 changes: 5 additions & 10 deletions net/bridge/br.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,26 +356,21 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on)
clear_bit(opt, &br->options);
}

static void __net_exit br_net_exit_batch(struct list_head *net_list)
static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list,
struct list_head *dev_to_kill)
{
struct net_device *dev;
struct net *net;
LIST_HEAD(list);

rtnl_lock();

ASSERT_RTNL();
list_for_each_entry(net, net_list, exit_list)
for_each_netdev(net, dev)
if (netif_is_bridge_master(dev))
br_dev_delete(dev, &list);

unregister_netdevice_many(&list);

rtnl_unlock();
br_dev_delete(dev, dev_to_kill);
}

static struct pernet_operations br_net_ops = {
.exit_batch = br_net_exit_batch,
.exit_batch_rtnl = br_net_exit_batch_rtnl,
};

static const struct stp_proto br_stp_proto = {
Expand Down
31 changes: 30 additions & 1 deletion net/core/net_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,8 +318,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
/* Must be called with pernet_ops_rwsem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
LIST_HEAD(dev_kill_list);
int error = 0;

refcount_set(&net->ns.count, 1);
ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
Expand Down Expand Up @@ -357,6 +358,15 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)

synchronize_rcu();

ops = saved_ops;
rtnl_lock();
list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
if (ops->exit_batch_rtnl)
ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
}
unregister_netdevice_many(&dev_kill_list);
rtnl_unlock();

ops = saved_ops;
list_for_each_entry_continue_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);
Expand Down Expand Up @@ -573,6 +583,7 @@ static void cleanup_net(struct work_struct *work)
struct net *net, *tmp, *last;
struct llist_node *net_kill_list;
LIST_HEAD(net_exit_list);
LIST_HEAD(dev_kill_list);

/* Atomically snapshot the list of namespaces to cleanup */
net_kill_list = llist_del_all(&cleanup_list);
Expand Down Expand Up @@ -613,6 +624,14 @@ static void cleanup_net(struct work_struct *work)
*/
synchronize_rcu();

rtnl_lock();
list_for_each_entry_reverse(ops, &pernet_list, list) {
if (ops->exit_batch_rtnl)
ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list);
}
unregister_netdevice_many(&dev_kill_list);
rtnl_unlock();

/* Run all of the network namespace exit methods */
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);
Expand Down Expand Up @@ -1193,7 +1212,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_
{
ops_pre_exit_list(ops, net_exit_list);
synchronize_rcu();

if (ops->exit_batch_rtnl) {
LIST_HEAD(dev_kill_list);

rtnl_lock();
ops->exit_batch_rtnl(net_exit_list, &dev_kill_list);
unregister_netdevice_many(&dev_kill_list);
rtnl_unlock();
}
ops_exit_list(ops, net_exit_list);

ops_free_list(ops, net_exit_list);
}

Expand Down
Loading

0 comments on commit b6b6145

Please sign in to comment.