From fd4f101edbd9f99567ab2adb1f2169579ede7c13 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:42:57 +0000 Subject: [PATCH 01/15] net: add exit_batch_rtnl() method Many (struct pernet_operations)->exit_batch() methods have to acquire rtnl. In presence of rtnl mutex pressure, this makes cleanup_net() very slow. This patch adds a new exit_batch_rtnl() method to reduce number of rtnl acquisitions from cleanup_net(). exit_batch_rtnl() handlers are called while rtnl is locked, and devices to be killed can be queued in a list provided as their second argument. A single unregister_netdevice_many() is called right before rtnl is released. exit_batch_rtnl() handlers are called before ->exit() and ->exit_batch() handlers. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/net_namespace.h | 3 +++ net/core/net_namespace.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index cd0c2eedbb5e9..20c34bd7a0778 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -448,6 +448,9 @@ struct pernet_operations { void (*pre_exit)(struct net *net); void (*exit)(struct net *net); void (*exit_batch)(struct list_head *net_exit_list); + /* Following method is called with RTNL held. */ + void (*exit_batch_rtnl)(struct list_head *net_exit_list, + struct list_head *dev_kill_list); unsigned int *id; size_t size; }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 72799533426b6..233ec0cdd0111 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -318,8 +318,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with pernet_ops_rwsem held */ const struct pernet_operations *ops, *saved_ops; - int error = 0; LIST_HEAD(net_exit_list); + LIST_HEAD(dev_kill_list); + int error = 0; refcount_set(&net->ns.count, 1); ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt"); @@ -357,6 +358,15 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) synchronize_rcu(); + ops = saved_ops; + rtnl_lock(); + list_for_each_entry_continue_reverse(ops, &pernet_list, list) { + if (ops->exit_batch_rtnl) + ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + ops = saved_ops; list_for_each_entry_continue_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); @@ -573,6 +583,7 @@ static void cleanup_net(struct work_struct *work) struct net *net, *tmp, *last; struct llist_node *net_kill_list; LIST_HEAD(net_exit_list); + LIST_HEAD(dev_kill_list); /* Atomically snapshot the list of namespaces to cleanup */ net_kill_list = llist_del_all(&cleanup_list); @@ -613,6 +624,14 @@ static void cleanup_net(struct work_struct *work) */ synchronize_rcu(); + rtnl_lock(); + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->exit_batch_rtnl) + ops->exit_batch_rtnl(&net_exit_list, &dev_kill_list); + } + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + /* Run all of the network namespace exit methods */ list_for_each_entry_reverse(ops, &pernet_list, list) ops_exit_list(ops, &net_exit_list); @@ -1193,7 +1212,17 @@ static void free_exit_list(struct pernet_operations *ops, struct list_head *net_ { ops_pre_exit_list(ops, net_exit_list); synchronize_rcu(); + + if 
(ops->exit_batch_rtnl) { + LIST_HEAD(dev_kill_list); + + rtnl_lock(); + ops->exit_batch_rtnl(net_exit_list, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); + } ops_exit_list(ops, net_exit_list); + ops_free_list(ops, net_exit_list); } From a7ec2512ad7b23340059f59f3fd710cab056791a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:42:59 +0000 Subject: [PATCH 02/15] nexthop: convert nexthop_net_exit_batch to exit_batch_rtnl method exit_batch_rtnl() is called while RTNL is held. This saves one rtnl_lock()/rtnl_unlock() pair. We also need to create nexthop_net_exit() to make sure net->nexthop.devhash is not freed too soon, otherwise we will not be able to unregister netdev from exit_batch_rtnl() methods. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-4-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/nexthop.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index bbff68b5b5d4a..7270a8631406c 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3737,16 +3737,20 @@ void nexthop_res_grp_activity_update(struct net *net, u32 id, u16 num_buckets, } EXPORT_SYMBOL(nexthop_res_grp_activity_update); -static void __net_exit nexthop_net_exit_batch(struct list_head *net_list) +static void __net_exit nexthop_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) { + ASSERT_RTNL(); + list_for_each_entry(net, net_list, exit_list) flush_all_nexthops(net); - kfree(net->nexthop.devhash); - } - rtnl_unlock(); +} + +static void __net_exit nexthop_net_exit(struct net *net) +{ + kfree(net->nexthop.devhash); + net->nexthop.devhash = NULL; } static int __net_init nexthop_net_init(struct net *net) @@ -3764,7 +3768,8 @@ static int __net_init nexthop_net_init(struct net *net) static struct pernet_operations nexthop_net_ops = { .init = nexthop_net_init, - .exit_batch = nexthop_net_exit_batch, + .exit = nexthop_net_exit, + .exit_batch_rtnl = nexthop_net_exit_batch_rtnl, }; static int __init nexthop_init(void) From 422b5ae9c5e507f913118d086431c46022aa50c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:00 +0000 Subject: [PATCH 03/15] bareudp: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair, and one unregister_netdevice_many() call. 
Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-5-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/bareudp.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 31377bb1cc97c..4db6122c9b430 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -760,23 +760,18 @@ static void bareudp_destroy_tunnels(struct net *net, struct list_head *head) unregister_netdevice_queue(bareudp->dev, head); } -static void __net_exit bareudp_exit_batch_net(struct list_head *net_list) +static void __net_exit bareudp_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_kill_list) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); list_for_each_entry(net, net_list, exit_list) - bareudp_destroy_tunnels(net, &list); - - /* unregister the devices gathered above */ - unregister_netdevice_many(&list); - rtnl_unlock(); + bareudp_destroy_tunnels(net, dev_kill_list); } static struct pernet_operations bareudp_net_ops = { .init = bareudp_init_net, - .exit_batch = bareudp_exit_batch_net, + .exit_batch_rtnl = bareudp_exit_batch_rtnl, .id = &bareudp_net_id, .size = sizeof(struct bareudp_net), }; From 669966bc94d82e614d894899328486613769f0c6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:01 +0000 Subject: [PATCH 04/15] bonding: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair, and one unregister_netdevice_many() call. v2: Added bond_net_pre_exit() method to make sure bond_destroy_sysfs() is called before we unregister the devices in bond_net_exit_batch_rtnl (Antoine Tenart : https://lore.kernel.org/netdev/170688415193.5216.10499830272732622816@kwain/) Signed-off-by: Eric Dumazet Acked-by: Jay Vosburgh Cc: Andy Gospodarek Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-6-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 37 +++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ae9d32c0faf40..cb67ece47328c 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -6416,28 +6416,41 @@ static int __net_init bond_net_init(struct net *net) return 0; } -static void __net_exit bond_net_exit_batch(struct list_head *net_list) +/* According to commit 69b0216ac255 ("bonding: fix bonding_masters + * race condition in bond unloading") we need to remove sysfs files + * before we remove our devices (done later in bond_net_exit_batch_rtnl()) + */ +static void __net_exit bond_net_pre_exit(struct net *net) +{ + struct bond_net *bn = net_generic(net, bond_net_id); + + bond_destroy_sysfs(bn); +} + +static void __net_exit bond_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_kill_list) { struct bond_net *bn; struct net *net; - LIST_HEAD(list); - - list_for_each_entry(net, net_list, exit_list) { - bn = net_generic(net, bond_net_id); - bond_destroy_sysfs(bn); - } /* Kill off any bonds created after unregistering bond rtnl ops */ - rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { struct bonding *bond, *tmp_bond; bn = net_generic(net, bond_net_id); list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list) - unregister_netdevice_queue(bond->dev, &list); 
+ unregister_netdevice_queue(bond->dev, dev_kill_list); } - unregister_netdevice_many(&list); - rtnl_unlock(); +} + +/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory + * only after all bonds are gone") bond_destroy_proc_dir() is called + * after bond_net_exit_batch_rtnl() has completed. + */ +static void __net_exit bond_net_exit_batch(struct list_head *net_list) +{ + struct bond_net *bn; + struct net *net; list_for_each_entry(net, net_list, exit_list) { bn = net_generic(net, bond_net_id); @@ -6447,6 +6460,8 @@ static void __net_exit bond_net_exit_batch(struct list_head *net_list) static struct pernet_operations bond_net_ops = { .init = bond_net_init, + .pre_exit = bond_net_pre_exit, + .exit_batch_rtnl = bond_net_exit_batch_rtnl, .exit_batch = bond_net_exit_batch, .id = &bond_net_id, .size = sizeof(struct bond_net), From f4b57b9dc96bbdb5beb2c93619c7904e4909b366 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:02 +0000 Subject: [PATCH 05/15] geneve: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair, and one unregister_netdevice_many() call. Note: it should be possible to remove the synchronize_net() call from geneve_sock_release() in a future patch. v4: move WARN_ON_ONCE(!list_empty(&gn->sock_list)) into geneve_exit_net(), after devices have been unregistered. (Antoine Tenart feedback) Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-7-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/geneve.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 32c51c244153b..23e97c2e4f6fc 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1900,29 +1900,26 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head) } } -static void __net_exit geneve_exit_batch_net(struct list_head *net_list) +static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); list_for_each_entry(net, net_list, exit_list) - geneve_destroy_tunnels(net, &list); - - /* unregister the devices gathered above */ - unregister_netdevice_many(&list); - rtnl_unlock(); + geneve_destroy_tunnels(net, dev_to_kill); +} - list_for_each_entry(net, net_list, exit_list) { - const struct geneve_net *gn = net_generic(net, geneve_net_id); +static void __net_exit geneve_exit_net(struct net *net) +{ + const struct geneve_net *gn = net_generic(net, geneve_net_id); - WARN_ON_ONCE(!list_empty(&gn->sock_list)); - } + WARN_ON_ONCE(!list_empty(&gn->sock_list)); } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit_batch = geneve_exit_batch_net, + .exit_batch_rtnl = geneve_exit_batch_rtnl, + .exit = geneve_exit_net, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; From 6eedda01b2bfdcf427b37759e053dc27232f3af1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:03 +0000 Subject: [PATCH 06/15] gtp: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair per netns and one unregister_netdevice_many() call per netns. 
Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-8-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/gtp.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index b1919278e931f..62c601d9f7528 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1876,23 +1876,23 @@ static int __net_init gtp_net_init(struct net *net) return 0; } -static void __net_exit gtp_net_exit(struct net *net) +static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { - struct gtp_net *gn = net_generic(net, gtp_net_id); - struct gtp_dev *gtp; - LIST_HEAD(list); + struct net *net; - rtnl_lock(); - list_for_each_entry(gtp, &gn->gtp_dev_list, list) - gtp_dellink(gtp->dev, &list); + list_for_each_entry(net, net_list, exit_list) { + struct gtp_net *gn = net_generic(net, gtp_net_id); + struct gtp_dev *gtp; - unregister_netdevice_many(&list); - rtnl_unlock(); + list_for_each_entry(gtp, &gn->gtp_dev_list, list) + gtp_dellink(gtp->dev, dev_to_kill); + } } static struct pernet_operations gtp_net_ops = { .init = gtp_net_init, - .exit = gtp_net_exit, + .exit_batch_rtnl = gtp_net_exit_batch_rtnl, .id = &gtp_net_id, .size = sizeof(struct gtp_net), }; From 70f16ea2e4f673fc769fd13c00c20a32b4fe238a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:04 +0000 Subject: [PATCH 07/15] ipv4: add __unregister_nexthop_notifier() unregister_nexthop_notifier() assumes the caller does not hold rtnl. In the following patch, we need to use it from a context that already holds rtnl. Add __unregister_nexthop_notifier(). unregister_nexthop_notifier() becomes a wrapper. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-9-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/nexthop.h | 1 + net/ipv4/nexthop.c | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/include/net/nexthop.h b/include/net/nexthop.h index d92046a4a0782..6647ad509faa0 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -218,6 +218,7 @@ struct nh_notifier_info { int register_nexthop_notifier(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack); +int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb); void nexthop_set_hw_flags(struct net *net, u32 id, bool offload, bool trap); void nexthop_bucket_set_hw_flags(struct net *net, u32 id, u16 bucket_index, diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 7270a8631406c..70509da4f0806 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -3631,17 +3631,24 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb, } EXPORT_SYMBOL(register_nexthop_notifier); -int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +int __unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) { int err; - rtnl_lock(); err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, nb); - if (err) - goto unlock; - nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); -unlock: + if (!err) + nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); + return err; } +EXPORT_SYMBOL(__unregister_nexthop_notifier); + +int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) +{ + int err; +
rtnl_lock(); + err = __unregister_nexthop_notifier(net, nb); rtnl_unlock(); return err; } From 110d3047a3ec033de00322b1a8068b1215efa97a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:05 +0000 Subject: [PATCH 08/15] vxlan: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair per netns and one unregister_netdevice_many() call. v4: (Paolo feedback : https://netdev-3.bots.linux.dev/vmksft-net/results/453141/17-udpgro-fwd-sh/stdout ) - Changed vxlan_destroy_tunnels() to use vxlan_dellink() instead of unregister_netdevice_queue to properly remove devices from vn->vxlan_list. - vxlan_destroy_tunnels() can simply iterate one list (vn->vxlan_list) to find all devices in the most efficient way. - Moved sanity checks into a separate vxlan_exit_net() method. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-10-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/vxlan/vxlan_core.c | 50 +++++++++++++--------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 16106e088c630..11707647afb98 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -4826,55 +4826,43 @@ static __net_init int vxlan_init_net(struct net *net) NULL); } -static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) +static void __net_exit vxlan_destroy_tunnels(struct vxlan_net *vn, + struct list_head *dev_to_kill) { - struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan, *next; - struct net_device *dev, *aux; - - for_each_netdev_safe(net, dev, aux) - if (dev->rtnl_link_ops == &vxlan_link_ops) - unregister_netdevice_queue(dev, head); - - list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) { - /* If vxlan->dev is in the same netns, it has already been added - * to the list by the previous loop.
- */ - if (!net_eq(dev_net(vxlan->dev), net)) - unregister_netdevice_queue(vxlan->dev, head); - } + list_for_each_entry_safe(vxlan, next, &vn->vxlan_list, next) + vxlan_dellink(vxlan->dev, dev_to_kill); } -static void __net_exit vxlan_exit_batch_net(struct list_head *net_list) +static void __net_exit vxlan_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - unsigned int h; + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) { struct vxlan_net *vn = net_generic(net, vxlan_net_id); - unregister_nexthop_notifier(net, &vn->nexthop_notifier_block); - } - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) - vxlan_destroy_tunnels(net, &list); + __unregister_nexthop_notifier(net, &vn->nexthop_notifier_block); - unregister_netdevice_many(&list); - rtnl_unlock(); + vxlan_destroy_tunnels(vn, dev_to_kill); + } +} - list_for_each_entry(net, net_list, exit_list) { - struct vxlan_net *vn = net_generic(net, vxlan_net_id); +static void __net_exit vxlan_exit_net(struct net *net) +{ + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + unsigned int h; - for (h = 0; h < PORT_HASH_SIZE; ++h) - WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h])); - } + for (h = 0; h < PORT_HASH_SIZE; ++h) + WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h])); } static struct pernet_operations vxlan_net_ops = { .init = vxlan_init_net, - .exit_batch = vxlan_exit_batch_net, + .exit_batch_rtnl = vxlan_exit_batch_rtnl, + .exit = vxlan_exit_net, .id = &vxlan_net_id, .size = sizeof(struct vxlan_net), }; From bc50c535c3a011605f4e0d219431a8e42249e71e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:06 +0000 Subject: [PATCH 09/15] ip6_gre: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-11-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_gre.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 070d87abf7c02..428f03e9da45a 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1632,21 +1632,19 @@ static int __net_init ip6gre_init_net(struct net *net) return err; } -static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list) +static void __net_exit ip6gre_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) - ip6gre_destroy_tunnels(net, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + ip6gre_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations ip6gre_net_ops = { .init = ip6gre_init_net, - .exit_batch = ip6gre_exit_batch_net, + .exit_batch_rtnl = ip6gre_exit_batch_rtnl, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), }; From a1fab9aff5c05589ece2893a8d312f16eb0ff5e6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:07 +0000 Subject: [PATCH 10/15] ip6_tunnel: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. 
Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-12-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_tunnel.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9bbabf750a21e..bfb0a6c601c11 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -2282,21 +2282,19 @@ static int __net_init ip6_tnl_init_net(struct net *net) return err; } -static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list) +static void __net_exit ip6_tnl_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) - ip6_tnl_destroy_tunnels(net, &list); - unregister_netdevice_many(&list); - rtnl_unlock(); + ip6_tnl_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, - .exit_batch = ip6_tnl_exit_batch_net, + .exit_batch_rtnl = ip6_tnl_exit_batch_rtnl, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), }; From 7a99f3c1994b2c3add235090d8721bfbb2b95320 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:08 +0000 Subject: [PATCH 11/15] ip6_vti: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-13-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/ip6_vti.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index e550240c85e1c..cfe1b1ad4d85d 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1174,24 +1174,22 @@ static int __net_init vti6_init_net(struct net *net) return err; } -static void __net_exit vti6_exit_batch_net(struct list_head *net_list) +static void __net_exit vti6_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct vti6_net *ip6n; struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) { ip6n = net_generic(net, vti6_net_id); - vti6_destroy_tunnels(ip6n, &list); + vti6_destroy_tunnels(ip6n, dev_to_kill); } - unregister_netdevice_many(&list); - rtnl_unlock(); } static struct pernet_operations vti6_net_ops = { .init = vti6_init_net, - .exit_batch = vti6_exit_batch_net, + .exit_batch_rtnl = vti6_exit_batch_rtnl, .id = &vti6_net_id, .size = sizeof(struct vti6_net), }; From de02deab27fd63063e056811d0473ad5b9a56e5e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:09 +0000 Subject: [PATCH 12/15] sit: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. 
Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-14-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv6/sit.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index cc24cefdb85c0..61b2b71fa8bed 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1875,22 +1875,19 @@ static int __net_init sit_init_net(struct net *net) return err; } -static void __net_exit sit_exit_batch_net(struct list_head *net_list) +static void __net_exit sit_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { - LIST_HEAD(list); struct net *net; - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) - sit_destroy_tunnels(net, &list); - - unregister_netdevice_many(&list); - rtnl_unlock(); + sit_destroy_tunnels(net, dev_to_kill); } static struct pernet_operations sit_net_ops = { .init = sit_init_net, - .exit_batch = sit_exit_batch_net, + .exit_batch_rtnl = sit_exit_batch_rtnl, .id = &sit_net_id, .size = sizeof(struct sit_net), }; From 9b5b36374ed6953f3efcc82e7cb4c353b9869faf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:10 +0000 Subject: [PATCH 13/15] ip_tunnel: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair and one unregister_netdevice_many() call. This patch takes care of ipip, ip_vti, and ip_gre tunnels. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-15-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/net/ip_tunnels.h | 3 ++- net/ipv4/ip_gre.c | 24 +++++++++++++++--------- net/ipv4/ip_tunnel.c | 10 ++++------ net/ipv4/ip_vti.c | 8 +++++--- net/ipv4/ipip.c | 8 +++++--- 5 files changed, 31 insertions(+), 22 deletions(-) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 2d746f4c9a0a4..5cd64bb2104df 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -284,7 +284,8 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); void ip_tunnel_delete_nets(struct list_head *list_net, unsigned int id, - struct rtnl_link_ops *ops); + struct rtnl_link_ops *ops, + struct list_head *dev_to_kill); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5169c3c72cffe..aad5125b7a65e 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1025,14 +1025,16 @@ static int __net_init ipgre_init_net(struct net *net) return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } -static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) +static void __net_exit ipgre_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); + ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops, + dev_to_kill); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, - .exit_batch = ipgre_exit_batch_net, + .exit_batch_rtnl = ipgre_exit_batch_rtnl, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1697,14 +1699,16 @@ static int __net_init ipgre_tap_init_net(struct net *net) return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); } -static void __net_exit 
ipgre_tap_exit_batch_net(struct list_head *list_net) +static void __net_exit ipgre_tap_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); + ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops, + dev_to_kill); } static struct pernet_operations ipgre_tap_net_ops = { .init = ipgre_tap_init_net, - .exit_batch = ipgre_tap_exit_batch_net, + .exit_batch_rtnl = ipgre_tap_exit_batch_rtnl, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -1715,14 +1719,16 @@ static int __net_init erspan_init_net(struct net *net) &erspan_link_ops, "erspan0"); } -static void __net_exit erspan_exit_batch_net(struct list_head *net_list) +static void __net_exit erspan_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); + ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops, + dev_to_kill); } static struct pernet_operations erspan_net_ops = { .init = erspan_init_net, - .exit_batch = erspan_exit_batch_net, + .exit_batch_rtnl = erspan_exit_batch_rtnl, .id = &erspan_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index beeae624c412d..00da0b80320fb 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -1130,19 +1130,17 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn, } void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, - struct rtnl_link_ops *ops) + struct rtnl_link_ops *ops, + struct list_head *dev_to_kill) { struct ip_tunnel_net *itn; struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) { itn = net_generic(net, id); - ip_tunnel_destroy(net, itn, &list, ops); + ip_tunnel_destroy(net, itn, dev_to_kill, ops); } - unregister_netdevice_many(&list); - rtnl_unlock(); } EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 9ab9b3ebe0cd1..fb1f52d213112 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -510,14 +510,16 @@ static int __net_init vti_init_net(struct net *net) return 0; } -static void __net_exit vti_exit_batch_net(struct list_head *list_net) +static void __net_exit vti_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops); + ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops, + dev_to_kill); } static struct pernet_operations vti_net_ops = { .init = vti_init_net, - .exit_batch = vti_exit_batch_net, + .exit_batch_rtnl = vti_exit_batch_rtnl, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 27b8f83c6ea20..0151eea06cc50 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -592,14 +592,16 @@ static int __net_init ipip_init_net(struct net *net) return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } -static void __net_exit ipip_exit_batch_net(struct list_head *list_net) +static void __net_exit ipip_exit_batch_rtnl(struct list_head *list_net, + struct list_head *dev_to_kill) { - ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops); + ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops, + dev_to_kill); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, - .exit_batch = ipip_exit_batch_net, + .exit_batch_rtnl = ipip_exit_batch_rtnl, .id = &ipip_net_id, .size = 
sizeof(struct ip_tunnel_net), }; From 806b67850787bae3df1bc2e5a30e8c658c579d80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:11 +0000 Subject: [PATCH 14/15] bridge: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair per netns and one unregister_netdevice_many() call. Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-16-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/bridge/br.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/net/bridge/br.c b/net/bridge/br.c index ac19b797dbece..2cab878e0a39c 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -356,26 +356,21 @@ void br_opt_toggle(struct net_bridge *br, enum net_bridge_opts opt, bool on) clear_bit(opt, &br->options); } -static void __net_exit br_net_exit_batch(struct list_head *net_list) +static void __net_exit br_net_exit_batch_rtnl(struct list_head *net_list, + struct list_head *dev_to_kill) { struct net_device *dev; struct net *net; - LIST_HEAD(list); - - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_list, exit_list) for_each_netdev(net, dev) if (netif_is_bridge_master(dev)) - br_dev_delete(dev, &list); - - unregister_netdevice_many(&list); - - rtnl_unlock(); + br_dev_delete(dev, dev_to_kill); } static struct pernet_operations br_net_ops = { - .exit_batch = br_net_exit_batch, + .exit_batch_rtnl = br_net_exit_batch_rtnl, }; static const struct stp_proto br_stp_proto = { From 8962daccc2d32812fe24bd21496c036eb4f454b0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 6 Feb 2024 14:43:12 +0000 Subject: [PATCH 15/15] xfrm: interface: use exit_batch_rtnl() method exit_batch_rtnl() is called while RTNL is held, and devices to be unregistered can be queued in the dev_kill_list. This saves one rtnl_lock()/rtnl_unlock() pair per netns and one unregister_netdevice_many() call. 
Signed-off-by: Eric Dumazet Reviewed-by: Antoine Tenart Link: https://lore.kernel.org/r/20240206144313.2050392-17-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/xfrm/xfrm_interface_core.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c index 21d50d75c2608..dafefef3cf51a 100644 --- a/net/xfrm/xfrm_interface_core.c +++ b/net/xfrm/xfrm_interface_core.c @@ -957,12 +957,12 @@ static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { .get_link_net = xfrmi_get_link_net, }; -static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) +static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list, + struct list_head *dev_to_kill) { struct net *net; - LIST_HEAD(list); - rtnl_lock(); + ASSERT_RTNL(); list_for_each_entry(net, net_exit_list, exit_list) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); struct xfrm_if __rcu **xip; @@ -973,18 +973,16 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) for (xip = &xfrmn->xfrmi[i]; (xi = rtnl_dereference(*xip)) != NULL; xip = &xi->next) - unregister_netdevice_queue(xi->dev, &list); + unregister_netdevice_queue(xi->dev, dev_to_kill); } xi = rtnl_dereference(xfrmn->collect_md_xfrmi); if (xi) - unregister_netdevice_queue(xi->dev, &list); + unregister_netdevice_queue(xi->dev, dev_to_kill); } - unregister_netdevice_many(&list); - rtnl_unlock(); } static struct pernet_operations xfrmi_net_ops = { - .exit_batch = xfrmi_exit_batch_net, + .exit_batch_rtnl = xfrmi_exit_batch_rtnl, .id = &xfrmi_net_id, .size = sizeof(struct xfrmi_net), };
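
For reference, the conversions in patches 2-15 all follow one pattern. The sketch below illustrates that pattern for a hypothetical "foo" tunnel driver; it is not part of the series, and foo_net, foo_dev, foo_net_id, foo_init_net() and foo_net_ops are made-up names used only to show the shape of an .exit_batch_rtnl handler under the semantics introduced by patch 1:

/*
 * Illustrative sketch only: "foo" is a hypothetical tunnel driver, not one
 * of the subsystems converted above.
 */
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

static unsigned int foo_net_id;

struct foo_dev {
	struct net_device *dev;
	struct list_head list;		/* linked into foo_net::dev_list */
};

struct foo_net {
	struct list_head dev_list;	/* all foo devices in this netns */
};

static int __net_init foo_init_net(struct net *net)
{
	struct foo_net *fn = net_generic(net, foo_net_id);

	INIT_LIST_HEAD(&fn->dev_list);
	return 0;
}

/* Replaces an .exit_batch handler that took rtnl itself, queued devices on a
 * local list and called unregister_netdevice_many() before releasing rtnl.
 * With .exit_batch_rtnl, cleanup_net() already holds RTNL and owns
 * dev_to_kill: the handler only queues devices, and a single
 * unregister_netdevice_many() runs for all pernet_operations before RTNL
 * is dropped.
 */
static void __net_exit foo_exit_batch_rtnl(struct list_head *net_list,
					   struct list_head *dev_to_kill)
{
	struct net *net;

	ASSERT_RTNL();
	list_for_each_entry(net, net_list, exit_list) {
		struct foo_net *fn = net_generic(net, foo_net_id);
		struct foo_dev *foo, *next;

		list_for_each_entry_safe(foo, next, &fn->dev_list, list)
			unregister_netdevice_queue(foo->dev, dev_to_kill);
	}
}

static struct pernet_operations foo_net_ops = {
	.init		 = foo_init_net,
	.exit_batch_rtnl = foo_exit_batch_rtnl,	/* was .exit_batch */
	.id		 = &foo_net_id,
	.size		 = sizeof(struct foo_net),
};

Per-netns state that must stay valid while the devices are torn down (such as net->nexthop.devhash, or the geneve/vxlan socket-list sanity checks) is freed or checked in a plain .exit()/.exit_batch() handler instead, since those run after the exit_batch_rtnl() pass.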