diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst index ebb868f50ac2..6c2d8945f597 100644 --- a/Documentation/networking/netdevices.rst +++ b/Documentation/networking/netdevices.rst @@ -343,6 +343,29 @@ there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g., acquiring the instance lock themselves, while the ``netif_xxx`` functions assume that the driver has already acquired the instance lock. +Notifiers and netdev instance lock +================================== + +For device drivers that implement shaping or queue management APIs, +some of the notifiers (``enum netdev_cmd``) are running under the netdev +instance lock. + +For devices with locked ops, currently only the following notifiers are +running under the lock: +* ``NETDEV_REGISTER`` +* ``NETDEV_UP`` + +The following notifiers are running without the lock: +* ``NETDEV_UNREGISTER`` + +There are no clear expectations for the remaining notifiers. Notifiers not on +the list may run with or without the instance lock, potentially even invoking +the same notifier type with and without the lock from different code paths. +The goal is to eventually ensure that all (or most, with a few documented +exceptions) notifiers run under the instance lock. Please extend this +documentation whenever you make an explicit assumption about the lock being held +from a notifier. + NETDEV_INTERNAL symbol namespace ================================ diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index a4938c6a5ebb..d6bdad4baadd 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -105,6 +105,7 @@ static void dummy_setup(struct net_device *dev) dev->netdev_ops = &dummy_netdev_ops; dev->ethtool_ops = &dummy_ethtool_ops; dev->needs_free_netdev = true; + dev->request_ops_lock = true; /* Fill in device structure with ethernet-generic values.
*/ dev->flags |= IFF_NOARP; diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index b67af4651185..ddda0c1e7a6d 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -939,6 +939,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns) ns->netdev->netdev_ops = &nsim_netdev_ops; ns->netdev->stat_ops = &nsim_stat_ops; ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops; + netdev_lockdep_set_classes(ns->netdev); err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev); if (err) @@ -960,6 +961,14 @@ static int nsim_init_netdevsim(struct netdevsim *ns) if (err) goto err_ipsec_teardown; rtnl_unlock(); + + if (IS_ENABLED(CONFIG_DEBUG_NET)) { + ns->nb.notifier_call = netdev_debug_event; + if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb, + &ns->nn)) + ns->nb.notifier_call = NULL; + } + return 0; err_ipsec_teardown: @@ -1043,6 +1052,10 @@ void nsim_destroy(struct netdevsim *ns) debugfs_remove(ns->qr_dfs); debugfs_remove(ns->pp_dfs); + if (ns->nb.notifier_call) + unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb, + &ns->nn); + rtnl_lock(); peer = rtnl_dereference(ns->peer); if (peer) diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 665020d18f29..d04401f0bdf7 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -144,6 +144,9 @@ struct netdevsim { struct nsim_ethtool ethtool; struct netdevsim __rcu *peer; + + struct notifier_block nb; + struct netdev_net_notifier nn; }; struct netdevsim * diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fa79145518d1..cf3b6445817b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4192,7 +4192,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags, int netif_set_alias(struct net_device *dev, const char *alias, size_t len); int dev_set_alias(struct net_device *, const char *, size_t); int dev_get_alias(const struct net_device *, char 
*, size_t); -int netif_change_net_namespace(struct net_device *dev, struct net *net, +int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack); int dev_change_net_namespace(struct net_device *dev, struct net *net, diff --git a/include/net/ip.h b/include/net/ip.h index 8a48ade24620..47ed6d23853d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -667,14 +667,6 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast, memcpy(buf, &naddr, sizeof(naddr)); } -#if IS_MODULE(CONFIG_IPV6) -#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) -#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) -#else -#define EXPORT_IPV6_MOD(X) -#define EXPORT_IPV6_MOD_GPL(X) -#endif - #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -694,6 +686,14 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif +#if IS_MODULE(CONFIG_IPV6) +#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X) +#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X) +#else +#define EXPORT_IPV6_MOD(X) +#define EXPORT_IPV6_MOD_GPL(X) +#endif + static inline unsigned int ipv4_addr_hash(__be32 ip) { return (__force unsigned int) ip; diff --git a/include/net/netdev_lock.h b/include/net/netdev_lock.h index 1c0c9a94cc22..c316b551df8d 100644 --- a/include/net/netdev_lock.h +++ b/include/net/netdev_lock.h @@ -98,4 +98,7 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a, &qdisc_xmit_lock_key); \ } +int netdev_debug_event(struct notifier_block *nb, unsigned long event, + void *ptr); + #endif diff --git a/net/core/Makefile b/net/core/Makefile index a10c3bd96798..b2a76ce33932 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -45,5 +45,5 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o obj-$(CONFIG_NET_TEST) += net_test.o obj-$(CONFIG_NET_DEVMEM) += devmem.o -obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o +obj-$(CONFIG_DEBUG_NET) += lock_debug.o 
obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o diff --git a/net/core/dev.c b/net/core/dev.c index 5d20ff226d5e..0608605cfc24 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1771,6 +1771,7 @@ void netif_disable_lro(struct net_device *dev) netdev_unlock_ops(lower_dev); } } +EXPORT_IPV6_MOD(netif_disable_lro); /** * dev_disable_gro_hw - disable HW Generic Receive Offload on a device @@ -1858,7 +1859,9 @@ static int call_netdevice_register_net_notifiers(struct notifier_block *nb, int err; for_each_netdev(net, dev) { + netdev_lock_ops(dev); err = call_netdevice_register_notifiers(nb, dev); + netdev_unlock_ops(dev); if (err) goto rollback; } @@ -11047,7 +11050,9 @@ int register_netdevice(struct net_device *dev) memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len); /* Notify protocols, that a new device appeared. */ + netdev_lock_ops(dev); ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); + netdev_unlock_ops(dev); ret = notifier_to_errno(ret); if (ret) { /* Expect explicit free_netdev() on failure */ @@ -12059,7 +12064,7 @@ void unregister_netdev(struct net_device *dev) } EXPORT_SYMBOL(unregister_netdev); -int netif_change_net_namespace(struct net_device *dev, struct net *net, +int __dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat, int new_ifindex, struct netlink_ext_ack *extack) { @@ -12144,11 +12149,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net, * And now a mini version of register_netdevice unregister_netdevice. */ + netdev_lock_ops(dev); /* If device is running close it first. 
*/ netif_close(dev); - /* And unlink it from device chain */ unlist_netdevice(dev); + netdev_unlock_ops(dev); synchronize_net(); @@ -12210,11 +12216,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net, err = netdev_change_owner(dev, net_old, net); WARN_ON(err); + netdev_lock_ops(dev); /* Add the device back in the hashes */ list_netdevice(dev); - /* Notify protocols, that a new device appeared. */ call_netdevice_notifiers(NETDEV_REGISTER, dev); + netdev_unlock_ops(dev); /* * Prevent userspace races by waiting until the network diff --git a/net/core/dev_api.c b/net/core/dev_api.c index 8dbc60612100..90bafb0b1b8c 100644 --- a/net/core/dev_api.c +++ b/net/core/dev_api.c @@ -117,13 +117,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - int ret; - - netdev_lock_ops(dev); - ret = netif_change_net_namespace(dev, net, pat, 0, NULL); - netdev_unlock_ops(dev); - - return ret; + return __dev_change_net_namespace(dev, net, pat, 0, NULL); } EXPORT_SYMBOL_GPL(dev_change_net_namespace); diff --git a/net/core/rtnl_net_debug.c b/net/core/lock_debug.c similarity index 89% rename from net/core/rtnl_net_debug.c rename to net/core/lock_debug.c index f3272b09c255..b7f22dc92a6f 100644 --- a/net/core/rtnl_net_debug.c +++ b/net/core/lock_debug.c @@ -6,10 +6,11 @@ #include #include #include +#include #include -static int rtnl_net_debug_event(struct notifier_block *nb, - unsigned long event, void *ptr) +int netdev_debug_event(struct notifier_block *nb, unsigned long event, + void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); @@ -17,11 +18,13 @@ static int rtnl_net_debug_event(struct notifier_block *nb, /* Keep enum and don't add default to trigger -Werror=switch */ switch (cmd) { + case NETDEV_REGISTER: case NETDEV_UP: + netdev_ops_assert_locked(dev); + fallthrough; case NETDEV_DOWN: case NETDEV_REBOOT: case NETDEV_CHANGE: - case 
NETDEV_REGISTER: case NETDEV_UNREGISTER: case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: @@ -66,6 +69,7 @@ static int rtnl_net_debug_event(struct notifier_block *nb, return NOTIFY_DONE; } +EXPORT_SYMBOL_NS_GPL(netdev_debug_event, "NETDEV_INTERNAL"); static int rtnl_net_debug_net_id; @@ -74,7 +78,7 @@ static int __net_init rtnl_net_debug_net_init(struct net *net) struct notifier_block *nb; nb = net_generic(net, rtnl_net_debug_net_id); - nb->notifier_call = rtnl_net_debug_event; + nb->notifier_call = netdev_debug_event; return register_netdevice_notifier_net(net, nb); } @@ -95,7 +99,7 @@ static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = { }; static struct notifier_block rtnl_net_debug_block = { - .notifier_call = rtnl_net_debug_event, + .notifier_call = netdev_debug_event, }; static int __init rtnl_net_debug_init(void) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 334db17be37d..c23852835050 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3025,8 +3025,6 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, char ifname[IFNAMSIZ]; int err; - netdev_lock_ops(dev); - err = validate_linkmsg(dev, tb, extack); if (err < 0) goto errout; @@ -3042,14 +3040,16 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev, new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0); - err = netif_change_net_namespace(dev, tgt_net, pat, + err = __dev_change_net_namespace(dev, tgt_net, pat, new_ifindex, extack); if (err) - goto errout; + return err; status |= DO_SETLINK_MODIFIED; } + netdev_lock_ops(dev); + if (tb[IFLA_MAP]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 754f60fb6e25..77e5705ac799 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -281,7 +281,7 @@ static struct in_device *inetdev_init(struct net_device *dev) if (!in_dev->arp_parms) goto out_kfree; if (IPV4_DEVCONF(in_dev->cnf, FORWARDING)) - 
dev_disable_lro(dev); + netif_disable_lro(dev); /* Reference in_dev->dev */ netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL); /* Account for reference dev->ip_ptr (below) */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 54a8ea004da2..c3b908fccbc1 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -377,6 +378,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) int err = -ENOMEM; ASSERT_RTNL(); + netdev_ops_assert_locked(dev); if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev) return ERR_PTR(-EINVAL); @@ -402,7 +404,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) return ERR_PTR(err); } if (ndev->cnf.forwarding) - dev_disable_lro(dev); + netif_disable_lro(dev); /* We refer to the device */ netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL); @@ -3152,10 +3154,12 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) rtnl_net_lock(net); dev = __dev_get_by_index(net, ireq.ifr6_ifindex); + netdev_lock_ops(dev); if (dev) err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL); else err = -ENODEV; + netdev_unlock_ops(dev); rtnl_net_unlock(net); return err; } @@ -5026,9 +5030,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, if (!dev) { NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface"); err = -ENODEV; - goto unlock; + goto unlock_rtnl; } + netdev_lock_ops(dev); idev = ipv6_find_idev(dev); if (IS_ERR(idev)) { err = PTR_ERR(idev); @@ -5065,6 +5070,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, in6_ifa_put(ifa); unlock: + netdev_unlock_ops(dev); +unlock_rtnl: rtnl_net_unlock(net); return err; @@ -6516,7 +6523,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write, if (idev->cnf.addr_gen_mode != new_val) { WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); + netdev_lock_ops(idev->dev); addrconf_init_auto_addrs(idev->dev); + netdev_unlock_ops(idev->dev); } } else if 
(&net->ipv6.devconf_all->addr_gen_mode == ctl->data) { struct net_device *dev; @@ -6528,7 +6537,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write, idev->cnf.addr_gen_mode != new_val) { WRITE_ONCE(idev->cnf.addr_gen_mode, new_val); + netdev_lock_ops(idev->dev); addrconf_init_auto_addrs(idev->dev); + netdev_unlock_ops(idev->dev); } } } diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 975be4fdbcdb..701905eeff66 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -222,6 +222,31 @@ setup_ns() NS_LIST+=("${ns_list[@]}") } +# Create netdevsim with given id and net namespace. +create_netdevsim() { + local id="$1" + local ns="$2" + + modprobe netdevsim &> /dev/null + udevadm settle + + echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null + local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net) + ip -netns $ns link set dev $dev name nsim$id + ip -netns $ns link set dev nsim$id up + + echo nsim$id +} + +# Remove netdevsim with given id. +cleanup_netdevsim() { + local id="$1" + + if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then + echo "$id" > /sys/bus/netdevsim/del_device + fi +} + tc_rule_stats_get() { local dev=$1; shift diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 0be1905d1f2f..38871bdef67f 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -7,10 +7,12 @@ set -o pipefail DEV=dummy-dev0 DEV2=dummy-dev1 ALT_NAME=some-alt-name +NSIM_ADDR=2025 RET_CODE=0 cleanup() { + cleanup_netdevsim $NSIM_ADDR cleanup_ns $NS $test_ns } @@ -25,12 +27,15 @@ setup_ns NS test_ns # # Test basic move without a rename +# Use netdevsim because it has extra asserts for notifiers. 
# -ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns $test_ns || + +nsim=$(create_netdevsim $NSIM_ADDR $NS) +ip -netns $NS link set dev $nsim netns $test_ns || fail "Can't perform a netns move" -ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" -ip -netns $test_ns link del $DEV || fail +ip -netns $test_ns link show dev $nsim >> /dev/null || + fail "Device not found after move" +cleanup_netdevsim $NSIM_ADDR # # Test move with a conflict