Skip to content

Commit

Permalink
Merge branch 'net-hold-instance-lock-during-netdev_up-register'
Browse files Browse the repository at this point in the history
Stanislav Fomichev says:

====================
net: hold instance lock during NETDEV_UP/REGISTER

Solving the issue reported by Cosmin in [0] requires consistent
lock during NETDEV_UP/REGISTER notifiers. This series
addresses that (along with some other fixes in net/ipv4/devinet.c
and net/ipv6/addrconf.c) and appends the patches from Jakub
that were conditional on consistent locking in NETDEV_UNREGISTER.

0: https://lore.kernel.org/700fa36b94cbd57cfea2622029b087643c80cbc9.camel@nvidia.com
====================

Link: https://patch.msgid.link/20250401163452.622454-1-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Apr 3, 2025
2 parents 09bccf5 + 56c8a23 commit 8ea7c1b
Show file tree
Hide file tree
Showing 16 changed files with 125 additions and 36 deletions.
23 changes: 23 additions & 0 deletions Documentation/networking/netdevices.rst
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,29 @@ there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g.,
acquiring the instance lock themselves, while the ``netif_xxx`` functions
assume that the driver has already acquired the instance lock.

Notifiers and netdev instance lock
==================================

For device drivers that implement shaping or queue management APIs,
some of the notifiers (``enum netdev_cmd``) are running under the netdev
instance lock.

For devices with locked ops, currently only the following notifiers are
running under the lock:
* ``NETDEV_REGISTER``
* ``NETDEV_UP``

The following notifiers are running without the lock:
* ``NETDEV_UNREGISTER``

There are no clear expectations for the remaining notifiers. Notifiers not on
the list may run with or without the instance lock, potentially even invoking
the same notifier type with and without the lock from different code paths.
The goal is to eventually ensure that all (or most, with a few documented
exceptions) notifiers run under the instance lock. Please extend this
documentation whenever you make explicit assumption about lock being held
from a notifier.

NETDEV_INTERNAL symbol namespace
================================

Expand Down
1 change: 1 addition & 0 deletions drivers/net/dummy.c
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ static void dummy_setup(struct net_device *dev)
dev->netdev_ops = &dummy_netdev_ops;
dev->ethtool_ops = &dummy_ethtool_ops;
dev->needs_free_netdev = true;
dev->request_ops_lock = true;

/* Fill in device structure with ethernet-generic values. */
dev->flags |= IFF_NOARP;
Expand Down
13 changes: 13 additions & 0 deletions drivers/net/netdevsim/netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -939,6 +939,7 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
ns->netdev->netdev_ops = &nsim_netdev_ops;
ns->netdev->stat_ops = &nsim_stat_ops;
ns->netdev->queue_mgmt_ops = &nsim_queue_mgmt_ops;
netdev_lockdep_set_classes(ns->netdev);

err = nsim_udp_tunnels_info_create(ns->nsim_dev, ns->netdev);
if (err)
Expand All @@ -960,6 +961,14 @@ static int nsim_init_netdevsim(struct netdevsim *ns)
if (err)
goto err_ipsec_teardown;
rtnl_unlock();

if (IS_ENABLED(CONFIG_DEBUG_NET)) {
ns->nb.notifier_call = netdev_debug_event;
if (register_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
&ns->nn))
ns->nb.notifier_call = NULL;
}

return 0;

err_ipsec_teardown:
Expand Down Expand Up @@ -1043,6 +1052,10 @@ void nsim_destroy(struct netdevsim *ns)
debugfs_remove(ns->qr_dfs);
debugfs_remove(ns->pp_dfs);

if (ns->nb.notifier_call)
unregister_netdevice_notifier_dev_net(ns->netdev, &ns->nb,
&ns->nn);

rtnl_lock();
peer = rtnl_dereference(ns->peer);
if (peer)
Expand Down
3 changes: 3 additions & 0 deletions drivers/net/netdevsim/netdevsim.h
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ struct netdevsim {

struct nsim_ethtool ethtool;
struct netdevsim __rcu *peer;

struct notifier_block nb;
struct netdev_net_notifier nn;
};

struct netdevsim *
Expand Down
2 changes: 1 addition & 1 deletion include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -4192,7 +4192,7 @@ int dev_change_flags(struct net_device *dev, unsigned int flags,
int netif_set_alias(struct net_device *dev, const char *alias, size_t len);
int dev_set_alias(struct net_device *, const char *, size_t);
int dev_get_alias(const struct net_device *, char *, size_t);
int netif_change_net_namespace(struct net_device *dev, struct net *net,
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex,
struct netlink_ext_ack *extack);
int dev_change_net_namespace(struct net_device *dev, struct net *net,
Expand Down
16 changes: 8 additions & 8 deletions include/net/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -667,14 +667,6 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast,
memcpy(buf, &naddr, sizeof(naddr));
}

#if IS_MODULE(CONFIG_IPV6)
#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
#else
#define EXPORT_IPV6_MOD(X)
#define EXPORT_IPV6_MOD_GPL(X)
#endif

#if IS_ENABLED(CONFIG_IPV6)
#include <linux/ipv6.h>
#endif
Expand All @@ -694,6 +686,14 @@ static __inline__ void inet_reset_saddr(struct sock *sk)

#endif

#if IS_MODULE(CONFIG_IPV6)
#define EXPORT_IPV6_MOD(X) EXPORT_SYMBOL(X)
#define EXPORT_IPV6_MOD_GPL(X) EXPORT_SYMBOL_GPL(X)
#else
#define EXPORT_IPV6_MOD(X)
#define EXPORT_IPV6_MOD_GPL(X)
#endif

static inline unsigned int ipv4_addr_hash(__be32 ip)
{
return (__force unsigned int) ip;
Expand Down
3 changes: 3 additions & 0 deletions include/net/netdev_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,4 +98,7 @@ static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
&qdisc_xmit_lock_key); \
}

int netdev_debug_event(struct notifier_block *nb, unsigned long event,
void *ptr);

#endif
2 changes: 1 addition & 1 deletion net/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,5 @@ obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o
obj-$(CONFIG_OF) += of_net.o
obj-$(CONFIG_NET_TEST) += net_test.o
obj-$(CONFIG_NET_DEVMEM) += devmem.o
obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o
obj-$(CONFIG_DEBUG_NET) += lock_debug.o
obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o
13 changes: 10 additions & 3 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1771,6 +1771,7 @@ void netif_disable_lro(struct net_device *dev)
netdev_unlock_ops(lower_dev);
}
}
EXPORT_IPV6_MOD(netif_disable_lro);

/**
* dev_disable_gro_hw - disable HW Generic Receive Offload on a device
Expand Down Expand Up @@ -1858,7 +1859,9 @@ static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
int err;

for_each_netdev(net, dev) {
netdev_lock_ops(dev);
err = call_netdevice_register_notifiers(nb, dev);
netdev_unlock_ops(dev);
if (err)
goto rollback;
}
Expand Down Expand Up @@ -11047,7 +11050,9 @@ int register_netdevice(struct net_device *dev)
memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);

/* Notify protocols, that a new device appeared. */
netdev_lock_ops(dev);
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
netdev_unlock_ops(dev);
ret = notifier_to_errno(ret);
if (ret) {
/* Expect explicit free_netdev() on failure */
Expand Down Expand Up @@ -12059,7 +12064,7 @@ void unregister_netdev(struct net_device *dev)
}
EXPORT_SYMBOL(unregister_netdev);

int netif_change_net_namespace(struct net_device *dev, struct net *net,
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex,
struct netlink_ext_ack *extack)
{
Expand Down Expand Up @@ -12144,11 +12149,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net,
* And now a mini version of register_netdevice unregister_netdevice.
*/

netdev_lock_ops(dev);
/* If device is running close it first. */
netif_close(dev);

/* And unlink it from device chain */
unlist_netdevice(dev);
netdev_unlock_ops(dev);

synchronize_net();

Expand Down Expand Up @@ -12210,11 +12216,12 @@ int netif_change_net_namespace(struct net_device *dev, struct net *net,
err = netdev_change_owner(dev, net_old, net);
WARN_ON(err);

netdev_lock_ops(dev);
/* Add the device back in the hashes */
list_netdevice(dev);

/* Notify protocols, that a new device appeared. */
call_netdevice_notifiers(NETDEV_REGISTER, dev);
netdev_unlock_ops(dev);

/*
* Prevent userspace races by waiting until the network
Expand Down
8 changes: 1 addition & 7 deletions net/core/dev_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,7 @@ EXPORT_SYMBOL(dev_set_mac_address_user);
int dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat)
{
int ret;

netdev_lock_ops(dev);
ret = netif_change_net_namespace(dev, net, pat, 0, NULL);
netdev_unlock_ops(dev);

return ret;
return __dev_change_net_namespace(dev, net, pat, 0, NULL);
}
EXPORT_SYMBOL_GPL(dev_change_net_namespace);

Expand Down
14 changes: 9 additions & 5 deletions net/core/rtnl_net_debug.c → net/core/lock_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,25 @@
#include <linux/notifier.h>
#include <linux/rtnetlink.h>
#include <net/net_namespace.h>
#include <net/netdev_lock.h>
#include <net/netns/generic.h>

static int rtnl_net_debug_event(struct notifier_block *nb,
unsigned long event, void *ptr)
int netdev_debug_event(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
enum netdev_cmd cmd = event;

/* Keep enum and don't add default to trigger -Werror=switch */
switch (cmd) {
case NETDEV_REGISTER:
case NETDEV_UP:
netdev_ops_assert_locked(dev);
fallthrough;
case NETDEV_DOWN:
case NETDEV_REBOOT:
case NETDEV_CHANGE:
case NETDEV_REGISTER:
case NETDEV_UNREGISTER:
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
Expand Down Expand Up @@ -66,6 +69,7 @@ static int rtnl_net_debug_event(struct notifier_block *nb,

return NOTIFY_DONE;
}
EXPORT_SYMBOL_NS_GPL(netdev_debug_event, "NETDEV_INTERNAL");

static int rtnl_net_debug_net_id;

Expand All @@ -74,7 +78,7 @@ static int __net_init rtnl_net_debug_net_init(struct net *net)
struct notifier_block *nb;

nb = net_generic(net, rtnl_net_debug_net_id);
nb->notifier_call = rtnl_net_debug_event;
nb->notifier_call = netdev_debug_event;

return register_netdevice_notifier_net(net, nb);
}
Expand All @@ -95,7 +99,7 @@ static struct pernet_operations rtnl_net_debug_net_ops __net_initdata = {
};

static struct notifier_block rtnl_net_debug_block = {
.notifier_call = rtnl_net_debug_event,
.notifier_call = netdev_debug_event,
};

static int __init rtnl_net_debug_init(void)
Expand Down
8 changes: 4 additions & 4 deletions net/core/rtnetlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -3025,8 +3025,6 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
char ifname[IFNAMSIZ];
int err;

netdev_lock_ops(dev);

err = validate_linkmsg(dev, tb, extack);
if (err < 0)
goto errout;
Expand All @@ -3042,14 +3040,16 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,

new_ifindex = nla_get_s32_default(tb[IFLA_NEW_IFINDEX], 0);

err = netif_change_net_namespace(dev, tgt_net, pat,
err = __dev_change_net_namespace(dev, tgt_net, pat,
new_ifindex, extack);
if (err)
goto errout;
return err;

status |= DO_SETLINK_MODIFIED;
}

netdev_lock_ops(dev);

if (tb[IFLA_MAP]) {
struct rtnl_link_ifmap *u_map;
struct ifmap k_map;
Expand Down
2 changes: 1 addition & 1 deletion net/ipv4/devinet.c
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ static struct in_device *inetdev_init(struct net_device *dev)
if (!in_dev->arp_parms)
goto out_kfree;
if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
dev_disable_lro(dev);
netif_disable_lro(dev);
/* Reference in_dev->dev */
netdev_hold(dev, &in_dev->dev_tracker, GFP_KERNEL);
/* Account for reference dev->ip_ptr (below) */
Expand Down
15 changes: 13 additions & 2 deletions net/ipv6/addrconf.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/l3mdev.h>
#include <net/netdev_lock.h>
#include <linux/if_tunnel.h>
#include <linux/rtnetlink.h>
#include <linux/netconf.h>
Expand Down Expand Up @@ -377,6 +378,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
int err = -ENOMEM;

ASSERT_RTNL();
netdev_ops_assert_locked(dev);

if (dev->mtu < IPV6_MIN_MTU && dev != blackhole_netdev)
return ERR_PTR(-EINVAL);
Expand All @@ -402,7 +404,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
return ERR_PTR(err);
}
if (ndev->cnf.forwarding)
dev_disable_lro(dev);
netif_disable_lro(dev);
/* We refer to the device */
netdev_hold(dev, &ndev->dev_tracker, GFP_KERNEL);

Expand Down Expand Up @@ -3152,10 +3154,12 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)

rtnl_net_lock(net);
dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
netdev_lock_ops(dev);
if (dev)
err = inet6_addr_add(net, dev, &cfg, 0, 0, NULL);
else
err = -ENODEV;
netdev_unlock_ops(dev);
rtnl_net_unlock(net);
return err;
}
Expand Down Expand Up @@ -5026,9 +5030,10 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (!dev) {
NL_SET_ERR_MSG_MOD(extack, "Unable to find the interface");
err = -ENODEV;
goto unlock;
goto unlock_rtnl;
}

netdev_lock_ops(dev);
idev = ipv6_find_idev(dev);
if (IS_ERR(idev)) {
err = PTR_ERR(idev);
Expand Down Expand Up @@ -5065,6 +5070,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,

in6_ifa_put(ifa);
unlock:
netdev_unlock_ops(dev);
unlock_rtnl:
rtnl_net_unlock(net);

return err;
Expand Down Expand Up @@ -6516,7 +6523,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,

if (idev->cnf.addr_gen_mode != new_val) {
WRITE_ONCE(idev->cnf.addr_gen_mode, new_val);
netdev_lock_ops(idev->dev);
addrconf_init_auto_addrs(idev->dev);
netdev_unlock_ops(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
Expand All @@ -6528,7 +6537,9 @@ static int addrconf_sysctl_addr_gen_mode(const struct ctl_table *ctl, int write,
idev->cnf.addr_gen_mode != new_val) {
WRITE_ONCE(idev->cnf.addr_gen_mode,
new_val);
netdev_lock_ops(idev->dev);
addrconf_init_auto_addrs(idev->dev);
netdev_unlock_ops(idev->dev);
}
}
}
Expand Down
Loading

0 comments on commit 8ea7c1b

Please sign in to comment.