Skip to content

Commit

Permalink
net: hold instance lock during NETDEV_CHANGE
Browse files Browse the repository at this point in the history
Cosmin reports an issue with ipv6_add_dev being called from
NETDEV_CHANGE notifier:

[ 3455.008776]  ? ipv6_add_dev+0x370/0x620
[ 3455.010097]  ipv6_find_idev+0x96/0xe0
[ 3455.010725]  addrconf_add_dev+0x1e/0xa0
[ 3455.011382]  addrconf_init_auto_addrs+0xb0/0x720
[ 3455.013537]  addrconf_notify+0x35f/0x8d0
[ 3455.014214]  notifier_call_chain+0x38/0xf0
[ 3455.014903]  netdev_state_change+0x65/0x90
[ 3455.015586]  linkwatch_do_dev+0x5a/0x70
[ 3455.016238]  rtnl_getlink+0x241/0x3e0
[ 3455.019046]  rtnetlink_rcv_msg+0x177/0x5e0

Similarly, linkwatch might get to ipv6_add_dev without ops lock:
[ 3456.656261]  ? ipv6_add_dev+0x370/0x620
[ 3456.660039]  ipv6_find_idev+0x96/0xe0
[ 3456.660445]  addrconf_add_dev+0x1e/0xa0
[ 3456.660861]  addrconf_init_auto_addrs+0xb0/0x720
[ 3456.661803]  addrconf_notify+0x35f/0x8d0
[ 3456.662236]  notifier_call_chain+0x38/0xf0
[ 3456.662676]  netdev_state_change+0x65/0x90
[ 3456.663112]  linkwatch_do_dev+0x5a/0x70
[ 3456.663529]  __linkwatch_run_queue+0xeb/0x200
[ 3456.663990]  linkwatch_event+0x21/0x30
[ 3456.664399]  process_one_work+0x211/0x610
[ 3456.664828]  worker_thread+0x1cc/0x380
[ 3456.665691]  kthread+0xf4/0x210

Reclassify NETDEV_CHANGE as a notifier that consistently runs under the
instance lock.

Link: https://lore.kernel.org/netdev/aac073de8beec3e531c86c101b274d434741c28e.camel@nvidia.com/
Reported-by: Cosmin Ratiu <cratiu@nvidia.com>
Tested-by: Cosmin Ratiu <cratiu@nvidia.com>
Fixes: ad7c7b2 ("net: hold netdev instance lock during sysfs operations")
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250404161122.3907628-1-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Stanislav Fomichev authored and Jakub Kicinski committed Apr 7, 2025
1 parent 54f5faf commit 04efcee
Show file tree
Hide file tree
Showing 10 changed files with 63 additions and 31 deletions.
10 changes: 6 additions & 4 deletions Documentation/networking/netdevices.rst
Original file line number Diff line number Diff line change
Expand Up @@ -338,10 +338,11 @@ operations directly under the netdev instance lock.
Devices drivers are encouraged to rely on the instance lock where possible.

For the (mostly software) drivers that need to interact with the core stack,
there are two sets of interfaces: ``dev_xxx`` and ``netif_xxx`` (e.g.,
``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx`` functions handle
acquiring the instance lock themselves, while the ``netif_xxx`` functions
assume that the driver has already acquired the instance lock.
there are two sets of interfaces: ``dev_xxx``/``netdev_xxx`` and ``netif_xxx``
(e.g., ``dev_set_mtu`` and ``netif_set_mtu``). The ``dev_xxx``/``netdev_xxx``
functions handle acquiring the instance lock themselves, while the
``netif_xxx`` functions assume that the driver has already acquired
the instance lock.

Notifiers and netdev instance lock
==================================
Expand All @@ -354,6 +355,7 @@ For devices with locked ops, currently only the following notifiers are
running under the lock:
* ``NETDEV_REGISTER``
* ``NETDEV_UP``
* ``NETDEV_CHANGE``

The following notifiers are running without the lock:
* ``NETDEV_UNREGISTER``
Expand Down
2 changes: 2 additions & 0 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -4429,6 +4429,7 @@ void linkwatch_fire_event(struct net_device *dev);
* pending work list (if queued).
*/
void linkwatch_sync_dev(struct net_device *dev);
void __linkwatch_sync_dev(struct net_device *dev);

/**
* netif_carrier_ok - test if carrier present
Expand Down Expand Up @@ -4974,6 +4975,7 @@ void dev_set_rx_mode(struct net_device *dev);
int dev_set_promiscuity(struct net_device *dev, int inc);
int netif_set_allmulti(struct net_device *dev, int inc, bool notify);
int dev_set_allmulti(struct net_device *dev, int inc);
void netif_state_change(struct net_device *dev);
void netdev_state_change(struct net_device *dev);
void __netdev_notify_peers(struct net_device *dev);
void netdev_notify_peers(struct net_device *dev);
Expand Down
2 changes: 1 addition & 1 deletion include/linux/rtnetlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,6 @@ rtnl_notify_needed(const struct net *net, u16 nlflags, u32 group)
return (nlflags & NLM_F_ECHO) || rtnl_has_listeners(net, group);
}

void netdev_set_operstate(struct net_device *dev, int newstate);
void netif_set_operstate(struct net_device *dev, int newstate);

#endif /* __LINUX_RTNETLINK_H */
11 changes: 1 addition & 10 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -1518,15 +1518,7 @@ void netdev_features_change(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_features_change);

/**
* netdev_state_change - device changes state
* @dev: device to cause notification
*
* Called to indicate a device has changed state. This function calls
* the notifier chains for netdev_chain and sends a NEWLINK message
* to the routing socket.
*/
void netdev_state_change(struct net_device *dev)
void netif_state_change(struct net_device *dev)
{
if (dev->flags & IFF_UP) {
struct netdev_notifier_change_info change_info = {
Expand All @@ -1538,7 +1530,6 @@ void netdev_state_change(struct net_device *dev)
rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL, 0, NULL);
}
}
EXPORT_SYMBOL(netdev_state_change);

/**
* __netdev_notify_peers - notify network peers about existence of @dev,
Expand Down
16 changes: 16 additions & 0 deletions net/core/dev_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,19 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf)
return ret;
}
EXPORT_SYMBOL_GPL(dev_xdp_propagate);

/**
* netdev_state_change() - device changes state
* @dev: device to cause notification
*
* Called to indicate a device has changed state. This function calls
* the notifier chains for netdev_chain and sends a NEWLINK message
* to the routing socket.
*/
void netdev_state_change(struct net_device *dev)
{
netdev_lock_ops(dev);
netif_state_change(dev);
netdev_unlock_ops(dev);
}
EXPORT_SYMBOL(netdev_state_change);
28 changes: 23 additions & 5 deletions net/core/link_watch.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ static void linkwatch_do_dev(struct net_device *dev)
else
dev_deactivate(dev);

netdev_state_change(dev);
netif_state_change(dev);
}
/* Note: our callers are responsible for calling netdev_tracker_free().
* This is the reason we use __dev_put() instead of dev_put().
Expand Down Expand Up @@ -240,7 +240,9 @@ static void __linkwatch_run_queue(int urgent_only)
*/
netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
spin_unlock_irq(&lweventlist_lock);
netdev_lock_ops(dev);
linkwatch_do_dev(dev);
netdev_unlock_ops(dev);
do_dev--;
spin_lock_irq(&lweventlist_lock);
}
Expand All @@ -253,25 +255,41 @@ static void __linkwatch_run_queue(int urgent_only)
spin_unlock_irq(&lweventlist_lock);
}

void linkwatch_sync_dev(struct net_device *dev)
static bool linkwatch_clean_dev(struct net_device *dev)
{
unsigned long flags;
int clean = 0;
bool clean = false;

spin_lock_irqsave(&lweventlist_lock, flags);
if (!list_empty(&dev->link_watch_list)) {
list_del_init(&dev->link_watch_list);
clean = 1;
clean = true;
/* We must release netdev tracker under
* the spinlock protection.
*/
netdev_tracker_free(dev, &dev->linkwatch_dev_tracker);
}
spin_unlock_irqrestore(&lweventlist_lock, flags);
if (clean)

return clean;
}

void __linkwatch_sync_dev(struct net_device *dev)
{
netdev_ops_assert_locked(dev);

if (linkwatch_clean_dev(dev))
linkwatch_do_dev(dev);
}

void linkwatch_sync_dev(struct net_device *dev)
{
if (linkwatch_clean_dev(dev)) {
netdev_lock_ops(dev);
linkwatch_do_dev(dev);
netdev_unlock_ops(dev);
}
}

/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
Expand Down
2 changes: 1 addition & 1 deletion net/core/lock_debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ int netdev_debug_event(struct notifier_block *nb, unsigned long event,
switch (cmd) {
case NETDEV_REGISTER:
case NETDEV_UP:
case NETDEV_CHANGE:
netdev_ops_assert_locked(dev);
fallthrough;
case NETDEV_DOWN:
case NETDEV_REBOOT:
case NETDEV_CHANGE:
case NETDEV_UNREGISTER:
case NETDEV_CHANGEMTU:
case NETDEV_CHANGEADDR:
Expand Down
15 changes: 9 additions & 6 deletions net/core/rtnetlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
}
EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo);

void netdev_set_operstate(struct net_device *dev, int newstate)
void netif_set_operstate(struct net_device *dev, int newstate)
{
unsigned int old = READ_ONCE(dev->operstate);

Expand All @@ -1052,9 +1052,9 @@ void netdev_set_operstate(struct net_device *dev, int newstate)
return;
} while (!try_cmpxchg(&dev->operstate, &old, newstate));

netdev_state_change(dev);
netif_state_change(dev);
}
EXPORT_SYMBOL(netdev_set_operstate);
EXPORT_SYMBOL(netif_set_operstate);

static void set_operstate(struct net_device *dev, unsigned char transition)
{
Expand All @@ -1080,7 +1080,7 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
break;
}

netdev_set_operstate(dev, operstate);
netif_set_operstate(dev, operstate);
}

static unsigned int rtnl_dev_get_flags(const struct net_device *dev)
Expand Down Expand Up @@ -3396,7 +3396,7 @@ static int do_setlink(const struct sk_buff *skb, struct net_device *dev,
errout:
if (status & DO_SETLINK_MODIFIED) {
if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY)
netdev_state_change(dev);
netif_state_change(dev);

if (err < 0)
net_warn_ratelimited("A link change request failed with some changes committed already. Interface %s may have been left with an inconsistent configuration, please check.\n",
Expand Down Expand Up @@ -3676,8 +3676,11 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
nla_len(tb[IFLA_BROADCAST]));
if (tb[IFLA_TXQLEN])
dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
if (tb[IFLA_OPERSTATE])
if (tb[IFLA_OPERSTATE]) {
netdev_lock_ops(dev);
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
netdev_unlock_ops(dev);
}
if (tb[IFLA_LINKMODE])
dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]);
if (tb[IFLA_GROUP])
Expand Down
2 changes: 1 addition & 1 deletion net/ethtool/ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ static struct devlink *netdev_to_devlink_get(struct net_device *dev)
u32 ethtool_op_get_link(struct net_device *dev)
{
/* Synchronize carrier state with link watch, see also rtnl_getlink() */
linkwatch_sync_dev(dev);
__linkwatch_sync_dev(dev);

return netif_carrier_ok(dev) ? 1 : 0;
}
Expand Down
6 changes: 3 additions & 3 deletions net/hsr/hsr_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,14 +33,14 @@ static void hsr_set_operstate(struct hsr_port *master, bool has_carrier)
struct net_device *dev = master->dev;

if (!is_admin_up(dev)) {
netdev_set_operstate(dev, IF_OPER_DOWN);
netif_set_operstate(dev, IF_OPER_DOWN);
return;
}

if (has_carrier)
netdev_set_operstate(dev, IF_OPER_UP);
netif_set_operstate(dev, IF_OPER_UP);
else
netdev_set_operstate(dev, IF_OPER_LOWERLAYERDOWN);
netif_set_operstate(dev, IF_OPER_LOWERLAYERDOWN);
}

static bool hsr_check_carrier(struct hsr_port *master)
Expand Down

0 comments on commit 04efcee

Please sign in to comment.