Skip to content

Commit

Permalink
net: hold netdev instance lock during rtnetlink operations
Browse files Browse the repository at this point in the history
To preserve the atomicity, hold the lock while applying multiple
attributes. The major issue with a full conversion to the instance
lock are software nesting devices (bonding/team/vrf/etc). Those
devices call into the core stack for their lower (potentially
real hw) devices. To avoid explicitly wrapping all those places
into instance lock/unlock, introduce new API boundaries:

- (some) existing dev_xxx calls are now considered "external"
  (to drivers) APIs and they transparently grab the instance
  lock if needed (dev_api.c)
- new netif_xxx calls are internal core stack API (naming is
  sketchy, I've tried netdev_xxx_locked per Jakub's suggestion,
  but it feels a bit verbose; but happy to get back to this
  naming scheme if this is the preference)

This avoids touching most of the existing ioctl/sysfs/drivers paths.

Note the special handling of ndo_xxx_slave operations: I exploit
the fact that none of the drivers that call these functions
need/use instance lock. At the same time, they use dev_xxx
APIs, so the lower device has to be unlocked.

Changes in unregister_netdevice_many_notify (to protect dev->state
with instance lock) trigger lockdep - the loop over close_list
(mostly from cleanup_net) introduces spurious ordering issues.
netdev_lock_cmp_fn has a justification on why it's ok to suppress
for now.

Cc: Saeed Mahameed <saeed@kernel.org>
Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
Link: https://patch.msgid.link/20250305163732.2766420-7-sdf@fomichev.me
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Stanislav Fomichev authored and Jakub Kicinski committed Mar 6, 2025
1 parent cae03e5 commit 7e4d784
Show file tree
Hide file tree
Showing 6 changed files with 329 additions and 150 deletions.
40 changes: 34 additions & 6 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -2620,16 +2620,35 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
f(dev, &dev->_tx[i], arg);
}

static inline int netdev_lock_cmp_fn(const struct lockdep_map *a,
const struct lockdep_map *b)
{
/* Only lower devices currently grab the instance lock, so no
* real ordering issues can occur. In the near future, only
* hardware devices will grab instance lock which also does not
* involve any ordering. Suppress lockdep ordering warnings
* until (if) we start grabbing instance lock on pure SW
* devices (bond/team/veth/etc).
*/
if (a == b)
return 0;
return -1;
}

#define netdev_lockdep_set_classes(dev) \
{ \
static struct lock_class_key qdisc_tx_busylock_key; \
static struct lock_class_key qdisc_xmit_lock_key; \
static struct lock_class_key dev_addr_list_lock_key; \
static struct lock_class_key dev_instance_lock_key; \
unsigned int i; \
\
(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
lockdep_set_class(&(dev)->addr_list_lock, \
&dev_addr_list_lock_key); \
lockdep_set_class(&(dev)->lock, \
&dev_instance_lock_key); \
lock_set_cmp_fn(&dev->lock, netdev_lock_cmp_fn, NULL); \
for (i = 0; i < (dev)->num_tx_queues; i++) \
lockdep_set_class(&(dev)->_tx[i]._xmit_lock, \
&qdisc_xmit_lock_key); \
Expand Down Expand Up @@ -2776,6 +2795,12 @@ static inline void netdev_unlock_ops(struct net_device *dev)
netdev_unlock(dev);
}

static inline void netdev_ops_assert_locked(struct net_device *dev)
{
if (netdev_need_ops_lock(dev))
lockdep_assert_held(&dev->lock);
}

void netif_napi_set_irq_locked(struct napi_struct *napi, int irq);

static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
Expand Down Expand Up @@ -3350,7 +3375,9 @@ struct net_device *dev_get_by_name_rcu(struct net *net, const char *name);
struct net_device *__dev_get_by_name(struct net *net, const char *name);
bool netdev_name_in_use(struct net *net, const char *name);
int dev_alloc_name(struct net_device *dev, const char *name);
int netif_open(struct net_device *dev, struct netlink_ext_ack *extack);
int dev_open(struct net_device *dev, struct netlink_ext_ack *extack);
void netif_close(struct net_device *dev);
void dev_close(struct net_device *dev);
void dev_close_many(struct list_head *head, bool unlink);
int dev_setup_tc(struct net_device *dev, enum tc_setup_type type,
Expand Down Expand Up @@ -4211,25 +4238,26 @@ int dev_ethtool(struct net *net, struct ifreq *ifr, void __user *userdata);
unsigned int dev_get_flags(const struct net_device *);
int __dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack);
int netif_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack);
int dev_change_flags(struct net_device *dev, unsigned int flags,
struct netlink_ext_ack *extack);
int netif_set_alias(struct net_device *dev, const char *alias, size_t len);
int dev_set_alias(struct net_device *, const char *, size_t);
int dev_get_alias(const struct net_device *, char *, size_t);
int __dev_change_net_namespace(struct net_device *dev, struct net *net,
int netif_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat, int new_ifindex,
struct netlink_ext_ack *extack);
static inline
int dev_change_net_namespace(struct net_device *dev, struct net *net,
const char *pat)
{
return __dev_change_net_namespace(dev, net, pat, 0, NULL);
}
const char *pat);
int __dev_set_mtu(struct net_device *, int);
int dev_set_mtu(struct net_device *, int);
int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
struct netlink_ext_ack *extack);
int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
struct netlink_ext_ack *extack);
int netif_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
struct netlink_ext_ack *extack);
int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa,
struct netlink_ext_ack *extack);
int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name);
Expand Down
2 changes: 1 addition & 1 deletion net/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \

obj-$(CONFIG_SYSCTL) += sysctl_net_core.o

obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
obj-y += dev.o dev_api.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
fib_notifier.o xdp.o flow_offload.o gro.o \
Expand Down
Loading

0 comments on commit 7e4d784

Please sign in to comment.