Skip to content

Commit

Permalink
rtnetlink: Add per-netns RTNL.
Browse files Browse the repository at this point in the history
The goal is to break RTNL down into per-netns mutexes.

This patch adds per-netns mutex and its helper functions, rtnl_net_lock()
and rtnl_net_unlock().

rtnl_net_lock() acquires the global RTNL and per-netns RTNL mutex, and
rtnl_net_unlock() releases them.

We will replace 800+ rtnl_lock() calls with rtnl_net_lock() and finally
remove the rtnl_lock() call inside rtnl_net_lock().

When we need to nest per-netns RTNL mutex, we will use __rtnl_net_lock(),
and its locking order is defined by rtnl_net_lock_cmp_fn() as follows:

  1. init_net is first
  2. netns address ascending order

Note that the conversion will be done under CONFIG_DEBUG_NET_SMALL_RTNL
with LOCKDEP so that we can carefully add the extra mutex without slowing
down RTNL operations during conversion.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Kuniyuki Iwashima authored and Paolo Abeni committed Oct 8, 2024
1 parent ec763c2 commit 76aed95
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 0 deletions.
21 changes: 21 additions & 0 deletions include/linux/rtnetlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,27 @@ static inline bool lockdep_rtnl_is_held(void)
#define rcu_replace_pointer_rtnl(rp, p) \
rcu_replace_pointer(rp, p, lockdep_rtnl_is_held())

#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
/* Lock/unlock only net->rtnl_mutex; both check ASSERT_RTNL() first. */
void __rtnl_net_lock(struct net *net);
void __rtnl_net_unlock(struct net *net);
/* Lock/unlock the global RTNL together with net->rtnl_mutex. */
void rtnl_net_lock(struct net *net);
void rtnl_net_unlock(struct net *net);
/* lockdep comparison callback installed on every net->rtnl_mutex. */
int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b);
#else
/*
 * Without CONFIG_DEBUG_NET_SMALL_RTNL struct net has no rtnl_mutex,
 * so the per-netns-only helpers compile to nothing.
 */
static inline void __rtnl_net_lock(struct net *net) {}
static inline void __rtnl_net_unlock(struct net *net) {}

/*
 * Fallback used when CONFIG_DEBUG_NET_SMALL_RTNL is disabled:
 * only the global RTNL is taken and @net is not used.
 */
static inline void rtnl_net_lock(struct net *net)
{
rtnl_lock();
}

/*
 * Fallback used when CONFIG_DEBUG_NET_SMALL_RTNL is disabled:
 * only the global RTNL is released and @net is not used.
 */
static inline void rtnl_net_unlock(struct net *net)
{
rtnl_unlock();
}
#endif

static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev)
{
return rtnl_dereference(dev->ingress_queue);
Expand Down
4 changes: 4 additions & 0 deletions include/net/net_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,10 @@ struct net {
#if IS_ENABLED(CONFIG_SMC)
struct netns_smc smc;
#endif
#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
/* Move to a better place when the config guard is removed. */
struct mutex rtnl_mutex;
#endif
} __randomize_layout;

#include <linux/seq_file_net.h>
Expand Down
15 changes: 15 additions & 0 deletions net/Kconfig.debug
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,18 @@ config DEBUG_NET
help
Enable extra sanity checks in networking.
This is mostly used by fuzzers, but is safe to select.

config DEBUG_NET_SMALL_RTNL
bool "Add extra per-netns mutex inside RTNL"
depends on DEBUG_KERNEL && NET && LOCK_DEBUGGING_SUPPORT
select PROVE_LOCKING
default n
help
rtnl_lock() is being replaced with rtnl_net_lock() that
acquires the global RTNL and a small per-netns RTNL mutex.

During the conversion, rtnl_net_lock() just adds an extra
mutex in every RTNL scope and slows down the operations.

Once the conversion completes, rtnl_lock() will be removed
and rtnetlink will gain per-netns scalability.
6 changes: 6 additions & 0 deletions net/core/net_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,12 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
idr_init(&net->netns_ids);
spin_lock_init(&net->nsid_lock);
mutex_init(&net->ipv4.ra_mutex);

#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
mutex_init(&net->rtnl_mutex);
lock_set_cmp_fn(&net->rtnl_mutex, rtnl_net_lock_cmp_fn, NULL);
#endif

preinit_net_sysctl(net);
}

Expand Down
58 changes: 58 additions & 0 deletions net/core/rtnetlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,64 @@ bool lockdep_rtnl_is_held(void)
EXPORT_SYMBOL(lockdep_rtnl_is_held);
#endif /* #ifdef CONFIG_PROVE_LOCKING */

#ifdef CONFIG_DEBUG_NET_SMALL_RTNL
/**
 * __rtnl_net_lock - acquire the per-netns RTNL mutex only
 * @net: network namespace whose rtnl_mutex is taken
 *
 * Does not take the global RTNL itself; ASSERT_RTNL() is checked before
 * the per-netns mutex is locked.
 */
void __rtnl_net_lock(struct net *net)
{
ASSERT_RTNL();

mutex_lock(&net->rtnl_mutex);
}
EXPORT_SYMBOL(__rtnl_net_lock);

/**
 * __rtnl_net_unlock - release the per-netns RTNL mutex only
 * @net: network namespace whose rtnl_mutex is released
 *
 * Does not release the global RTNL; ASSERT_RTNL() is checked before
 * the per-netns mutex is unlocked.
 */
void __rtnl_net_unlock(struct net *net)
{
ASSERT_RTNL();

mutex_unlock(&net->rtnl_mutex);
}
EXPORT_SYMBOL(__rtnl_net_unlock);

/**
 * rtnl_net_lock - acquire the global RTNL and the per-netns RTNL mutex
 * @net: network namespace whose rtnl_mutex is taken
 *
 * The global RTNL is taken first, then net->rtnl_mutex.
 */
void rtnl_net_lock(struct net *net)
{
rtnl_lock();
__rtnl_net_lock(net);
}
EXPORT_SYMBOL(rtnl_net_lock);

/**
 * rtnl_net_unlock - release the per-netns RTNL mutex and the global RTNL
 * @net: network namespace whose rtnl_mutex is released
 *
 * Releases in the reverse order of rtnl_net_lock(): net->rtnl_mutex
 * first, then the global RTNL.
 */
void rtnl_net_unlock(struct net *net)
{
__rtnl_net_unlock(net);
rtnl_unlock();
}
EXPORT_SYMBOL(rtnl_net_unlock);

/*
 * Order two network namespaces for nested per-netns RTNL locking.
 *
 * Returns 0 when both arguments are the same namespace, a negative
 * value when @net_a sorts before @net_b and a positive value otherwise.
 * init_net sorts before every other namespace; the remaining ones are
 * ordered by ascending address.
 */
static int rtnl_net_cmp_locks(const struct net *net_a, const struct net *net_b)
{
	int order;

	if (net_eq(net_a, net_b))
		order = 0;
	else if (net_eq(net_a, &init_net))
		order = -1;	/* init_net is always first */
	else if (net_eq(net_b, &init_net))
		order = 1;
	else
		order = net_a < net_b ? -1 : 1;	/* ascending address */

	return order;
}

/*
 * lockdep comparison callback for net->rtnl_mutex.
 *
 * @a and @b are the dep_map members embedded in two per-netns RTNL
 * mutexes; each is mapped back to its owning struct net and the pair
 * is ordered by rtnl_net_cmp_locks().
 */
int rtnl_net_lock_cmp_fn(const struct lockdep_map *a, const struct lockdep_map *b)
{
	return rtnl_net_cmp_locks(container_of(a, struct net, rtnl_mutex.dep_map),
				  container_of(b, struct net, rtnl_mutex.dep_map));
}
#endif

static struct rtnl_link __rcu *__rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];

static inline int rtm_msgindex(int msgtype)
Expand Down

0 comments on commit 76aed95

Please sign in to comment.