Skip to content

Commit

Permalink
Merge branch 'Replacing-net_mutex-with-rw_semaphore'
Browse files Browse the repository at this point in the history
Kirill Tkhai says:

====================
Replacing net_mutex with rw_semaphore

This is the third version of the patchset introducing net_sem
instead of net_mutex. The patchset adds net_sem in addition
to net_mutex and allows pernet_operations to be "async". This
flag means that the pernet_operations methods are safe to
execute in parallel with any other pernet_operations
(un)initializing another net.

If there are only async pernet_operations in the system,
net_mutex is not used either for setup_net() or for cleanup_net().

The pernet_operations converted in this patchset are enough
to build a minimal .config with working networking, and
the changes improve performance, as you can see
below:

    %for i in {1..10000}; do unshare -n bash -c exit; done

    *before*
    real 1m40,377s
    user 0m9,672s
    sys 0m19,928s

    *after*
    real 0m17,007s
    user 0m5,311s
    sys 0m11,779s

    (about 5.9 times faster)

In the future, when all pernet_operations become async,
we'll just remove this "async" field tree-wide.

All the new logic is concentrated in patches [1-5/32].
The rest of the patches convert specific operations; each
contains a review, the rationale for why the operations can
be converted, and the setting of the async flag.

Kirill

v3: Improved patch descriptions. Added a comment to [5/32].
Added [32/32], converting netlink_tap_net_ops (new pernet operations
introduced in 2018).

v2: Single patch -> patchset with the rationale for every conversion
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Feb 13, 2018
2 parents cf19e5e + b86b47a commit 885842d
Show file tree
Hide file tree
Showing 45 changed files with 116 additions and 41 deletions.
1 change: 1 addition & 0 deletions drivers/net/loopback.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,4 +230,5 @@ static __net_init int loopback_net_init(struct net *net)
/* Registered in net/core/dev.c */
struct pernet_operations __net_initdata loopback_net_ops = {
.init = loopback_net_init,
.async = true,
};
1 change: 1 addition & 0 deletions fs/proc/proc_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ static __net_exit void proc_net_ns_exit(struct net *net)
static struct pernet_operations __net_initdata proc_net_ns_ops = {
.init = proc_net_ns_init,
.exit = proc_net_ns_exit,
.async = true,
};

int __init proc_net_init(void)
Expand Down
1 change: 1 addition & 0 deletions include/linux/rtnetlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ extern int rtnl_is_locked(void);

extern wait_queue_head_t netdev_unregistering_wq;
extern struct mutex net_mutex;
extern struct rw_semaphore net_sem;

#ifdef CONFIG_PROVE_LOCKING
extern bool lockdep_rtnl_is_held(void);
Expand Down
6 changes: 6 additions & 0 deletions include/net/net_namespace.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,12 @@ struct pernet_operations {
void (*exit_batch)(struct list_head *net_exit_list);
unsigned int *id;
size_t size;
/*
* Indicates that the above methods may be executed in parallel
* with methods of any other pernet_operations, i.e. they do not
* need synchronization via net_mutex.
*/
bool async;
};

/*
Expand Down
1 change: 1 addition & 0 deletions kernel/audit.c
Original file line number Diff line number Diff line change
Expand Up @@ -1526,6 +1526,7 @@ static struct pernet_operations audit_net_ops __net_initdata = {
.exit = audit_net_exit,
.id = &audit_net_id,
.size = sizeof(struct audit_net),
.async = true,
};

/* Initialize audit support at boot time. */
Expand Down
1 change: 1 addition & 0 deletions lib/kobject_uevent.c
Original file line number Diff line number Diff line change
Expand Up @@ -650,6 +650,7 @@ static void uevent_net_exit(struct net *net)
static struct pernet_operations uevent_net_ops = {
.init = uevent_net_init,
.exit = uevent_net_exit,
.async = true,
};

static int __init kobject_uevent_init(void)
Expand Down
2 changes: 2 additions & 0 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -8833,6 +8833,7 @@ static void __net_exit netdev_exit(struct net *net)
static struct pernet_operations __net_initdata netdev_net_ops = {
.init = netdev_init,
.exit = netdev_exit,
.async = true,
};

static void __net_exit default_device_exit(struct net *net)
Expand Down Expand Up @@ -8933,6 +8934,7 @@ static void __net_exit default_device_exit_batch(struct list_head *net_list)
static struct pernet_operations __net_initdata default_device_ops = {
.exit = default_device_exit,
.exit_batch = default_device_exit_batch,
.async = true,
};

/*
Expand Down
1 change: 1 addition & 0 deletions net/core/fib_notifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ static void __net_exit fib_notifier_net_exit(struct net *net)
static struct pernet_operations fib_notifier_net_ops = {
.init = fib_notifier_net_init,
.exit = fib_notifier_net_exit,
.async = true,
};

static int __init fib_notifier_init(void)
Expand Down
1 change: 1 addition & 0 deletions net/core/fib_rules.c
Original file line number Diff line number Diff line change
Expand Up @@ -1030,6 +1030,7 @@ static void __net_exit fib_rules_net_exit(struct net *net)
static struct pernet_operations fib_rules_net_ops = {
.init = fib_rules_net_init,
.exit = fib_rules_net_exit,
.async = true,
};

static int __init fib_rules_init(void)
Expand Down
2 changes: 2 additions & 0 deletions net/core/net-procfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ static void __net_exit dev_proc_net_exit(struct net *net)
static struct pernet_operations __net_initdata dev_proc_ops = {
.init = dev_proc_net_init,
.exit = dev_proc_net_exit,
.async = true,
};

static int dev_mc_seq_show(struct seq_file *seq, void *v)
Expand Down Expand Up @@ -405,6 +406,7 @@ static void __net_exit dev_mc_net_exit(struct net *net)
static struct pernet_operations __net_initdata dev_mc_net_ops = {
.init = dev_mc_net_init,
.exit = dev_mc_net_exit,
.async = true,
};

int __init dev_proc_init(void)
Expand Down
95 changes: 56 additions & 39 deletions net/core/net_namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
/* Used only if there are !async pernet_operations registered */
DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
Expand All @@ -41,6 +42,12 @@ struct net init_net = {
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;
static unsigned nr_sync_pernet_ops;
/*
* net_sem protects pernet_list, net_generic_ids, nr_sync_pernet_ops,
* init_net_initialized and the first_device pointer.
*/
DECLARE_RWSEM(net_sem);

#define MIN_PERNET_OPS_ID \
((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))
Expand All @@ -65,11 +72,10 @@ static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
struct net_generic *ng, *old_ng;

BUG_ON(!mutex_is_locked(&net_mutex));
BUG_ON(id < MIN_PERNET_OPS_ID);

old_ng = rcu_dereference_protected(net->gen,
lockdep_is_held(&net_mutex));
lockdep_is_held(&net_sem));
if (old_ng->s.len > id) {
old_ng->ptr[id] = data;
return 0;
Expand Down Expand Up @@ -286,7 +292,7 @@ struct net *get_net_ns_by_id(struct net *net, int id)
*/
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
/* Must be called with net_mutex held */
/* Must be called with net_sem held */
const struct pernet_operations *ops, *saved_ops;
int error = 0;
LIST_HEAD(net_exit_list);
Expand All @@ -303,6 +309,9 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
if (error < 0)
goto out_undo;
}
rtnl_lock();
list_add_tail_rcu(&net->list, &net_namespace_list);
rtnl_unlock();
out:
return error;

Expand Down Expand Up @@ -331,6 +340,7 @@ static int __net_init net_defaults_init_net(struct net *net)

static struct pernet_operations net_defaults_ops = {
.init = net_defaults_init_net,
.async = true,
};

static __init int net_defaults_init(void)
Expand Down Expand Up @@ -408,32 +418,32 @@ struct net *copy_net_ns(unsigned long flags,

net = net_alloc();
if (!net) {
dec_net_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
rv = -ENOMEM;
goto dec_ucounts;
}

refcount_set(&net->passive, 1);
net->ucounts = ucounts;
get_user_ns(user_ns);

rv = mutex_lock_killable(&net_mutex);
if (rv < 0) {
net_free(net);
dec_net_namespaces(ucounts);
put_user_ns(user_ns);
return ERR_PTR(rv);
rv = down_read_killable(&net_sem);
if (rv < 0)
goto put_userns;
if (nr_sync_pernet_ops) {
rv = mutex_lock_killable(&net_mutex);
if (rv < 0)
goto up_read;
}

net->ucounts = ucounts;
rv = setup_net(net, user_ns);
if (rv == 0) {
rtnl_lock();
list_add_tail_rcu(&net->list, &net_namespace_list);
rtnl_unlock();
}
mutex_unlock(&net_mutex);
if (nr_sync_pernet_ops)
mutex_unlock(&net_mutex);
up_read:
up_read(&net_sem);
if (rv < 0) {
dec_net_namespaces(ucounts);
put_userns:
put_user_ns(user_ns);
net_drop_ns(net);
dec_ucounts:
dec_net_namespaces(ucounts);
return ERR_PTR(rv);
}
return net;
Expand Down Expand Up @@ -481,7 +491,9 @@ static void cleanup_net(struct work_struct *work)
list_replace_init(&cleanup_list, &net_kill_list);
spin_unlock_irq(&cleanup_list_lock);

mutex_lock(&net_mutex);
down_read(&net_sem);
if (nr_sync_pernet_ops)
mutex_lock(&net_mutex);

/* Don't let anyone else find us. */
rtnl_lock();
Expand Down Expand Up @@ -516,11 +528,14 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_exit_list(ops, &net_exit_list);

if (nr_sync_pernet_ops)
mutex_unlock(&net_mutex);

/* Free the net generic variables */
list_for_each_entry_reverse(ops, &pernet_list, list)
ops_free_list(ops, &net_exit_list);

mutex_unlock(&net_mutex);
up_read(&net_sem);

/* Ensure there are no outstanding rcu callbacks using this
* network namespace.
Expand All @@ -547,8 +562,10 @@ static void cleanup_net(struct work_struct *work)
*/
void net_ns_barrier(void)
{
down_write(&net_sem);
mutex_lock(&net_mutex);
mutex_unlock(&net_mutex);
up_write(&net_sem);
}
EXPORT_SYMBOL(net_ns_barrier);

Expand Down Expand Up @@ -633,6 +650,7 @@ static __net_exit void net_ns_net_exit(struct net *net)
static struct pernet_operations __net_initdata net_ns_ops = {
.init = net_ns_net_init,
.exit = net_ns_net_exit,
.async = true,
};

static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
Expand Down Expand Up @@ -875,17 +893,12 @@ static int __init net_ns_init(void)

rcu_assign_pointer(init_net.gen, ng);

mutex_lock(&net_mutex);
down_write(&net_sem);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");

init_net_initialized = true;

rtnl_lock();
list_add_tail_rcu(&init_net.list, &net_namespace_list);
rtnl_unlock();

mutex_unlock(&net_mutex);
up_write(&net_sem);

register_pernet_subsys(&net_ns_ops);

Expand Down Expand Up @@ -989,14 +1002,18 @@ static int register_pernet_operations(struct list_head *list,
rcu_barrier();
if (ops->id)
ida_remove(&net_generic_ids, *ops->id);
} else if (!ops->async) {
pr_info_once("Pernet operations %ps are sync.\n", ops);
nr_sync_pernet_ops++;
}

return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{

if (!ops->async)
BUG_ON(nr_sync_pernet_ops-- == 0);
__unregister_pernet_operations(ops);
rcu_barrier();
if (ops->id)
Expand Down Expand Up @@ -1025,9 +1042,9 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
int register_pernet_subsys(struct pernet_operations *ops)
{
int error;
mutex_lock(&net_mutex);
down_write(&net_sem);
error = register_pernet_operations(first_device, ops);
mutex_unlock(&net_mutex);
up_write(&net_sem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
Expand All @@ -1043,9 +1060,9 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
*/
void unregister_pernet_subsys(struct pernet_operations *ops)
{
mutex_lock(&net_mutex);
down_write(&net_sem);
unregister_pernet_operations(ops);
mutex_unlock(&net_mutex);
up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

Expand All @@ -1071,11 +1088,11 @@ EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
int register_pernet_device(struct pernet_operations *ops)
{
int error;
mutex_lock(&net_mutex);
down_write(&net_sem);
error = register_pernet_operations(&pernet_list, ops);
if (!error && (first_device == &pernet_list))
first_device = &ops->list;
mutex_unlock(&net_mutex);
up_write(&net_sem);
return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
Expand All @@ -1091,11 +1108,11 @@ EXPORT_SYMBOL_GPL(register_pernet_device);
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
mutex_lock(&net_mutex);
down_write(&net_sem);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
mutex_unlock(&net_mutex);
up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

Expand Down
5 changes: 3 additions & 2 deletions net/core/rtnetlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -454,11 +454,11 @@ static void rtnl_lock_unregistering_all(void)
void rtnl_link_unregister(struct rtnl_link_ops *ops)
{
/* Close the race with cleanup_net() */
mutex_lock(&net_mutex);
down_write(&net_sem);
rtnl_lock_unregistering_all();
__rtnl_link_unregister(ops);
rtnl_unlock();
mutex_unlock(&net_mutex);
up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(rtnl_link_unregister);

Expand Down Expand Up @@ -4724,6 +4724,7 @@ static void __net_exit rtnetlink_net_exit(struct net *net)
static struct pernet_operations rtnetlink_net_ops = {
.init = rtnetlink_net_init,
.exit = rtnetlink_net_exit,
.async = true,
};

void __init rtnetlink_init(void)
Expand Down
2 changes: 2 additions & 0 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -3112,6 +3112,7 @@ static void __net_exit sock_inuse_exit_net(struct net *net)
static struct pernet_operations net_inuse_ops = {
.init = sock_inuse_init_net,
.exit = sock_inuse_exit_net,
.async = true,
};

static __init int net_inuse_init(void)
Expand Down Expand Up @@ -3385,6 +3386,7 @@ static __net_exit void proto_exit_net(struct net *net)
static __net_initdata struct pernet_operations proto_net_ops = {
.init = proto_init_net,
.exit = proto_exit_net,
.async = true,
};

static int __init proto_init(void)
Expand Down
1 change: 1 addition & 0 deletions net/core/sock_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,7 @@ static void __net_exit diag_net_exit(struct net *net)
static struct pernet_operations diag_net_ops = {
.init = diag_net_init,
.exit = diag_net_exit,
.async = true,
};

static int __init sock_diag_init(void)
Expand Down
Loading

0 comments on commit 885842d

Please sign in to comment.