Skip to content

Commit

Permalink
Merge branch 'inet_diag-remove-three-mutexes-in-diag-dumps'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
inet_diag: remove three mutexes in diag dumps

Surprisingly, inet_diag operations are serialized over a stack
of three mutexes, giving legacy /proc based files an unfair
advantage on modern hosts.

This series removes all of them, making inet_diag operations
(eg iproute2/ss) fully parallel.

1-2) Two first patches are adding data-race annotations
     and can be backported to stable kernels.

3-4) inet_diag_table_mutex can be replaced with RCU protection,
     if we add corresponding protection against module unload.

5-7) sock_diag_table_mutex can be replaced with RCU protection,
     if we add corresponding protection against module unload.

 8)  sock_diag_mutex is removed, as the old bug it was
     working around has been fixed more elegantly.

 9)  inet_diag_dump_icsk() can skip over empty buckets to reduce
     spinlock contention.
====================

Link: https://lore.kernel.org/r/20240122112603.3270097-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Paolo Abeni committed Jan 23, 2024
2 parents 736b554 + 622a08e commit 2121c43
Show file tree
Hide file tree
Showing 17 changed files with 149 additions and 97 deletions.
1 change: 1 addition & 0 deletions include/linux/inet_diag.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
struct inet_hashinfo;

struct inet_diag_handler {
struct module *owner;
void (*dump)(struct sk_buff *skb,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r);
Expand Down
10 changes: 8 additions & 2 deletions include/linux/sock_diag.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ struct nlmsghdr;
struct sock;

struct sock_diag_handler {
struct module *owner;
__u8 family;
int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
int (*get_info)(struct sk_buff *skb, struct sock *sk);
Expand All @@ -22,8 +23,13 @@ struct sock_diag_handler {
int sock_diag_register(const struct sock_diag_handler *h);
void sock_diag_unregister(const struct sock_diag_handler *h);

void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
struct sock_diag_inet_compat {
struct module *owner;
int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh);
};

void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr);
void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr);

u64 __sock_gen_cookie(struct sock *sk);

Expand Down
120 changes: 68 additions & 52 deletions net/core/sock_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,10 @@
#include <linux/inet_diag.h>
#include <linux/sock_diag.h>

static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static const struct sock_diag_handler __rcu *sock_diag_handlers[AF_MAX];

static struct sock_diag_inet_compat __rcu *inet_rcv_compat;

static struct workqueue_struct *broadcast_wq;

DEFINE_COOKIE(sock_cookie);
Expand Down Expand Up @@ -122,6 +123,24 @@ static size_t sock_diag_nlmsg_size(void)
+ nla_total_size_64bit(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
}

static const struct sock_diag_handler *sock_diag_lock_handler(int family)
{
const struct sock_diag_handler *handler;

rcu_read_lock();
handler = rcu_dereference(sock_diag_handlers[family]);
if (handler && !try_module_get(handler->owner))
handler = NULL;
rcu_read_unlock();

return handler;
}

static void sock_diag_unlock_handler(const struct sock_diag_handler *handler)
{
module_put(handler->owner);
}

static void sock_diag_broadcast_destroy_work(struct work_struct *work)
{
struct broadcast_sk *bsk =
Expand All @@ -138,12 +157,12 @@ static void sock_diag_broadcast_destroy_work(struct work_struct *work)
if (!skb)
goto out;

mutex_lock(&sock_diag_table_mutex);
hndl = sock_diag_handlers[sk->sk_family];
if (hndl && hndl->get_info)
err = hndl->get_info(skb, sk);
mutex_unlock(&sock_diag_table_mutex);

hndl = sock_diag_lock_handler(sk->sk_family);
if (hndl) {
if (hndl->get_info)
err = hndl->get_info(skb, sk);
sock_diag_unlock_handler(hndl);
}
if (!err)
nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
GFP_KERNEL);
Expand All @@ -166,51 +185,45 @@ void sock_diag_broadcast_destroy(struct sock *sk)
queue_work(broadcast_wq, &bsk->work);
}

void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
void sock_diag_register_inet_compat(const struct sock_diag_inet_compat *ptr)
{
mutex_lock(&sock_diag_table_mutex);
inet_rcv_compat = fn;
mutex_unlock(&sock_diag_table_mutex);
xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
ptr);
}
EXPORT_SYMBOL_GPL(sock_diag_register_inet_compat);

void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
void sock_diag_unregister_inet_compat(const struct sock_diag_inet_compat *ptr)
{
mutex_lock(&sock_diag_table_mutex);
inet_rcv_compat = NULL;
mutex_unlock(&sock_diag_table_mutex);
const struct sock_diag_inet_compat *old;

old = xchg((__force const struct sock_diag_inet_compat **)&inet_rcv_compat,
NULL);
WARN_ON_ONCE(old != ptr);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister_inet_compat);

int sock_diag_register(const struct sock_diag_handler *hndl)
{
int err = 0;
int family = hndl->family;

if (hndl->family >= AF_MAX)
if (family >= AF_MAX)
return -EINVAL;

mutex_lock(&sock_diag_table_mutex);
if (sock_diag_handlers[hndl->family])
err = -EBUSY;
else
sock_diag_handlers[hndl->family] = hndl;
mutex_unlock(&sock_diag_table_mutex);

return err;
return !cmpxchg((const struct sock_diag_handler **)
&sock_diag_handlers[family],
NULL, hndl) ? 0 : -EBUSY;
}
EXPORT_SYMBOL_GPL(sock_diag_register);

void sock_diag_unregister(const struct sock_diag_handler *hnld)
void sock_diag_unregister(const struct sock_diag_handler *hndl)
{
int family = hnld->family;
int family = hndl->family;

if (family >= AF_MAX)
return;

mutex_lock(&sock_diag_table_mutex);
BUG_ON(sock_diag_handlers[family] != hnld);
sock_diag_handlers[family] = NULL;
mutex_unlock(&sock_diag_table_mutex);
xchg((const struct sock_diag_handler **)&sock_diag_handlers[family],
NULL);
}
EXPORT_SYMBOL_GPL(sock_diag_unregister);

Expand All @@ -227,41 +240,48 @@ static int __sock_diag_cmd(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EINVAL;
req->sdiag_family = array_index_nospec(req->sdiag_family, AF_MAX);

if (sock_diag_handlers[req->sdiag_family] == NULL)
if (!rcu_access_pointer(sock_diag_handlers[req->sdiag_family]))
sock_load_diag_module(req->sdiag_family, 0);

mutex_lock(&sock_diag_table_mutex);
hndl = sock_diag_handlers[req->sdiag_family];
hndl = sock_diag_lock_handler(req->sdiag_family);
if (hndl == NULL)
err = -ENOENT;
else if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
return -ENOENT;

if (nlh->nlmsg_type == SOCK_DIAG_BY_FAMILY)
err = hndl->dump(skb, nlh);
else if (nlh->nlmsg_type == SOCK_DESTROY && hndl->destroy)
err = hndl->destroy(skb, nlh);
else
err = -EOPNOTSUPP;
mutex_unlock(&sock_diag_table_mutex);
sock_diag_unlock_handler(hndl);

return err;
}

static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
const struct sock_diag_inet_compat *ptr;
int ret;

switch (nlh->nlmsg_type) {
case TCPDIAG_GETSOCK:
case DCCPDIAG_GETSOCK:
if (inet_rcv_compat == NULL)

if (!rcu_access_pointer(inet_rcv_compat))
sock_load_diag_module(AF_INET, 0);

mutex_lock(&sock_diag_table_mutex);
if (inet_rcv_compat != NULL)
ret = inet_rcv_compat(skb, nlh);
else
ret = -EOPNOTSUPP;
mutex_unlock(&sock_diag_table_mutex);
rcu_read_lock();
ptr = rcu_dereference(inet_rcv_compat);
if (ptr && !try_module_get(ptr->owner))
ptr = NULL;
rcu_read_unlock();

ret = -EOPNOTSUPP;
if (ptr) {
ret = ptr->fn(skb, nlh);
module_put(ptr->owner);
}

return ret;
case SOCK_DIAG_BY_FAMILY:
Expand All @@ -272,26 +292,22 @@ static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
}
}

static DEFINE_MUTEX(sock_diag_mutex);

static void sock_diag_rcv(struct sk_buff *skb)
{
mutex_lock(&sock_diag_mutex);
netlink_rcv_skb(skb, &sock_diag_rcv_msg);
mutex_unlock(&sock_diag_mutex);
}

static int sock_diag_bind(struct net *net, int group)
{
switch (group) {
case SKNLGRP_INET_TCP_DESTROY:
case SKNLGRP_INET_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET])
if (!rcu_access_pointer(sock_diag_handlers[AF_INET]))
sock_load_diag_module(AF_INET, 0);
break;
case SKNLGRP_INET6_TCP_DESTROY:
case SKNLGRP_INET6_UDP_DESTROY:
if (!sock_diag_handlers[AF_INET6])
if (!rcu_access_pointer(sock_diag_handlers[AF_INET6]))
sock_load_diag_module(AF_INET6, 0);
break;
}
Expand Down
1 change: 1 addition & 0 deletions net/dccp/diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ static int dccp_diag_dump_one(struct netlink_callback *cb,
}

static const struct inet_diag_handler dccp_diag_handler = {
.owner = THIS_MODULE,
.dump = dccp_diag_dump,
.dump_one = dccp_diag_dump_one,
.idiag_get_info = dccp_diag_get_info,
Expand Down
Loading

0 comments on commit 2121c43

Please sign in to comment.