Skip to content

Commit

Permalink
Merge branch 'sock_diag_destruction_events'
Browse files Browse the repository at this point in the history
Craig Gallek says:

====================
Socket destruction events via netlink sock_diag

This series extends the netlink sock_diag interface to broadcast
socket information as sockets are being destroyed.  The current
interface is poll based and cannot be used to retrieve information
about sockets that are destroyed between poll intervals.

Only inet sockets are broadcast in this implementation, but other
families could easily be added as needed in the future.

If this patch set is accepted, a follow-up patch to the ss utility
in the iproute2 suite will also be submitted.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jun 16, 2015
2 parents 916035d + 35ac838 commit 24029a3
Show file tree
Hide file tree
Showing 12 changed files with 209 additions and 7 deletions.
1 change: 1 addition & 0 deletions include/linux/inet_diag.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ struct inet_diag_handler {
struct inet_diag_msg *r,
void *info);
__u16 idiag_type;
__u16 idiag_info_size;
};

struct inet_connection_sock;
Expand Down
42 changes: 42 additions & 0 deletions include/linux/sock_diag.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#ifndef __SOCK_DIAG_H__
#define __SOCK_DIAG_H__

#include <linux/netlink.h>
#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>

struct sk_buff;
Expand All @@ -11,6 +14,7 @@ struct sock;
/*
 * Per-address-family sock_diag handler.
 *
 * Handlers are looked up by socket family (sock_diag_handlers[] is indexed
 * by sk->sk_family in the destroy-broadcast path).
 */
struct sock_diag_handler {
/* Address family this handler serves (e.g. AF_INET, AF_INET6). */
__u8 family;
/* Handles a request message; NOTE(review): exact request/return contract
 * is defined by the caller in sock_diag.c, not visible here. */
int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
/* Optional. Fills @skb with a diag message describing @sk. The
 * destroy-broadcast work calls it and multicasts @skb only when it
 * returns 0; any other return makes the caller free @skb. */
int (*get_info)(struct sk_buff *skb, struct sock *sk);
};

int sock_diag_register(const struct sock_diag_handler *h);
Expand All @@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
struct sk_buff *skb, int attrtype);

static inline
enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
{
switch (sk->sk_family) {
case AF_INET:
switch (sk->sk_protocol) {
case IPPROTO_TCP:
return SKNLGRP_INET_TCP_DESTROY;
case IPPROTO_UDP:
return SKNLGRP_INET_UDP_DESTROY;
default:
return SKNLGRP_NONE;
}
case AF_INET6:
switch (sk->sk_protocol) {
case IPPROTO_TCP:
return SKNLGRP_INET6_TCP_DESTROY;
case IPPROTO_UDP:
return SKNLGRP_INET6_UDP_DESTROY;
default:
return SKNLGRP_NONE;
}
default:
return SKNLGRP_NONE;
}
}

static inline
bool sock_diag_has_destroy_listeners(const struct sock *sk)
{
const struct net *n = sock_net(sk);
const enum sknetlink_groups group = sock_diag_destroy_group(sk);

return group != SKNLGRP_NONE && n->diag_nlsk &&
netlink_has_listeners(n->diag_nlsk, group);
}
void sock_diag_broadcast_destroy(struct sock *sk);

#endif
1 change: 1 addition & 0 deletions include/net/sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -1518,6 +1518,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
struct proto *prot, int kern);
void sk_free(struct sock *sk);
void sk_destruct(struct sock *sk);
struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);

struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
Expand Down
3 changes: 2 additions & 1 deletion include/uapi/linux/inet_diag.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,9 +111,10 @@ enum {
INET_DIAG_SKMEMINFO,
INET_DIAG_SHUTDOWN,
INET_DIAG_DCTCPINFO,
INET_DIAG_PROTOCOL, /* response attribute only */
};

#define INET_DIAG_MAX INET_DIAG_DCTCPINFO
#define INET_DIAG_MAX INET_DIAG_PROTOCOL

/* INET_DIAG_MEM */

Expand Down
10 changes: 10 additions & 0 deletions include/uapi/linux/sock_diag.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,14 @@ enum {
SK_MEMINFO_VARS,
};

/*
 * Netlink multicast groups of the sock_diag netlink family.
 *
 * The *_DESTROY groups carry socket destruction events, one group per
 * (address family, protocol) pair. The enumerator order defines the group
 * numbers seen by user space, so new groups must be added just before
 * __SKNLGRP_MAX.
 */
enum sknetlink_groups {
SKNLGRP_NONE, /* no group: socket is not broadcast */
SKNLGRP_INET_TCP_DESTROY, /* AF_INET + IPPROTO_TCP */
SKNLGRP_INET_UDP_DESTROY, /* AF_INET + IPPROTO_UDP */
SKNLGRP_INET6_TCP_DESTROY, /* AF_INET6 + IPPROTO_TCP */
SKNLGRP_INET6_UDP_DESTROY, /* AF_INET6 + IPPROTO_UDP */
__SKNLGRP_MAX, /* sentinel, one past the last real group */
};
/* Highest valid group number. */
#define SKNLGRP_MAX (__SKNLGRP_MAX - 1)

#endif /* _UAPI__SOCK_DIAG_H__ */
11 changes: 10 additions & 1 deletion net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>
#include <linux/sock_diag.h>

#include <linux/filter.h>

Expand Down Expand Up @@ -1423,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
}
EXPORT_SYMBOL(sk_alloc);

static void __sk_free(struct sock *sk)
void sk_destruct(struct sock *sk)
{
struct sk_filter *filter;

Expand Down Expand Up @@ -1451,6 +1452,14 @@ static void __sk_free(struct sock *sk)
sk_prot_free(sk->sk_prot_creator, sk);
}

/*
 * Final release step of a socket.
 *
 * When someone listens for destruction events of this socket, the socket
 * is handed to sock_diag_broadcast_destroy(); otherwise it is destructed
 * right away with sk_destruct().
 */
static void __sk_free(struct sock *sk)
{
	/* Listeners are the rare case; hand the socket to the broadcaster. */
	if (unlikely(sock_diag_has_destroy_listeners(sk))) {
		sock_diag_broadcast_destroy(sk);
		return;
	}

	sk_destruct(sk);
}

void sk_free(struct sock *sk)
{
/*
Expand Down
85 changes: 85 additions & 0 deletions net/core/sock_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
#include <net/net_namespace.h>
#include <linux/module.h>
#include <net/sock.h>
#include <linux/kernel.h>
#include <linux/tcp.h>
#include <linux/workqueue.h>

#include <linux/inet_diag.h>
#include <linux/sock_diag.h>

static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
static DEFINE_MUTEX(sock_diag_table_mutex);
static struct workqueue_struct *broadcast_wq;

static u64 sock_gen_cookie(struct sock *sk)
{
Expand Down Expand Up @@ -101,6 +105,62 @@ int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
}
EXPORT_SYMBOL(sock_diag_put_filterinfo);

/*
 * Deferred destroy-broadcast request: pairs the socket being destroyed
 * with the work item that broadcasts and then destructs it.
 */
struct broadcast_sk {
/* Socket whose destruction is announced; destructed by the work handler. */
struct sock *sk;
/* Work item queued on broadcast_wq; container_of() recovers this struct. */
struct work_struct work;
};

/*
 * Payload size requested for a destroy-event skb: the inet_diag_msg header
 * plus room for the INET_DIAG_PROTOCOL and INET_DIAG_INFO attributes.
 * INET_DIAG_INFO is sized for a struct tcp_info.
 */
static size_t sock_diag_nlmsg_size(void)
{
	size_t payload = sizeof(struct inet_diag_msg);

	payload += nla_total_size(sizeof(u8));			/* INET_DIAG_PROTOCOL */
	payload += nla_total_size(sizeof(struct tcp_info));	/* INET_DIAG_INFO */

	return NLMSG_ALIGN(payload);
}

/*
 * Work handler: broadcast a destruction event for a socket, then destruct it.
 *
 * @work is embedded in a struct broadcast_sk allocated by
 * sock_diag_broadcast_destroy(). The message is multicast only when the
 * family handler's get_info() returns 0. The socket is destructed and the
 * request freed in every case, including allocation or handler failure.
 */
static void sock_diag_broadcast_destroy_work(struct work_struct *work)
{
	struct broadcast_sk *bsk =
		container_of(work, struct broadcast_sk, work);
	struct sock *sk = bsk->sk;
	const enum sknetlink_groups group = sock_diag_destroy_group(sk);
	const struct sock_diag_handler *handler;
	struct sk_buff *msg;
	int rc = -1;	/* stays non-zero when no handler/get_info exists */

	WARN_ON(group == SKNLGRP_NONE);

	msg = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL);
	if (msg) {
		/* Handler table is protected by sock_diag_table_mutex. */
		mutex_lock(&sock_diag_table_mutex);
		handler = sock_diag_handlers[sk->sk_family];
		if (handler && handler->get_info)
			rc = handler->get_info(msg, sk);
		mutex_unlock(&sock_diag_table_mutex);

		if (rc)
			kfree_skb(msg);
		else
			nlmsg_multicast(sock_net(sk)->diag_nlsk, msg, 0,
					group, GFP_KERNEL);
	}

	/* The socket must only go away after its info has been read. */
	sk_destruct(sk);
	kfree(bsk);
}

/*
 * Schedule a destruction broadcast for @sk.
 *
 * Takes over destruction of @sk: on success the socket is destructed later
 * by the queued work item; if the request cannot be allocated the socket
 * is destructed immediately and no event is sent.
 */
void sock_diag_broadcast_destroy(struct sock *sk)
{
	struct broadcast_sk *bsk;

	/* Note, this function is often called from an interrupt context. */
	bsk = kmalloc(sizeof(*bsk), GFP_ATOMIC);
	if (!bsk) {
		sk_destruct(sk);
		return;
	}

	bsk->sk = sk;
	INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work);
	queue_work(broadcast_wq, &bsk->work);
}

void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
{
mutex_lock(&sock_diag_table_mutex);
Expand Down Expand Up @@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb)
mutex_unlock(&sock_diag_mutex);
}

/*
 * Netlink bind callback for the sock_diag multicast groups.
 *
 * When user space subscribes to a destroy group and no sock_diag handler
 * is present for the matching address family, try to load the module that
 * provides it, using the "net-pf-<PF_NETLINK>-proto-<NETLINK_SOCK_DIAG>-
 * type-<family>" alias.
 *
 * @net:   network namespace of the binding socket (unused).
 * @group: multicast group being joined (enum sknetlink_groups).
 *
 * Always returns 0: the result of request_module() is ignored, so a
 * subscription succeeds even if no handler could be loaded.
 */
static int sock_diag_bind(struct net *net, int group)
{
	switch (group) {
	case SKNLGRP_INET_TCP_DESTROY:
	case SKNLGRP_INET_UDP_DESTROY:
		if (!sock_diag_handlers[AF_INET])
			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				       NETLINK_SOCK_DIAG, AF_INET);
		break;
	case SKNLGRP_INET6_TCP_DESTROY:
	case SKNLGRP_INET6_UDP_DESTROY:
		/* Request the alias of the family that was found missing. */
		if (!sock_diag_handlers[AF_INET6])
			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
				       NETLINK_SOCK_DIAG, AF_INET6);
		break;
	}
	return 0;
}

static int __net_init diag_net_init(struct net *net)
{
struct netlink_kernel_cfg cfg = {
.groups = SKNLGRP_MAX,
.input = sock_diag_rcv,
.bind = sock_diag_bind,
.flags = NL_CFG_F_NONROOT_RECV,
};

net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
Expand All @@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = {

/*
 * Module init: create the workqueue used for destroy broadcasts, then
 * register the per-namespace operations. The workqueue is created first
 * so it exists before any namespace hook can run.
 */
static int __init sock_diag_init(void)
{
broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0);
/* Workqueue allocation failure is treated as fatal. */
BUG_ON(!broadcast_wq);
return register_pernet_subsys(&diag_net_ops);
}

/*
 * Module exit: undo sock_diag_init() in reverse order, unregistering the
 * per-namespace operations before destroying the broadcast workqueue.
 */
static void __exit sock_diag_exit(void)
{
unregister_pernet_subsys(&diag_net_ops);
destroy_workqueue(broadcast_wq);
}

module_init(sock_diag_init);
Expand Down
1 change: 1 addition & 0 deletions net/dccp/diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ static const struct inet_diag_handler dccp_diag_handler = {
.dump_one = dccp_diag_dump_one,
.idiag_get_info = dccp_diag_get_info,
.idiag_type = IPPROTO_DCCP,
.idiag_info_size = sizeof(struct tcp_info),
};

static int __init dccp_diag_init(void)
Expand Down
50 changes: 48 additions & 2 deletions net/ipv4/inet_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
}
#undef EXPIRES_IN_MS

if (ext & (1 << (INET_DIAG_INFO - 1))) {
if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) {
attr = nla_reserve(skb, INET_DIAG_INFO,
sizeof(struct tcp_info));
handler->idiag_info_size);
if (!attr)
goto errout;

Expand Down Expand Up @@ -1078,14 +1078,60 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
return inet_diag_get_exact(skb, h, nlmsg_data(h));
}

/*
 * Build a SOCK_DIAG_BY_FAMILY message describing @sk into @skb.
 *
 * The message consists of a struct inet_diag_msg, an INET_DIAG_PROTOCOL
 * attribute and, when the protocol handler declares a non-zero
 * idiag_info_size, an INET_DIAG_INFO attribute filled by the handler.
 * If reserving INET_DIAG_INFO fails the handler is still called, with a
 * NULL info pointer.
 *
 * Returns 0 on success, -ENOMEM if the header does not fit, or the error
 * from nla_put_u8() / inet_diag_lock_handler(); on error the partially
 * built message is cancelled.
 */
static
int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
{
	const struct inet_diag_handler *handler;
	struct inet_diag_msg *msg;
	struct nlattr *info_attr = NULL;
	struct nlmsghdr *hdr;
	int rc;

	hdr = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*msg), 0);
	if (!hdr)
		return -ENOMEM;

	msg = nlmsg_data(hdr);
	memset(msg, 0, sizeof(*msg));
	inet_diag_msg_common_fill(msg, sk);
	msg->idiag_state = sk->sk_state;

	rc = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol);
	if (rc)
		goto cancel;

	handler = inet_diag_lock_handler(sk->sk_protocol);
	if (IS_ERR(handler)) {
		/* The unlock is issued on the error path as well. */
		inet_diag_unlock_handler(handler);
		rc = PTR_ERR(handler);
		goto cancel;
	}

	if (handler->idiag_info_size)
		info_attr = nla_reserve(skb, INET_DIAG_INFO,
					handler->idiag_info_size);

	handler->idiag_get_info(sk, msg,
				info_attr ? nla_data(info_attr) : NULL);
	inet_diag_unlock_handler(handler);

	nlmsg_end(skb, hdr);
	return 0;

cancel:
	nlmsg_cancel(skb, hdr);
	return rc;
}

/* sock_diag handler description for AF_INET sockets. */
static const struct sock_diag_handler inet_diag_handler = {
.family = AF_INET,
.dump = inet_diag_handler_dump,
.get_info = inet_diag_handler_get_info,
};

/* sock_diag handler description for AF_INET6 sockets; it uses the same
 * dump and get_info callbacks as the AF_INET handler. */
static const struct sock_diag_handler inet6_diag_handler = {
.family = AF_INET6,
.dump = inet_diag_handler_dump,
.get_info = inet_diag_handler_get_info,
};

int inet_diag_register(const struct inet_diag_handler *h)
Expand Down
4 changes: 3 additions & 1 deletion net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -2624,13 +2624,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
/* Return information about state of tcp endpoint in API format. */
void tcp_get_info(struct sock *sk, struct tcp_info *info)
{
const struct tcp_sock *tp = tcp_sk(sk);
const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
unsigned int start;
u32 rate;

memset(info, 0, sizeof(*info));
if (sk->sk_type != SOCK_STREAM)
return;

info->tcpi_state = sk->sk_state;
info->tcpi_ca_state = icsk->icsk_ca_state;
Expand Down
6 changes: 4 additions & 2 deletions net/ipv4/tcp_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@
static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
void *_info)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_info *info = _info;

if (sk->sk_state == TCP_LISTEN) {
r->idiag_rqueue = sk->sk_ack_backlog;
r->idiag_wqueue = sk->sk_max_ack_backlog;
} else {
} else if (sk->sk_type == SOCK_STREAM) {
const struct tcp_sock *tp = tcp_sk(sk);

r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
r->idiag_wqueue = tp->write_seq - tp->snd_una;
}
Expand All @@ -50,6 +51,7 @@ static const struct inet_diag_handler tcp_diag_handler = {
.dump_one = tcp_diag_dump_one,
.idiag_get_info = tcp_diag_get_info,
.idiag_type = IPPROTO_TCP,
.idiag_info_size = sizeof(struct tcp_info),
};

static int __init tcp_diag_init(void)
Expand Down
Loading

0 comments on commit 24029a3

Please sign in to comment.