-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
xsk: add sock_diag interface for AF_XDP
This patch adds the sock_diag interface for querying sockets from user space. Tools like iproute2 ss(8) can use this interface to list open AF_XDP sockets. The user-space ABI is defined in linux/xdp_diag.h and includes netlink request and response structs. The request can query sockets and the response contains socket information about the rings, umems, inode and more. Signed-off-by: Björn Töpel <bjorn.topel@intel.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
- Loading branch information
Björn Töpel
authored and
Daniel Borkmann
committed
Jan 25, 2019
1 parent
50e74c0
commit a36b38a
Showing
6 changed files
with
285 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ | ||
/* | ||
* xdp_diag: interface for querying/monitoring XDP sockets | ||
* Copyright(c) 2019 Intel Corporation. | ||
*/ | ||
|
||
#ifndef _LINUX_XDP_DIAG_H | ||
#define _LINUX_XDP_DIAG_H | ||
|
||
#include <linux/types.h> | ||
|
||
struct xdp_diag_req { | ||
__u8 sdiag_family; | ||
__u8 sdiag_protocol; | ||
__u16 pad; | ||
__u32 xdiag_ino; | ||
__u32 xdiag_show; | ||
__u32 xdiag_cookie[2]; | ||
}; | ||
|
||
struct xdp_diag_msg { | ||
__u8 xdiag_family; | ||
__u8 xdiag_type; | ||
__u16 pad; | ||
__u32 xdiag_ino; | ||
__u32 xdiag_cookie[2]; | ||
}; | ||
|
||
/*
 * Bits for xdp_diag_req.xdiag_show: each one asks the kernel to append the
 * corresponding group of attributes to the reply.
 */
#define XDP_SHOW_INFO		(1 << 0) /* Basic information: XDP_DIAG_INFO and XDP_DIAG_UID */
#define XDP_SHOW_RING_CFG	(1 << 1) /* XDP_DIAG_RX_RING / XDP_DIAG_TX_RING */
#define XDP_SHOW_UMEM		(1 << 2) /* XDP_DIAG_UMEM and the umem ring attributes */
#define XDP_SHOW_MEMINFO	(1 << 3) /* XDP_DIAG_MEMINFO */
|
||
/* Netlink attribute types that may follow struct xdp_diag_msg in a reply. */
enum {
	XDP_DIAG_NONE,
	XDP_DIAG_INFO,			/* struct xdp_diag_info */
	XDP_DIAG_UID,			/* u32 user id of the socket owner */
	XDP_DIAG_RX_RING,		/* struct xdp_diag_ring */
	XDP_DIAG_TX_RING,		/* struct xdp_diag_ring */
	XDP_DIAG_UMEM,			/* struct xdp_diag_umem */
	XDP_DIAG_UMEM_FILL_RING,	/* struct xdp_diag_ring */
	XDP_DIAG_UMEM_COMPLETION_RING,	/* struct xdp_diag_ring */
	XDP_DIAG_MEMINFO,		/* written by sock_diag_put_meminfo() */
	__XDP_DIAG_MAX,			/* one past the last attribute; keep last */
};

/* Highest valid attribute type. */
#define XDP_DIAG_MAX (__XDP_DIAG_MAX - 1)
|
||
struct xdp_diag_info { | ||
__u32 ifindex; | ||
__u32 queue_id; | ||
}; | ||
|
||
struct xdp_diag_ring { | ||
__u32 entries; /*num descs */ | ||
}; | ||
|
||
#define XDP_DU_F_ZEROCOPY (1 << 0) | ||
|
||
struct xdp_diag_umem { | ||
__u64 size; | ||
__u32 id; | ||
__u32 num_pages; | ||
__u32 chunk_size; | ||
__u32 headroom; | ||
__u32 ifindex; | ||
__u32 queue_id; | ||
__u32 flags; | ||
__u32 refs; | ||
}; | ||
|
||
#endif /* _LINUX_XDP_DIAG_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o | ||
obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
/* SPDX-License-Identifier: GPL-2.0 */ | ||
/* Copyright(c) 2019 Intel Corporation. */ | ||
|
||
#ifndef XSK_H_ | ||
#define XSK_H_ | ||
|
||
/*
 * Reinterpret a generic socket pointer as its AF_XDP socket.
 *
 * This is a plain pointer cast, so it presumably relies on struct sock being
 * located at the very start of struct xdp_sock -- confirm against the struct
 * definition. The caller must pass a socket that really is an AF_XDP socket.
 */
static inline struct xdp_sock *xdp_sk(struct sock *sk)
{
	struct xdp_sock *xs = (struct xdp_sock *)sk;

	return xs;
}
|
||
#endif /* XSK_H_ */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
// SPDX-License-Identifier: GPL-2.0 | ||
/* XDP sockets monitoring support | ||
* | ||
* Copyright(c) 2019 Intel Corporation. | ||
* | ||
* Author: Björn Töpel <bjorn.topel@intel.com> | ||
*/ | ||
|
||
#include <linux/module.h> | ||
#include <net/xdp_sock.h> | ||
#include <linux/xdp_diag.h> | ||
#include <linux/sock_diag.h> | ||
|
||
#include "xsk_queue.h" | ||
#include "xsk.h" | ||
|
||
static int xsk_diag_put_info(const struct xdp_sock *xs, struct sk_buff *nlskb) | ||
{ | ||
struct xdp_diag_info di = {}; | ||
|
||
di.ifindex = xs->dev ? xs->dev->ifindex : 0; | ||
di.queue_id = xs->queue_id; | ||
return nla_put(nlskb, XDP_DIAG_INFO, sizeof(di), &di); | ||
} | ||
|
||
static int xsk_diag_put_ring(const struct xsk_queue *queue, int nl_type, | ||
struct sk_buff *nlskb) | ||
{ | ||
struct xdp_diag_ring dr = {}; | ||
|
||
dr.entries = queue->nentries; | ||
return nla_put(nlskb, nl_type, sizeof(dr), &dr); | ||
} | ||
|
||
static int xsk_diag_put_rings_cfg(const struct xdp_sock *xs, | ||
struct sk_buff *nlskb) | ||
{ | ||
int err = 0; | ||
|
||
if (xs->rx) | ||
err = xsk_diag_put_ring(xs->rx, XDP_DIAG_RX_RING, nlskb); | ||
if (!err && xs->tx) | ||
err = xsk_diag_put_ring(xs->tx, XDP_DIAG_TX_RING, nlskb); | ||
return err; | ||
} | ||
|
||
static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) | ||
{ | ||
struct xdp_umem *umem = xs->umem; | ||
struct xdp_diag_umem du = {}; | ||
int err; | ||
|
||
if (!umem) | ||
return 0; | ||
|
||
du.id = umem->id; | ||
du.size = umem->size; | ||
du.num_pages = umem->npgs; | ||
du.chunk_size = (__u32)(~umem->chunk_mask + 1); | ||
du.headroom = umem->headroom; | ||
du.ifindex = umem->dev ? umem->dev->ifindex : 0; | ||
du.queue_id = umem->queue_id; | ||
du.flags = 0; | ||
if (umem->zc) | ||
du.flags |= XDP_DU_F_ZEROCOPY; | ||
du.refs = refcount_read(&umem->users); | ||
|
||
err = nla_put(nlskb, XDP_DIAG_UMEM, sizeof(du), &du); | ||
|
||
if (!err && umem->fq) | ||
err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_FILL_RING, nlskb); | ||
if (!err && umem->cq) { | ||
err = xsk_diag_put_ring(xs->tx, XDP_DIAG_UMEM_COMPLETION_RING, | ||
nlskb); | ||
} | ||
return err; | ||
} | ||
|
||
/*
 * Build one SOCK_DIAG_BY_FAMILY reply message for @sk in @nlskb.
 *
 * The message starts with a zeroed struct xdp_diag_msg carrying the family,
 * socket type, inode number (@sk_ino) and socket cookie. Attributes are then
 * appended according to the XDP_SHOW_* bits in @req->xdiag_show. @user_ns is
 * the namespace used to translate the socket owner's uid; @portid, @seq and
 * @flags are passed through to nlmsg_put().
 *
 * Returns 0 on success. If the header or any requested attribute cannot be
 * added, the partially built message is cancelled and -EMSGSIZE is returned.
 */
static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb,
			 struct xdp_diag_req *req,
			 struct user_namespace *user_ns,
			 u32 portid, u32 seq, u32 flags, int sk_ino)
{
	const u32 show = req->xdiag_show;
	struct xdp_sock *xs = xdp_sk(sk);
	struct xdp_diag_msg *hdr;
	struct nlmsghdr *nlh;

	nlh = nlmsg_put(nlskb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*hdr),
			flags);
	if (!nlh)
		return -EMSGSIZE;

	hdr = nlmsg_data(nlh);
	memset(hdr, 0, sizeof(*hdr));
	hdr->xdiag_family = AF_XDP;
	hdr->xdiag_type = sk->sk_type;
	hdr->xdiag_ino = sk_ino;
	sock_diag_save_cookie(sk, hdr->xdiag_cookie);

	/* XDP_SHOW_INFO covers both the info attribute and the owner uid. */
	if (show & XDP_SHOW_INFO) {
		if (xsk_diag_put_info(xs, nlskb))
			goto cancel;

		if (nla_put_u32(nlskb, XDP_DIAG_UID,
				from_kuid_munged(user_ns, sock_i_uid(sk))))
			goto cancel;
	}

	if (show & XDP_SHOW_RING_CFG) {
		if (xsk_diag_put_rings_cfg(xs, nlskb))
			goto cancel;
	}

	if (show & XDP_SHOW_UMEM) {
		if (xsk_diag_put_umem(xs, nlskb))
			goto cancel;
	}

	if (show & XDP_SHOW_MEMINFO) {
		if (sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO))
			goto cancel;
	}

	nlmsg_end(nlskb, nlh);
	return 0;

cancel:
	/* Drop the half-written message so the skb stays well-formed. */
	nlmsg_cancel(nlskb, nlh);
	return -EMSGSIZE;
}
|
||
static int xsk_diag_dump(struct sk_buff *nlskb, struct netlink_callback *cb) | ||
{ | ||
struct xdp_diag_req *req = nlmsg_data(cb->nlh); | ||
struct net *net = sock_net(nlskb->sk); | ||
int num = 0, s_num = cb->args[0]; | ||
struct sock *sk; | ||
|
||
mutex_lock(&net->xdp.lock); | ||
|
||
sk_for_each(sk, &net->xdp.list) { | ||
if (!net_eq(sock_net(sk), net)) | ||
continue; | ||
if (num++ < s_num) | ||
continue; | ||
|
||
if (xsk_diag_fill(sk, nlskb, req, | ||
sk_user_ns(NETLINK_CB(cb->skb).sk), | ||
NETLINK_CB(cb->skb).portid, | ||
cb->nlh->nlmsg_seq, NLM_F_MULTI, | ||
sock_i_ino(sk)) < 0) { | ||
num--; | ||
break; | ||
} | ||
} | ||
|
||
mutex_unlock(&net->xdp.lock); | ||
cb->args[0] = num; | ||
return nlskb->len; | ||
} | ||
|
||
static int xsk_diag_handler_dump(struct sk_buff *nlskb, struct nlmsghdr *hdr) | ||
{ | ||
struct netlink_dump_control c = { .dump = xsk_diag_dump }; | ||
int hdrlen = sizeof(struct xdp_diag_req); | ||
struct net *net = sock_net(nlskb->sk); | ||
|
||
if (nlmsg_len(hdr) < hdrlen) | ||
return -EINVAL; | ||
|
||
if (!(hdr->nlmsg_flags & NLM_F_DUMP)) | ||
return -EOPNOTSUPP; | ||
|
||
return netlink_dump_start(net->diag_nlsk, nlskb, hdr, &c); | ||
} | ||
|
||
static const struct sock_diag_handler xsk_diag_handler = { | ||
.family = AF_XDP, | ||
.dump = xsk_diag_handler_dump, | ||
}; | ||
|
||
/* Module init: register the AF_XDP handler with the sock_diag core. */
static int __init xsk_diag_init(void)
{
	int err = sock_diag_register(&xsk_diag_handler);

	return err;
}
|
||
/* Module exit: remove the AF_XDP handler from the sock_diag core. */
static void __exit xsk_diag_exit(void)
{
	sock_diag_unregister(&xsk_diag_handler);
}
|
||
module_init(xsk_diag_init); | ||
module_exit(xsk_diag_exit); | ||
MODULE_LICENSE("GPL"); | ||
MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, AF_XDP); |