diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 288923e965ae..85402a2e289c 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -276,6 +276,9 @@ attribute-sets: doc: The timeout, in nanoseconds, of how long to suspend irq processing, if event polling finds events type: uint + - + name: xsk-info + attributes: [] - name: queue attributes: @@ -294,6 +297,9 @@ attribute-sets: - name: type doc: Queue type as rx, tx. Each queue type defines a separate ID space. + XDP TX queues allocated in the kernel are not linked to NAPIs and + thus not listed. AF_XDP queues will have more information set in + the xsk attribute. type: u32 enum: queue-type - @@ -309,7 +315,11 @@ attribute-sets: doc: io_uring memory provider information. type: nest nested-attributes: io-uring-provider-info - + - + name: xsk + doc: XSK information for this queue, if any. + type: nest + nested-attributes: xsk-info - name: qstats doc: | @@ -652,6 +662,7 @@ operations: - ifindex - dmabuf - io-uring + - xsk dump: request: attributes: diff --git a/include/net/netlink.h b/include/net/netlink.h index e015ffbed819..29e0db940382 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -118,6 +118,7 @@ * nla_nest_start(skb, type) start a nested attribute * nla_nest_end(skb, nla) finalize a nested attribute * nla_nest_cancel(skb, nla) cancel nested attribute construction + * nla_put_empty_nest(skb, type) create an empty nest * * Attribute Length Calculations: * nla_attr_size(payload) length of attribute w/o padding @@ -2240,6 +2241,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) nlmsg_trim(skb, start); } +/** + * nla_put_empty_nest - Create an empty nest + * @skb: socket buffer the message is stored in + * @attrtype: attribute type of the container + * + * This function is a helper for creating empty nests. + * + * Returns: 0 when successful or -EMSGSIZE on failure. + */ +static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype) +{ + return nla_nest_start(skb, attrtype) ? 0 : -EMSGSIZE; +} + /** * __nla_validate_nested - Validate a stream of nested attributes * @start: container attribute diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6c6ee183802d..4e82f3871473 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -136,6 +136,11 @@ enum { NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) }; +enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -143,6 +148,7 @@ enum { NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 22ac51356d9f..c92fba65b20d 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -400,11 +400,23 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, if (params->mp_ops && params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) goto nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif + break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); if (nla_put_napi_id(rsp, txq->napi)) goto nla_put_failure; +#ifdef CONFIG_XDP_SOCKETS + if (txq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + goto nla_put_failure; +#endif + break; } genlmsg_end(rsp, hdr); diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 6c6ee183802d..4e82f3871473 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -136,6 +136,11 @@ enum { NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) }; +enum { + __NETDEV_A_XSK_INFO_MAX, + NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -143,6 +148,7 @@ enum { NETDEV_A_QUEUE_NAPI_ID, NETDEV_A_QUEUE_DMABUF, NETDEV_A_QUEUE_IO_URING, + NETDEV_A_QUEUE_XSK, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore new file mode 100644 index 000000000000..ec746f374e85 --- /dev/null +++ b/tools/testing/selftests/drivers/net/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +xdp_helper diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 28b6d47f812d..0c95bd944d56 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,10 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 +CFLAGS += $(KHDR_INCLUDES) TEST_INCLUDES := $(wildcard lib/py/*.py) \ $(wildcard lib/sh/*.sh) \ ../../net/net_helper.sh \ ../../net/lib.sh \ +TEST_GEN_FILES := xdp_helper + TEST_PROGS := \ netcons_basic.sh \ netcons_fragmented_msg.sh \ diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index a2d8af60876d..f27172ddee0a 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y +CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py index 38303da957ee..5fdfebc6415f 100755 --- a/tools/testing/selftests/drivers/net/queues.py +++ b/tools/testing/selftests/drivers/net/queues.py @@ -2,13 +2,16 @@ # SPDX-License-Identifier: GPL-2.0 from lib.py import ksft_disruptive, ksft_exit, ksft_run -from lib.py import ksft_eq, ksft_raises, KsftSkipEx +from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import NetDrvEnv from lib.py import cmd, defer, ip import errno import glob - +import os +import socket +import struct +import subprocess def sys_get_queues(ifname, qtype='rx') -> int: folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*') @@ -21,6 +24,39 @@ def nl_get_queues(cfg, nl, qtype='rx'): return len([q for q in queues if q['type'] == qtype]) return None +def check_xdp(cfg, nl, xdp_queue_id=0) -> None: + test_dir = os.path.dirname(os.path.realpath(__file__)) + xdp = subprocess.Popen([f"{test_dir}/xdp_helper", f"{cfg.ifindex}", f"{xdp_queue_id}"], + stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1, + text=True) + defer(xdp.kill) + + stdout, stderr = xdp.communicate(timeout=10) + rx = tx = False + + if xdp.returncode == 255: + raise KsftSkipEx('AF_XDP unsupported') + elif xdp.returncode > 0: + raise KsftFailEx('unable to create AF_XDP socket') + + queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True) + if not queues: + raise KsftSkipEx("Netlink reports no queues") + + for q in queues: + if q['id'] == 0: + if q['type'] == 'rx': + rx = True + if q['type'] == 'tx': + tx = True + + ksft_eq(q['xsk'], {}) + else: + if 'xsk' in q: + _fail("Check failed: xsk attribute set.") + + ksft_eq(rx, True) + ksft_eq(tx, True) def get_queues(cfg, nl) -> None: snl = NetdevFamily(recv_size=4096) @@ -81,7 +117,7 @@ def check_down(cfg, nl) -> None: def main() -> None: with NetDrvEnv(__file__, queue_count=100) as cfg: - ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily())) + ksft_run([get_queues, addremove_queues, check_down, check_xdp], args=(cfg, NetdevFamily())) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/drivers/net/xdp_helper.c new file mode 100644 index 000000000000..cf06a88b830b --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp_helper.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define UMEM_SZ (1U << 16) +#define NUM_DESC (UMEM_SZ / 2048) + +/* this is a simple helper program that creates an XDP socket and does the + * minimum necessary to get bind() to succeed. + * + * this test program is not intended to actually process packets, but could be + * extended in the future if that is actually needed. + * + * it is used by queues.py to ensure the xsk netlinux attribute is set + * correctly. + */ +int main(int argc, char **argv) +{ + struct xdp_umem_reg umem_reg = { 0 }; + struct sockaddr_xdp sxdp = { 0 }; + int num_desc = NUM_DESC; + void *umem_area; + int ifindex; + int sock_fd; + int queue; + char byte; + + if (argc != 3) { + fprintf(stderr, "Usage: %s ifindex queue_id", argv[0]); + return 1; + } + + sock_fd = socket(AF_XDP, SOCK_RAW, 0); + if (sock_fd < 0) { + perror("socket creation failed"); + /* if the kernel doesn't support AF_XDP, let the test program + * know with -1. All other error paths return 1. + */ + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + ifindex = atoi(argv[1]); + queue = atoi(argv[2]); + + umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (umem_area == MAP_FAILED) { + perror("mmap failed"); + return 1; + } + + umem_reg.addr = (uintptr_t)umem_area; + umem_reg.len = UMEM_SZ; + umem_reg.chunk_size = 2048; + umem_reg.headroom = 0; + + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, + sizeof(umem_reg)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc)); + + sxdp.sxdp_family = AF_XDP; + sxdp.sxdp_ifindex = ifindex; + sxdp.sxdp_queue_id = queue; + sxdp.sxdp_flags = 0; + + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) { + munmap(umem_area, UMEM_SZ); + perror("bind failed"); + close(sock_fd); + return 1; + } + + /* give the parent program some data when the socket is ready*/ + fprintf(stdout, "%d\n", sock_fd); + + /* parent program will write a byte to stdin when its ready for this + * helper to exit + */ + read(STDIN_FILENO, &byte, 1); + + close(sock_fd); + return 0; +}