Skip to content

Commit

Permalink
Merge branch 'netdev-genl-add-an-xsk-attribute-to-queues'
Browse files Browse the repository at this point in the history
Joe Damato says:

====================
netdev-genl: Add an xsk attribute to queues

This is an attempt to follow up on something Jakub asked me about [1],
adding an xsk attribute to queues and more clearly documenting which
queues are linked to NAPIs...

After the RFC [2], Jakub suggested creating an empty nest for queues
which have a pool, so I've adjusted this version to work that way.

The nest can be extended in the future to express attributes about XSK
as needed. Queues which are not used for AF_XDP do not have the xsk
attribute present.

I've run the included test on:
  - my mlx5 machine (via NETIF=)
  - without setting NETIF

And the test seems to pass in both cases.

[1]: https://lore.kernel.org/netdev/20250113143109.60afa59a@kernel.org/
[2]: https://lore.kernel.org/netdev/20250129172431.65773-1-jdamato@fastly.com/
====================

Link: https://patch.msgid.link/20250214211255.14194-1-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Feb 18, 2025
2 parents c935af4 + 788e52e commit 24fc595
Showing 10 changed files with 194 additions and 4 deletions.
13 changes: 12 additions & 1 deletion Documentation/netlink/specs/netdev.yaml
Original file line number Diff line number Diff line change
@@ -276,6 +276,9 @@ attribute-sets:
doc: The timeout, in nanoseconds, of how long to suspend irq
processing, if event polling finds events
type: uint
-
name: xsk-info
attributes: []
-
name: queue
attributes:
@@ -294,6 +297,9 @@ attribute-sets:
-
name: type
doc: Queue type as rx, tx. Each queue type defines a separate ID space.
XDP TX queues allocated in the kernel are not linked to NAPIs and
thus not listed. AF_XDP queues will have more information set in
the xsk attribute.
type: u32
enum: queue-type
-
@@ -309,7 +315,11 @@ attribute-sets:
doc: io_uring memory provider information.
type: nest
nested-attributes: io-uring-provider-info

-
name: xsk
doc: XSK information for this queue, if any.
type: nest
nested-attributes: xsk-info
-
name: qstats
doc: |
@@ -652,6 +662,7 @@ operations:
- ifindex
- dmabuf
- io-uring
- xsk
dump:
request:
attributes:
15 changes: 15 additions & 0 deletions include/net/netlink.h
Original file line number Diff line number Diff line change
@@ -118,6 +118,7 @@
* nla_nest_start(skb, type) start a nested attribute
* nla_nest_end(skb, nla) finalize a nested attribute
* nla_nest_cancel(skb, nla) cancel nested attribute construction
* nla_put_empty_nest(skb, type) create an empty nest
*
* Attribute Length Calculations:
* nla_attr_size(payload) length of attribute w/o padding
@@ -2240,6 +2241,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
nlmsg_trim(skb, start);
}

/**
 * nla_put_empty_nest - Emit a nested attribute that carries no payload
 * @skb: socket buffer the message is stored in
 * @attrtype: attribute type of the container
 *
 * Opens a nest of type @attrtype with nla_nest_start() and adds nothing
 * to it; the mere presence of the attribute is the information.
 *
 * Returns: 0 when successful or -EMSGSIZE on failure.
 */
static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype)
{
	/* nla_nest_start() yields NULL when the nest could not be started. */
	if (!nla_nest_start(skb, attrtype))
		return -EMSGSIZE;

	return 0;
}

/**
* __nla_validate_nested - Validate a stream of nested attributes
* @start: container attribute
6 changes: 6 additions & 0 deletions include/uapi/linux/netdev.h
Original file line number Diff line number Diff line change
@@ -136,13 +136,19 @@ enum {
NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
};

/* Attribute IDs of the xsk-info nest. No attributes are defined here, so
 * __NETDEV_A_XSK_INFO_MAX is 0 and NETDEV_A_XSK_INFO_MAX evaluates to -1.
 */
enum {
__NETDEV_A_XSK_INFO_MAX,
NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
};

enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,

__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
12 changes: 12 additions & 0 deletions net/core/netdev-genl.c
Original file line number Diff line number Diff line change
@@ -400,11 +400,23 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
if (params->mp_ops &&
params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
if (rxq->pool)
if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
goto nla_put_failure;
#endif

break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
if (nla_put_napi_id(rsp, txq->napi))
goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
if (txq->pool)
if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
goto nla_put_failure;
#endif
break;
}

genlmsg_end(rsp, hdr);
6 changes: 6 additions & 0 deletions tools/include/uapi/linux/netdev.h
Original file line number Diff line number Diff line change
@@ -136,13 +136,19 @@ enum {
NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
};

/* Attribute IDs of the xsk-info nest. No attributes are defined here, so
 * __NETDEV_A_XSK_INFO_MAX is 0 and NETDEV_A_XSK_INFO_MAX evaluates to -1.
 */
enum {
__NETDEV_A_XSK_INFO_MAX,
NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
};

enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,

__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
2 changes: 2 additions & 0 deletions tools/testing/selftests/drivers/net/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
xdp_helper
3 changes: 3 additions & 0 deletions tools/testing/selftests/drivers/net/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += $(KHDR_INCLUDES)

TEST_INCLUDES := $(wildcard lib/py/*.py) \
$(wildcard lib/sh/*.sh) \
../../net/net_helper.sh \
../../net/lib.sh \

TEST_GEN_FILES := xdp_helper

TEST_PROGS := \
netcons_basic.sh \
netcons_fragmented_msg.sh \
1 change: 1 addition & 0 deletions tools/testing/selftests/drivers/net/config
Original file line number Diff line number Diff line change
@@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
CONFIG_NETCONSOLE_EXTENDED_LOG=y
CONFIG_XDP_SOCKETS=y
42 changes: 39 additions & 3 deletions tools/testing/selftests/drivers/net/queues.py
Original file line number Diff line number Diff line change
@@ -2,13 +2,16 @@
# SPDX-License-Identifier: GPL-2.0

from lib.py import ksft_disruptive, ksft_exit, ksft_run
from lib.py import ksft_eq, ksft_raises, KsftSkipEx
from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx
from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import NetDrvEnv
from lib.py import cmd, defer, ip
import errno
import glob

import os
import socket
import struct
import subprocess

def sys_get_queues(ifname, qtype='rx') -> int:
folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
@@ -21,6 +24,39 @@ def nl_get_queues(cfg, nl, qtype='rx'):
return len([q for q in queues if q['type'] == qtype])
return None

def check_xdp(cfg, nl, xdp_queue_id=0) -> None:
    """Check that only the queue bound to an AF_XDP socket reports 'xsk'.

    Starts xdp_helper, which binds an AF_XDP socket to queue
    ``xdp_queue_id`` of ``cfg.ifindex``, and dumps the device queues over
    netlink while the helper is still alive. The rx and tx queue with that
    id must carry an empty 'xsk' nest; no other queue may carry one.

    Raises KsftSkipEx when AF_XDP is unsupported (helper exit status 255)
    or netlink reports no queues, and KsftFailEx when the helper cannot set
    up its socket or an unrelated queue carries the 'xsk' attribute.
    """
    import select  # local import: only this test needs it

    test_dir = os.path.dirname(os.path.realpath(__file__))
    xdp = subprocess.Popen([f"{test_dir}/xdp_helper", f"{cfg.ifindex}", f"{xdp_queue_id}"],
                           stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1,
                           text=True)

    def stop_helper():
        # kill() alone would leave the child unreaped
        xdp.kill()
        xdp.wait()

    defer(stop_helper)

    # The helper must stay alive while the queues are dumped: it keeps the
    # socket open until it is told to exit. communicate() cannot be used
    # here because it closes stdin and waits for the helper to terminate.
    #
    # The helper prints one line once bind() succeeded. EOF on stdout means
    # it exited early. If nothing shows up within the timeout (the line may
    # still sit in the helper's stdio buffer) but the helper is running,
    # it is blocked waiting on stdin with the socket bound.
    readable, _, _ = select.select([xdp.stdout], [], [], 5)
    if readable and not xdp.stdout.readline():
        xdp.wait(timeout=10)

    if xdp.poll() is not None:
        if xdp.returncode == 255:
            raise KsftSkipEx('AF_XDP unsupported')
        raise KsftFailEx('unable to create AF_XDP socket')

    queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
    if not queues:
        raise KsftSkipEx("Netlink reports no queues")

    rx = tx = False
    for q in queues:
        if q['id'] == xdp_queue_id:
            if q['type'] == 'rx':
                rx = True
            if q['type'] == 'tx':
                tx = True

            # .get() so a missing attribute is reported as a failed check
            # rather than as a KeyError
            ksft_eq(q.get('xsk'), {})
        elif 'xsk' in q:
            raise KsftFailEx("Check failed: xsk attribute set.")

    ksft_eq(rx, True)
    ksft_eq(tx, True)

def get_queues(cfg, nl) -> None:
snl = NetdevFamily(recv_size=4096)
@@ -81,7 +117,7 @@ def check_down(cfg, nl) -> None:

def main() -> None:
with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([get_queues, addremove_queues, check_down], args=(cfg, NetdevFamily()))
ksft_run([get_queues, addremove_queues, check_down, check_xdp], args=(cfg, NetdevFamily()))
ksft_exit()


98 changes: 98 additions & 0 deletions tools/testing/selftests/drivers/net/xdp_helper.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>
#include <linux/if_link.h>
#include <net/if.h>
#include <inttypes.h>

#define UMEM_SZ (1U << 16)
#define NUM_DESC (UMEM_SZ / 2048)

/* This is a simple helper program that creates an XDP socket and does the
 * minimum necessary to get bind() to succeed.
 *
 * This test program is not intended to actually process packets, but could be
 * extended in the future if that is actually needed.
 *
 * It is used by queues.py to ensure the xsk netlink attribute is set
 * correctly.
 *
 * Usage: xdp_helper ifindex queue_id
 *
 * Exit status: 0 on success, -1 (seen as 255 by the parent) when the kernel
 * does not support AF_XDP, 1 on any other error.
 */
int main(int argc, char **argv)
{
	struct xdp_umem_reg umem_reg = { 0 };
	struct sockaddr_xdp sxdp = { 0 };
	int num_desc = NUM_DESC;
	void *umem_area;
	int ret = 1;
	int ifindex;
	int sock_fd;
	int queue;
	char byte;

	if (argc != 3) {
		fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]);
		return 1;
	}

	sock_fd = socket(AF_XDP, SOCK_RAW, 0);
	if (sock_fd < 0) {
		/* save errno: perror() may overwrite it */
		int err = errno;

		perror("socket creation failed");
		/* if the kernel doesn't support AF_XDP, let the test program
		 * know with -1. All other error paths return 1.
		 */
		if (err == EAFNOSUPPORT)
			return -1;
		return 1;
	}

	ifindex = atoi(argv[1]);
	queue = atoi(argv[2]);

	umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE |
			 MAP_ANONYMOUS, -1, 0);
	if (umem_area == MAP_FAILED) {
		perror("mmap failed");
		goto out_close;
	}

	umem_reg.addr = (uintptr_t)umem_area;
	umem_reg.len = UMEM_SZ;
	umem_reg.chunk_size = 2048;
	umem_reg.headroom = 0;

	/* report the step that failed instead of a later, less specific
	 * bind() error
	 */
	if (setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg,
		       sizeof(umem_reg))) {
		perror("setsockopt XDP_UMEM_REG failed");
		goto out_unmap;
	}
	if (setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc,
		       sizeof(num_desc))) {
		perror("setsockopt XDP_UMEM_FILL_RING failed");
		goto out_unmap;
	}
	if (setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc,
		       sizeof(num_desc))) {
		perror("setsockopt XDP_UMEM_COMPLETION_RING failed");
		goto out_unmap;
	}
	if (setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc,
		       sizeof(num_desc))) {
		perror("setsockopt XDP_RX_RING failed");
		goto out_unmap;
	}

	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = ifindex;
	sxdp.sxdp_queue_id = queue;
	sxdp.sxdp_flags = 0;

	if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) {
		perror("bind failed");
		goto out_unmap;
	}

	/* give the parent program some data when the socket is ready.
	 * stdout is a pipe when run from the test, so flush explicitly or
	 * the line stays in the stdio buffer until exit.
	 */
	fprintf(stdout, "%d\n", sock_fd);
	fflush(stdout);

	/* parent program will write a byte to stdin (or close it) when it is
	 * ready for this helper to exit
	 */
	if (read(STDIN_FILENO, &byte, 1) < 0)
		perror("read failed");

	ret = 0;

out_unmap:
	munmap(umem_area, UMEM_SZ);
out_close:
	close(sock_fd);
	return ret;
}

0 comments on commit 24fc595

Please sign in to comment.