Skip to content

Commit

Permalink
Merge branch 'netdev-genl-add-an-xsk-attribute-to-queues'
Browse files Browse the repository at this point in the history
Joe Damato says:

====================
netdev-genl: Add an xsk attribute to queues

This is an attempt to follow up on something Jakub asked me about [1],
adding an xsk attribute to queues and more clearly documenting which
queues are linked to NAPIs...

After the RFC [2], Jakub suggested creating an empty nest for queues
which have a pool, so I've adjusted this version to work that way.

The nest can be extended in the future to express attributes about XSK
as needed. Queues which are not used for AF_XDP do not have the xsk
attribute present.

I've run the included test on:
  - my mlx5 machine (via NETIF=)
  - without setting NETIF

And the test seems to pass in both cases.

[1]: https://lore.kernel.org/netdev/20250113143109.60afa59a@kernel.org/
[2]: https://lore.kernel.org/netdev/20250129172431.65773-1-jdamato@fastly.com/
====================

Link: https://patch.msgid.link/20250214211255.14194-1-jdamato@fastly.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Feb 18, 2025
2 parents c935af4 + 788e52e commit 24fc595
Show file tree
Hide file tree
Showing 10 changed files with 194 additions and 4 deletions.
13 changes: 12 additions & 1 deletion Documentation/netlink/specs/netdev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,9 @@ attribute-sets:
doc: The timeout, in nanoseconds, of how long to suspend irq
processing, if event polling finds events
type: uint
-
name: xsk-info
attributes: []
-
name: queue
attributes:
Expand All @@ -294,6 +297,9 @@ attribute-sets:
-
name: type
doc: Queue type as rx, tx. Each queue type defines a separate ID space.
XDP TX queues allocated in the kernel are not linked to NAPIs and
thus not listed. AF_XDP queues will have more information set in
the xsk attribute.
type: u32
enum: queue-type
-
Expand All @@ -309,7 +315,11 @@ attribute-sets:
doc: io_uring memory provider information.
type: nest
nested-attributes: io-uring-provider-info

-
name: xsk
doc: XSK information for this queue, if any.
type: nest
nested-attributes: xsk-info
-
name: qstats
doc: |
Expand Down Expand Up @@ -652,6 +662,7 @@ operations:
- ifindex
- dmabuf
- io-uring
- xsk
dump:
request:
attributes:
Expand Down
15 changes: 15 additions & 0 deletions include/net/netlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
* nla_nest_start(skb, type) start a nested attribute
* nla_nest_end(skb, nla) finalize a nested attribute
* nla_nest_cancel(skb, nla) cancel nested attribute construction
* nla_put_empty_nest(skb, type) create an empty nest
*
* Attribute Length Calculations:
* nla_attr_size(payload) length of attribute w/o padding
Expand Down Expand Up @@ -2240,6 +2241,20 @@ static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
nlmsg_trim(skb, start);
}

/**
 * nla_put_empty_nest - Emit a nested attribute that carries no payload
 * @skb: socket buffer the message is stored in
 * @attrtype: attribute type of the container
 *
 * Starts a nest of type @attrtype via nla_nest_start() and adds nothing
 * inside it, so only the container attribute itself ends up in @skb.
 *
 * Returns: 0 when successful or -EMSGSIZE on failure.
 */
static inline int nla_put_empty_nest(struct sk_buff *skb, int attrtype)
{
	/* nla_nest_start() yields NULL when the container cannot be added. */
	if (!nla_nest_start(skb, attrtype))
		return -EMSGSIZE;

	return 0;
}

/**
* __nla_validate_nested - Validate a stream of nested attributes
* @start: container attribute
Expand Down
6 changes: 6 additions & 0 deletions include/uapi/linux/netdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,19 @@ enum {
NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
};

/*
 * Attribute IDs for the xsk-info nest. No attributes are defined yet, so
 * __NETDEV_A_XSK_INFO_MAX is 0 and NETDEV_A_XSK_INFO_MAX evaluates to -1.
 */
enum {
__NETDEV_A_XSK_INFO_MAX,
NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
};

enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,

__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
Expand Down
12 changes: 12 additions & 0 deletions net/core/netdev-genl.c
Original file line number Diff line number Diff line change
Expand Up @@ -400,11 +400,23 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev,
if (params->mp_ops &&
params->mp_ops->nl_fill(params->mp_priv, rsp, rxq))
goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
if (rxq->pool)
if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
goto nla_put_failure;
#endif

break;
case NETDEV_QUEUE_TYPE_TX:
txq = netdev_get_tx_queue(netdev, q_idx);
if (nla_put_napi_id(rsp, txq->napi))
goto nla_put_failure;
#ifdef CONFIG_XDP_SOCKETS
if (txq->pool)
if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK))
goto nla_put_failure;
#endif
break;
}

genlmsg_end(rsp, hdr);
Expand Down
6 changes: 6 additions & 0 deletions tools/include/uapi/linux/netdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,19 @@ enum {
NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1)
};

/*
 * Attribute IDs for the xsk-info nest. No attributes are defined yet, so
 * __NETDEV_A_XSK_INFO_MAX is 0 and NETDEV_A_XSK_INFO_MAX evaluates to -1.
 */
enum {
__NETDEV_A_XSK_INFO_MAX,
NETDEV_A_XSK_INFO_MAX = (__NETDEV_A_XSK_INFO_MAX - 1)
};

enum {
NETDEV_A_QUEUE_ID = 1,
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,

__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
Expand Down
2 changes: 2 additions & 0 deletions tools/testing/selftests/drivers/net/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
xdp_helper
3 changes: 3 additions & 0 deletions tools/testing/selftests/drivers/net/Makefile
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += $(KHDR_INCLUDES)

TEST_INCLUDES := $(wildcard lib/py/*.py) \
$(wildcard lib/sh/*.sh) \
../../net/net_helper.sh \
../../net/lib.sh \

TEST_GEN_FILES := xdp_helper

TEST_PROGS := \
netcons_basic.sh \
netcons_fragmented_msg.sh \
Expand Down
1 change: 1 addition & 0 deletions tools/testing/selftests/drivers/net/config
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ CONFIG_CONFIGFS_FS=y
CONFIG_NETCONSOLE=m
CONFIG_NETCONSOLE_DYNAMIC=y
CONFIG_NETCONSOLE_EXTENDED_LOG=y
CONFIG_XDP_SOCKETS=y
42 changes: 39 additions & 3 deletions tools/testing/selftests/drivers/net/queues.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@
# SPDX-License-Identifier: GPL-2.0

from lib.py import ksft_disruptive, ksft_exit, ksft_run
from lib.py import ksft_eq, ksft_raises, KsftSkipEx
from lib.py import ksft_eq, ksft_raises, KsftSkipEx, KsftFailEx
from lib.py import EthtoolFamily, NetdevFamily, NlError
from lib.py import NetDrvEnv
from lib.py import cmd, defer, ip
import errno
import glob

import os
import socket
import struct
import subprocess

def sys_get_queues(ifname, qtype='rx') -> int:
folders = glob.glob(f'/sys/class/net/{ifname}/queues/{qtype}-*')
Expand All @@ -21,6 +24,39 @@ def nl_get_queues(cfg, nl, qtype='rx'):
return len([q for q in queues if q['type'] == qtype])
return None

def check_xdp(cfg, nl, xdp_queue_id=0) -> None:
    """Check that the 'xsk' queue attribute follows an AF_XDP socket binding.

    Runs the xdp_helper binary located next to this script so that it binds
    an AF_XDP socket to queue ``xdp_queue_id`` of ``cfg.ifindex``, then dumps
    the device's queues over netlink and verifies that:

      * both the rx and the tx queue with id ``xdp_queue_id`` are listed and
        carry an empty 'xsk' nest, and
      * no other queue carries an 'xsk' attribute.

    Raises:
        KsftSkipEx: the helper exited with status 255 (AF_XDP unsupported),
            or netlink reported no queues.
        KsftFailEx: the helper failed for any other reason, or a queue other
            than ``xdp_queue_id`` has the 'xsk' attribute set.
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    xdp = subprocess.Popen([f"{test_dir}/xdp_helper", f"{cfg.ifindex}", f"{xdp_queue_id}"],
                           stdin=subprocess.PIPE, stdout=subprocess.PIPE, bufsize=1,
                           text=True)
    defer(xdp.kill)

    # NOTE(review): communicate() without input closes the helper's stdin and
    # waits for it to exit, so the helper's blocking read() sees EOF and the
    # AF_XDP socket is closed before the queue dump below runs. Confirm that
    # this ordering is intended.
    xdp.communicate(timeout=10)

    if xdp.returncode == 255:
        raise KsftSkipEx('AF_XDP unsupported')
    if xdp.returncode != 0:
        # Covers ordinary error exits as well as death by signal (negative
        # returncode), which must not be mistaken for success.
        raise KsftFailEx('unable to create AF_XDP socket')

    queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
    if not queues:
        raise KsftSkipEx("Netlink reports no queues")

    rx = tx = False
    for q in queues:
        if q['id'] == xdp_queue_id:
            if q['type'] == 'rx':
                rx = True
            if q['type'] == 'tx':
                tx = True

            # .get() turns a missing attribute into a reported mismatch
            # instead of an unrelated KeyError.
            ksft_eq(q.get('xsk'), {})
        elif 'xsk' in q:
            raise KsftFailEx("Check failed: xsk attribute set.")

    ksft_eq(rx, True)
    ksft_eq(tx, True)

def get_queues(cfg, nl) -> None:
snl = NetdevFamily(recv_size=4096)
Expand Down Expand Up @@ -81,7 +117,7 @@ def check_down(cfg, nl) -> None:

def main() -> None:
    """Run the queue selftests, including check_xdp, against the NetDrvEnv device."""
    with NetDrvEnv(__file__, queue_count=100) as cfg:
        # Only the post-change test list is kept; the shorter list without
        # check_xdp was the pre-change line of the same statement.
        ksft_run([get_queues, addremove_queues, check_down, check_xdp],
                 args=(cfg, NetdevFamily()))
    ksft_exit()


Expand Down
98 changes: 98 additions & 0 deletions tools/testing/selftests/drivers/net/xdp_helper.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/if_xdp.h>
#include <linux/if_link.h>
#include <net/if.h>
#include <inttypes.h>

#define UMEM_SZ (1U << 16)
#define CHUNK_SZ 2048
#define NUM_DESC (UMEM_SZ / CHUNK_SZ)

/* this is a simple helper program that creates an XDP socket and does the
 * minimum necessary to get bind() to succeed.
 *
 * this test program is not intended to actually process packets, but could be
 * extended in the future if that is actually needed.
 *
 * it is used by queues.py to ensure the xsk netlink attribute is set
 * correctly.
 */

/* Parse @str as a base-10 unsigned 32-bit number.
 *
 * Returns 0 and stores the value in @out on success, -1 if @str is empty,
 * negative, has trailing characters, or does not fit in 32 bits.
 */
static int parse_u32(const char *str, uint32_t *out)
{
	unsigned long val;
	char *end;

	/* strtoul() would silently wrap a negative number */
	if (*str == '-')
		return -1;

	errno = 0;
	val = strtoul(str, &end, 10);
	if (errno || end == str || *end != '\0' || val > UINT32_MAX)
		return -1;

	*out = (uint32_t)val;
	return 0;
}

/* Usage: xdp_helper ifindex queue_id
 *
 * Exit status: 0 once the parent closed or wrote to stdin after a successful
 * bind(), 255 (returned as -1) if AF_XDP sockets are unsupported, 1 on any
 * other failure.
 */
int main(int argc, char **argv)
{
	struct xdp_umem_reg umem_reg = { 0 };
	struct sockaddr_xdp sxdp = { 0 };
	int num_desc = NUM_DESC;
	void *umem_area;
	uint32_t ifindex;
	uint32_t queue;
	int ret = 1;
	int sock_fd;
	char byte;

	if (argc != 3) {
		fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]);
		return 1;
	}

	sock_fd = socket(AF_XDP, SOCK_RAW, 0);
	if (sock_fd < 0) {
		/* perror() may clobber errno, so save it first */
		int err = errno;

		perror("socket creation failed");
		/* if the kernel doesn't support AF_XDP, let the test program
		 * know with -1. All other error paths return 1.
		 */
		if (err == EAFNOSUPPORT)
			return -1;
		return 1;
	}

	if (parse_u32(argv[1], &ifindex) || parse_u32(argv[2], &queue)) {
		fprintf(stderr, "invalid ifindex or queue_id\n");
		goto out_close;
	}

	umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (umem_area == MAP_FAILED) {
		perror("mmap failed");
		goto out_close;
	}

	umem_reg.addr = (uintptr_t)umem_area;
	umem_reg.len = UMEM_SZ;
	umem_reg.chunk_size = CHUNK_SZ;
	umem_reg.headroom = 0;

	if (setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg,
		       sizeof(umem_reg))) {
		perror("setsockopt(XDP_UMEM_REG) failed");
		goto out_unmap;
	}
	if (setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc,
		       sizeof(num_desc))) {
		perror("setsockopt(XDP_UMEM_FILL_RING) failed");
		goto out_unmap;
	}
	if (setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc,
		       sizeof(num_desc))) {
		perror("setsockopt(XDP_UMEM_COMPLETION_RING) failed");
		goto out_unmap;
	}
	if (setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc,
		       sizeof(num_desc))) {
		perror("setsockopt(XDP_RX_RING) failed");
		goto out_unmap;
	}

	sxdp.sxdp_family = AF_XDP;
	sxdp.sxdp_ifindex = ifindex;
	sxdp.sxdp_queue_id = queue;
	sxdp.sxdp_flags = 0;

	if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) {
		perror("bind failed");
		goto out_unmap;
	}

	/* give the parent program some data when the socket is ready.
	 * stdout is normally a pipe here, so flush explicitly or the line
	 * would sit in the stdio buffer until exit.
	 */
	fprintf(stdout, "%d\n", sock_fd);
	fflush(stdout);

	/* parent program will write a byte to stdin (or close it) when it's
	 * ready for this helper to exit
	 */
	if (read(STDIN_FILENO, &byte, 1) < 0)
		perror("read failed");

	ret = 0;

out_unmap:
	munmap(umem_area, UMEM_SZ);
out_close:
	close(sock_fd);
	return ret;
}

0 comments on commit 24fc595

Please sign in to comment.