Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this organization
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Resetting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
mariux64
/
linux
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
0
Code
Issues
2
Pull requests
0
Actions
Projects
0
Wiki
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Actions
Projects
Wiki
Security
Insights
Files
2e8806f
Documentation
LICENSES
arch
block
certs
crypto
drivers
fs
include
init
ipc
kernel
lib
mm
net
samples
auxdisplay
binderfs
bpf
.gitignore
Makefile
Makefile.target
README.rst
asm_goto_workaround.h
bpf_insn.h
bpf_load.c
bpf_load.h
cookie_uid_helper_example.c
cpustat_kern.c
cpustat_user.c
do_hbm_test.sh
fds_example.c
hash_func01.h
hbm.c
hbm.h
hbm_edt_kern.c
hbm_kern.h
hbm_out_kern.c
ibumad_kern.c
ibumad_user.c
lathist_kern.c
lathist_user.c
lwt_len_hist.sh
lwt_len_hist_kern.c
lwt_len_hist_user.c
map_perf_test_kern.c
map_perf_test_user.c
offwaketime_kern.c
offwaketime_user.c
parse_ldabs.c
parse_simple.c
parse_varlen.c
run_cookie_uid_helper_example.sh
sampleip_kern.c
sampleip_user.c
sock_example.c
sock_example.h
sock_flags_kern.c
sockex1_kern.c
sockex1_user.c
sockex2_kern.c
sockex2_user.c
sockex3_kern.c
sockex3_user.c
spintest_kern.c
spintest_user.c
syscall_nrs.c
syscall_tp_kern.c
syscall_tp_user.c
task_fd_query_kern.c
task_fd_query_user.c
tc_l2_redirect.sh
tc_l2_redirect_kern.c
tc_l2_redirect_user.c
tcbpf1_kern.c
tcp_basertt_kern.c
tcp_bpf.readme
tcp_bufs_kern.c
tcp_clamp_kern.c
tcp_cong_kern.c
tcp_dumpstats_kern.c
tcp_iw_kern.c
tcp_rwnd_kern.c
tcp_synrto_kern.c
tcp_tos_reflect_kern.c
test_cgrp2_array_pin.c
test_cgrp2_attach.c
test_cgrp2_sock.c
test_cgrp2_sock.sh
test_cgrp2_sock2.c
test_cgrp2_sock2.sh
test_cgrp2_tc.sh
test_cgrp2_tc_kern.c
test_cls_bpf.sh
test_current_task_under_cgroup_kern.c
test_current_task_under_cgroup_user.c
test_ipip.sh
test_lru_dist.c
test_lwt_bpf.c
test_lwt_bpf.sh
test_map_in_map_kern.c
test_map_in_map_user.c
test_overhead_kprobe_kern.c
test_overhead_raw_tp_kern.c
test_overhead_tp_kern.c
test_overhead_user.c
test_override_return.sh
test_probe_write_user_kern.c
test_probe_write_user_user.c
trace_common.h
trace_event_kern.c
trace_event_user.c
trace_output_kern.c
trace_output_user.c
tracex1_kern.c
tracex1_user.c
tracex2_kern.c
tracex2_user.c
tracex3_kern.c
tracex3_user.c
tracex4_kern.c
tracex4_user.c
tracex5_kern.c
tracex5_user.c
tracex6_kern.c
tracex6_user.c
tracex7_kern.c
tracex7_user.c
xdp1_kern.c
xdp1_user.c
xdp2_kern.c
xdp2skb_meta.sh
xdp2skb_meta_kern.c
xdp_adjust_tail_kern.c
xdp_adjust_tail_user.c
xdp_fwd_kern.c
xdp_fwd_user.c
xdp_monitor_kern.c
xdp_monitor_user.c
xdp_redirect_cpu_kern.c
xdp_redirect_cpu_user.c
xdp_redirect_kern.c
xdp_redirect_map_kern.c
xdp_redirect_map_user.c
xdp_redirect_user.c
xdp_router_ipv4_kern.c
xdp_router_ipv4_user.c
xdp_rxq_info_kern.c
xdp_rxq_info_user.c
xdp_sample_pkts_kern.c
xdp_sample_pkts_user.c
xdp_tx_iptunnel_common.h
xdp_tx_iptunnel_kern.c
xdp_tx_iptunnel_user.c
xdpsock.h
xdpsock_kern.c
xdpsock_user.c
xsk_fwd.c
configfs
connector
ftrace
hidraw
hw_breakpoint
kdb
kfifo
kobject
kprobes
livepatch
mei
mic
pidfd
pktgen
qmi
rpmsg
seccomp
timers
trace_events
trace_printk
uhid
v4l
vfio-mdev
vfs
watch_queue
watchdog
Kconfig
Makefile
scripts
security
sound
tools
usr
virt
.clang-format
.cocciconfig
.get_maintainer.ignore
.gitattributes
.gitignore
.mailmap
COPYING
CREDITS
Kbuild
Kconfig
MAINTAINERS
Makefile
README
Breadcrumbs
linux
/
samples
/
bpf
/
xdpsock_user.c
Blame
Blame
Latest commit
History
History
1334 lines (1142 loc) · 32.4 KB
Breadcrumbs
linux
/
samples
/
bpf
/
xdpsock_user.c
Top
File metadata and controls
Code
Blame
1334 lines (1142 loc) · 32.4 KB
Raw
// SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2017 - 2018 Intel Corporation. */ #include <asm/barrier.h> #include <errno.h> #include <getopt.h> #include <libgen.h> #include <linux/bpf.h> #include <linux/compiler.h> #include <linux/if_link.h> #include <linux/if_xdp.h> #include <linux/if_ether.h> #include <linux/ip.h> #include <linux/udp.h> #include <arpa/inet.h> #include <locale.h> #include <net/ethernet.h> #include <net/if.h> #include <poll.h> #include <pthread.h> #include <signal.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/mman.h> #include <sys/resource.h> #include <sys/socket.h> #include <sys/types.h> #include <time.h> #include <unistd.h> #include <bpf/libbpf.h> #include <bpf/xsk.h> #include <bpf/bpf.h> #include "xdpsock.h" #ifndef SOL_XDP #define SOL_XDP 283 #endif #ifndef AF_XDP #define AF_XDP 44 #endif #ifndef PF_XDP #define PF_XDP AF_XDP #endif #define NUM_FRAMES (4 * 1024) #define MIN_PKT_SIZE 64 #define DEBUG_HEXDUMP 0 typedef __u64 u64; typedef __u32 u32; typedef __u16 u16; typedef __u8 u8; static unsigned long prev_time; enum benchmark_type { BENCH_RXDROP = 0, BENCH_TXONLY = 1, BENCH_L2FWD = 2, }; static enum benchmark_type opt_bench = BENCH_RXDROP; static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static const char *opt_if = ""; static int opt_ifindex; static int opt_queue; static unsigned long opt_duration; static unsigned long start_time; static bool benchmark_done; static u32 opt_batch_size = 64; static int opt_pkt_count; static u16 opt_pkt_size = MIN_PKT_SIZE; static u32 opt_pkt_fill_pattern = 0x12345678; static bool opt_extra_stats; static bool opt_quiet; static int opt_poll; static int opt_interval = 1; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; static u32 opt_umem_flags; static int opt_unaligned_chunks; static int opt_mmap_flags; static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; static int opt_timeout = 1000; static bool opt_need_wakeup = true; static u32 
opt_num_xsks = 1; static u32 prog_id; struct xsk_ring_stats { unsigned long rx_npkts; unsigned long tx_npkts; unsigned long rx_dropped_npkts; unsigned long rx_invalid_npkts; unsigned long tx_invalid_npkts; unsigned long rx_full_npkts; unsigned long rx_fill_empty_npkts; unsigned long tx_empty_npkts; unsigned long prev_rx_npkts; unsigned long prev_tx_npkts; unsigned long prev_rx_dropped_npkts; unsigned long prev_rx_invalid_npkts; unsigned long prev_tx_invalid_npkts; unsigned long prev_rx_full_npkts; unsigned long prev_rx_fill_empty_npkts; unsigned long prev_tx_empty_npkts; }; struct xsk_umem_info { struct xsk_ring_prod fq; struct xsk_ring_cons cq; struct xsk_umem *umem; void *buffer; }; struct xsk_socket_info { struct xsk_ring_cons rx; struct xsk_ring_prod tx; struct xsk_umem_info *umem; struct xsk_socket *xsk; struct xsk_ring_stats ring_stats; u32 outstanding_tx; }; static int num_socks; struct xsk_socket_info *xsks[MAX_SOCKS]; static unsigned long get_nsecs(void) { struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); return ts.tv_sec * 1000000000UL + ts.tv_nsec; } static void print_benchmark(bool running) { const char *bench_str = "INVALID"; if (opt_bench == BENCH_RXDROP) bench_str = "rxdrop"; else if (opt_bench == BENCH_TXONLY) bench_str = "txonly"; else if (opt_bench == BENCH_L2FWD) bench_str = "l2fwd"; printf("%s:%d %s ", opt_if, opt_queue, bench_str); if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) printf("xdp-skb "); else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) printf("xdp-drv "); else printf(" "); if (opt_poll) printf("poll() "); if (running) { printf("running..."); fflush(stdout); } } static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk) { struct xdp_statistics stats; socklen_t optlen; int err; optlen = sizeof(stats); err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); if (err) return err; if (optlen == sizeof(struct xdp_statistics)) { xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped; xsk->ring_stats.rx_invalid_npkts = 
stats.rx_invalid_descs; xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs; xsk->ring_stats.rx_full_npkts = stats.rx_ring_full; xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs; xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs; return 0; } return -EINVAL; } static void dump_stats(void) { unsigned long now = get_nsecs(); long dt = now - prev_time; int i; prev_time = now; for (i = 0; i < num_socks && xsks[i]; i++) { char *fmt = "%-15s %'-11.0f %'-11lu\n"; double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps, tx_invalid_pps, tx_empty_pps; rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) * 1000000000. / dt; tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) * 1000000000. / dt; printf("\n sock%d@", i); print_benchmark(false); printf("\n"); printf("%-15s %-11s %-11s %-11.2f\n", "", "pps", "pkts", dt / 1000000000.); printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts); printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts); xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts; xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts; if (opt_extra_stats) { if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) { dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts - xsks[i]->ring_stats.prev_rx_dropped_npkts) * 1000000000. / dt; rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts - xsks[i]->ring_stats.prev_rx_invalid_npkts) * 1000000000. / dt; tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts - xsks[i]->ring_stats.prev_tx_invalid_npkts) * 1000000000. / dt; full_pps = (xsks[i]->ring_stats.rx_full_npkts - xsks[i]->ring_stats.prev_rx_full_npkts) * 1000000000. / dt; fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts - xsks[i]->ring_stats.prev_rx_fill_empty_npkts) * 1000000000. / dt; tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts - xsks[i]->ring_stats.prev_tx_empty_npkts) * 1000000000. 
/ dt; printf(fmt, "rx dropped", dropped_pps, xsks[i]->ring_stats.rx_dropped_npkts); printf(fmt, "rx invalid", rx_invalid_pps, xsks[i]->ring_stats.rx_invalid_npkts); printf(fmt, "tx invalid", tx_invalid_pps, xsks[i]->ring_stats.tx_invalid_npkts); printf(fmt, "rx queue full", full_pps, xsks[i]->ring_stats.rx_full_npkts); printf(fmt, "fill ring empty", fill_empty_pps, xsks[i]->ring_stats.rx_fill_empty_npkts); printf(fmt, "tx ring empty", tx_empty_pps, xsks[i]->ring_stats.tx_empty_npkts); xsks[i]->ring_stats.prev_rx_dropped_npkts = xsks[i]->ring_stats.rx_dropped_npkts; xsks[i]->ring_stats.prev_rx_invalid_npkts = xsks[i]->ring_stats.rx_invalid_npkts; xsks[i]->ring_stats.prev_tx_invalid_npkts = xsks[i]->ring_stats.tx_invalid_npkts; xsks[i]->ring_stats.prev_rx_full_npkts = xsks[i]->ring_stats.rx_full_npkts; xsks[i]->ring_stats.prev_rx_fill_empty_npkts = xsks[i]->ring_stats.rx_fill_empty_npkts; xsks[i]->ring_stats.prev_tx_empty_npkts = xsks[i]->ring_stats.tx_empty_npkts; } else { printf("%-15s\n", "Error retrieving extra stats"); } } } } static bool is_benchmark_done(void) { if (opt_duration > 0) { unsigned long dt = (get_nsecs() - start_time); if (dt >= opt_duration) benchmark_done = true; } return benchmark_done; } static void *poller(void *arg) { (void)arg; while (!is_benchmark_done()) { sleep(opt_interval); dump_stats(); } return NULL; } static void remove_xdp_program(void) { u32 curr_prog_id = 0; if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { printf("bpf_get_link_xdp_id failed\n"); exit(EXIT_FAILURE); } if (prog_id == curr_prog_id) bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); else if (!curr_prog_id) printf("couldn't find a prog id on a given interface\n"); else printf("program on interface changed, not removing\n"); } static void int_exit(int sig) { benchmark_done = true; } static void xdpsock_cleanup(void) { struct xsk_umem *umem = xsks[0]->umem->umem; int i; dump_stats(); for (i = 0; i < num_socks; i++) 
xsk_socket__delete(xsks[i]->xsk); (void)xsk_umem__delete(umem); remove_xdp_program(); } static void __exit_with_error(int error, const char *file, const char *func, int line) { fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func, line, error, strerror(error)); dump_stats(); remove_xdp_program(); exit(EXIT_FAILURE); } #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \ __LINE__) static void swap_mac_addresses(void *data) { struct ether_header *eth = (struct ether_header *)data; struct ether_addr *src_addr = (struct ether_addr *)ð->ether_shost; struct ether_addr *dst_addr = (struct ether_addr *)ð->ether_dhost; struct ether_addr tmp; tmp = *src_addr; *src_addr = *dst_addr; *dst_addr = tmp; } static void hex_dump(void *pkt, size_t length, u64 addr) { const unsigned char *address = (unsigned char *)pkt; const unsigned char *line = address; size_t line_size = 32; unsigned char c; char buf[32]; int i = 0; if (!DEBUG_HEXDUMP) return; sprintf(buf, "addr=%llu", addr); printf("length = %zu\n", length); printf("%s | ", buf); while (length-- > 0) { printf("%02X ", *address++); if (!(++i % line_size) || (length == 0 && i % line_size)) { if (length == 0) { while (i++ % line_size) printf("__ "); } printf(" | "); /* right close */ while (line < address) { c = *line++; printf("%c", (c < 33 || c == 255) ? 0x2E : c); } printf("\n"); if (length > 0) printf("%s | ", buf); } } printf("\n"); } static void *memset32_htonl(void *dest, u32 val, u32 size) { u32 *ptr = (u32 *)dest; int i; val = htonl(val); for (i = 0; i < (size & (~0x3)); i += 4) ptr[i >> 2] = val; for (; i < size; i++) ((char *)dest)[i] = ((char *)&val)[i & 3]; return dest; } /* * This function code has been taken from * Linux kernel lib/checksum.c */ static inline unsigned short from32to16(unsigned int x) { /* add up 16-bit and 16-bit for 16+c bit */ x = (x & 0xffff) + (x >> 16); /* add up carry.. 
*/ x = (x & 0xffff) + (x >> 16); return x; } /* * This function code has been taken from * Linux kernel lib/checksum.c */ static unsigned int do_csum(const unsigned char *buff, int len) { unsigned int result = 0; int odd; if (len <= 0) goto out; odd = 1 & (unsigned long)buff; if (odd) { #ifdef __LITTLE_ENDIAN result += (*buff << 8); #else result = *buff; #endif len--; buff++; } if (len >= 2) { if (2 & (unsigned long)buff) { result += *(unsigned short *)buff; len -= 2; buff += 2; } if (len >= 4) { const unsigned char *end = buff + ((unsigned int)len & ~3); unsigned int carry = 0; do { unsigned int w = *(unsigned int *)buff; buff += 4; result += carry; result += w; carry = (w > result); } while (buff < end); result += carry; result = (result & 0xffff) + (result >> 16); } if (len & 2) { result += *(unsigned short *)buff; buff += 2; } } if (len & 1) #ifdef __LITTLE_ENDIAN result += *buff; #else result += (*buff << 8); #endif result = from32to16(result); if (odd) result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); out: return result; } __sum16 ip_fast_csum(const void *iph, unsigned int ihl); /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. * This function code has been taken from * Linux kernel lib/checksum.c */ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { return (__force __sum16)~do_csum(iph, ihl * 4); } /* * Fold a partial checksum * This function code has been taken from * Linux kernel include/asm-generic/checksum.h */ static inline __sum16 csum_fold(__wsum csum) { u32 sum = (__force u32)csum; sum = (sum & 0xffff) + (sum >> 16); sum = (sum & 0xffff) + (sum >> 16); return (__force __sum16)~sum; } /* * This function code has been taken from * Linux kernel lib/checksum.c */ static inline u32 from64to32(u64 x) { /* add up 32-bit and 32-bit for 32+c bit */ x = (x & 0xffffffff) + (x >> 32); /* add up carry.. 
*/ x = (x & 0xffffffff) + (x >> 32); return (u32)x; } __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum); /* * This function code has been taken from * Linux kernel lib/checksum.c */ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { unsigned long long s = (__force u32)sum; s += (__force u32)saddr; s += (__force u32)daddr; #ifdef __BIG_ENDIAN__ s += proto + len; #else s += (proto + len) << 8; #endif return (__force __wsum)from64to32(s); } /* * This function has been taken from * Linux kernel include/asm-generic/checksum.h */ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) { u32 csum = 0; u32 cnt = 0; /* udp hdr and data */ for (; cnt < len; cnt += 2) csum += udp_pkt[cnt >> 1]; return csum_tcpudp_magic(saddr, daddr, len, proto, csum); } #define ETH_FCS_SIZE 4 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ sizeof(struct udphdr)) #define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; static void gen_eth_hdr_data(void) { struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr)); struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + sizeof(struct ethhdr)); struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; /* ethernet header */ memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); eth_hdr->h_proto = htons(ETH_P_IP); /* IP header */ ip_hdr->version = IPVERSION; ip_hdr->ihl = 0x5; /* 20 byte header */ ip_hdr->tos = 0x0; ip_hdr->tot_len = 
htons(IP_PKT_SIZE); ip_hdr->id = 0; ip_hdr->frag_off = 0; ip_hdr->ttl = IPDEFTTL; ip_hdr->protocol = IPPROTO_UDP; ip_hdr->saddr = htonl(0x0a0a0a10); ip_hdr->daddr = htonl(0x0a0a0a20); /* IP header checksum */ ip_hdr->check = 0; ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl); /* UDP header */ udp_hdr->source = htons(0x1000); udp_hdr->dest = htons(0x1000); udp_hdr->len = htons(UDP_PKT_SIZE); /* UDP data */ memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern, UDP_PKT_DATA_SIZE); /* UDP header checksum */ udp_hdr->check = 0; udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); } static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) { memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE); } static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) { struct xsk_umem_info *umem; struct xsk_umem_config cfg = { /* We recommend that you set the fill ring size >= HW RX ring size + * AF_XDP RX ring size. Make sure you fill up the fill ring * with buffers at regular intervals, and you will with this setting * avoid allocation failures in the driver. These are usually quite * expensive since drivers have not been written to assume that * allocation failures are common. For regular sockets, kernel * allocated memory is used that only runs out in OOM situations * that should be rare. 
*/ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, .frame_size = opt_xsk_frame_size, .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, .flags = opt_umem_flags }; int ret; umem = calloc(1, sizeof(*umem)); if (!umem) exit_with_error(errno); ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, &cfg); if (ret) exit_with_error(-ret); umem->buffer = buffer; return umem; } static void xsk_populate_fill_ring(struct xsk_umem_info *umem) { int ret, i; u32 idx; ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx); if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2) exit_with_error(-ret); for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++) *xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * opt_xsk_frame_size; xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2); } static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, bool rx, bool tx) { struct xsk_socket_config cfg; struct xsk_socket_info *xsk; struct xsk_ring_cons *rxr; struct xsk_ring_prod *txr; int ret; xsk = calloc(1, sizeof(*xsk)); if (!xsk) exit_with_error(errno); xsk->umem = umem; cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; if (opt_num_xsks > 1) cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; else cfg.libbpf_flags = 0; cfg.xdp_flags = opt_xdp_flags; cfg.bind_flags = opt_xdp_bind_flags; rxr = rx ? &xsk->rx : NULL; txr = tx ? 
&xsk->tx : NULL; ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem, rxr, txr, &cfg); if (ret) exit_with_error(-ret); ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); if (ret) exit_with_error(-ret); return xsk; } static struct option long_options[] = { {"rxdrop", no_argument, 0, 'r'}, {"txonly", no_argument, 0, 't'}, {"l2fwd", no_argument, 0, 'l'}, {"interface", required_argument, 0, 'i'}, {"queue", required_argument, 0, 'q'}, {"poll", no_argument, 0, 'p'}, {"xdp-skb", no_argument, 0, 'S'}, {"xdp-native", no_argument, 0, 'N'}, {"interval", required_argument, 0, 'n'}, {"zero-copy", no_argument, 0, 'z'}, {"copy", no_argument, 0, 'c'}, {"frame-size", required_argument, 0, 'f'}, {"no-need-wakeup", no_argument, 0, 'm'}, {"unaligned", no_argument, 0, 'u'}, {"shared-umem", no_argument, 0, 'M'}, {"force", no_argument, 0, 'F'}, {"duration", required_argument, 0, 'd'}, {"batch-size", required_argument, 0, 'b'}, {"tx-pkt-count", required_argument, 0, 'C'}, {"tx-pkt-size", required_argument, 0, 's'}, {"tx-pkt-pattern", required_argument, 0, 'P'}, {"extra-stats", no_argument, 0, 'x'}, {"quiet", no_argument, 0, 'Q'}, {0, 0, 0, 0} }; static void usage(const char *prog) { const char *str = " Usage: %s [OPTIONS]\n" " Options:\n" " -r, --rxdrop Discard all incoming packets (default)\n" " -t, --txonly Only send packets\n" " -l, --l2fwd MAC swap L2 forwarding\n" " -i, --interface=n Run on interface n\n" " -q, --queue=n Use queue n (default 0)\n" " -p, --poll Use poll syscall\n" " -S, --xdp-skb=n Use XDP skb-mod\n" " -N, --xdp-native=n Enforce XDP native mode\n" " -n, --interval=n Specify statistics update interval (default 1 sec).\n" " -z, --zero-copy Force zero-copy mode.\n" " -c, --copy Force copy mode.\n" " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n" " -u, --unaligned Enable unaligned chunk placement\n" " -M, --shared-umem Enable 
XDP_SHARED_UMEM\n" " -F, --force Force loading the XDP prog\n" " -d, --duration=n Duration in secs to run command.\n" " Default: forever.\n" " -b, --batch-size=n Batch size for sending or receiving\n" " packets. Default: %d\n" " -C, --tx-pkt-count=n Number of packets to send.\n" " Default: Continuous packets.\n" " -s, --tx-pkt-size=n Transmit packet size.\n" " (Default: %d bytes)\n" " Min size: %d, Max size %d.\n" " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" " -x, --extra-stats Display extra statistics.\n" " -Q, --quiet Do not display any stats.\n" "\n"; fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); exit(EXIT_FAILURE); } static void parse_command_line(int argc, char **argv) { int option_index, c; opterr = 0; for (;;) { c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQ", long_options, &option_index); if (c == -1) break; switch (c) { case 'r': opt_bench = BENCH_RXDROP; break; case 't': opt_bench = BENCH_TXONLY; break; case 'l': opt_bench = BENCH_L2FWD; break; case 'i': opt_if = optarg; break; case 'q': opt_queue = atoi(optarg); break; case 'p': opt_poll = 1; break; case 'S': opt_xdp_flags |= XDP_FLAGS_SKB_MODE; opt_xdp_bind_flags |= XDP_COPY; break; case 'N': /* default, set below */ break; case 'n': opt_interval = atoi(optarg); break; case 'z': opt_xdp_bind_flags |= XDP_ZEROCOPY; break; case 'c': opt_xdp_bind_flags |= XDP_COPY; break; case 'u': opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; opt_unaligned_chunks = 1; opt_mmap_flags = MAP_HUGETLB; break; case 'F': opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; break; case 'f': opt_xsk_frame_size = atoi(optarg); break; case 'm': opt_need_wakeup = false; opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP; break; case 'M': opt_num_xsks = MAX_SOCKS; break; case 'd': opt_duration = atoi(optarg); opt_duration *= 1000000000; break; case 'b': opt_batch_size = atoi(optarg); break; case 'C': 
opt_pkt_count = atoi(optarg); break; case 's': opt_pkt_size = atoi(optarg); if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) || opt_pkt_size < MIN_PKT_SIZE) { fprintf(stderr, "ERROR: Invalid frame size %d\n", opt_pkt_size); usage(basename(argv[0])); } break; case 'P': opt_pkt_fill_pattern = strtol(optarg, NULL, 16); break; case 'x': opt_extra_stats = 1; break; case 'Q': opt_quiet = 1; break; default: usage(basename(argv[0])); } } if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE)) opt_xdp_flags |= XDP_FLAGS_DRV_MODE; opt_ifindex = if_nametoindex(opt_if); if (!opt_ifindex) { fprintf(stderr, "ERROR: interface \"%s\" does not exist\n", opt_if); usage(basename(argv[0])); } if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) && !opt_unaligned_chunks) { fprintf(stderr, "--frame-size=%d is not a power of two\n", opt_xsk_frame_size); usage(basename(argv[0])); } } static void kick_tx(struct xsk_socket_info *xsk) { int ret; ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) return; exit_with_error(errno); } static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) { struct xsk_umem_info *umem = xsk->umem; u32 idx_cq = 0, idx_fq = 0; unsigned int rcvd; size_t ndescs; if (!xsk->outstanding_tx) return; /* In copy mode, Tx is driven by a syscall so we need to use e.g. sendto() to * really send the packets. In zero-copy mode we do not have to do this, since Tx * is driven by the NAPI loop. So as an optimization, we do not have to call * sendto() all the time in zero-copy mode for l2fwd. */ if (opt_xdp_bind_flags & XDP_COPY) kick_tx(xsk); ndescs = (xsk->outstanding_tx > opt_batch_size) ? 
opt_batch_size : xsk->outstanding_tx; /* re-add completed Tx buffers */ rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq); if (rcvd > 0) { unsigned int i; int ret; ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); if (xsk_ring_prod__needs_wakeup(&umem->fq)) ret = poll(fds, num_socks, opt_timeout); ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); } for (i = 0; i < rcvd; i++) *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++); xsk_ring_prod__submit(&xsk->umem->fq, rcvd); xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; xsk->ring_stats.tx_npkts += rcvd; } } static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size) { unsigned int rcvd; u32 idx; if (!xsk->outstanding_tx) return; if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); if (rcvd > 0) { xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; xsk->ring_stats.tx_npkts += rcvd; } } static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) { unsigned int rcvd, i; u32 idx_rx = 0, idx_fq = 0; int ret; rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); if (!rcvd) { if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) ret = poll(fds, num_socks, opt_timeout); return; } ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) ret = poll(fds, num_socks, opt_timeout); ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); } for (i = 0; i < rcvd; i++) { u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; u64 orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); 
hex_dump(pkt, len, addr); *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; } xsk_ring_prod__submit(&xsk->umem->fq, rcvd); xsk_ring_cons__release(&xsk->rx, rcvd); xsk->ring_stats.rx_npkts += rcvd; } static void rx_drop_all(void) { struct pollfd fds[MAX_SOCKS] = {}; int i, ret; for (i = 0; i < num_socks; i++) { fds[i].fd = xsk_socket__fd(xsks[i]->xsk); fds[i].events = POLLIN; } for (;;) { if (opt_poll) { ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; } for (i = 0; i < num_socks; i++) rx_drop(xsks[i], fds); if (benchmark_done) break; } } static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) { u32 idx; unsigned int i; while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size) { complete_tx_only(xsk, batch_size); } for (i = 0; i < batch_size; i++) { struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); tx_desc->addr = (*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; tx_desc->len = PKT_SIZE; } xsk_ring_prod__submit(&xsk->tx, batch_size); xsk->outstanding_tx += batch_size; *frame_nb += batch_size; *frame_nb %= NUM_FRAMES; complete_tx_only(xsk, batch_size); } static inline int get_batch_size(int pkt_cnt) { if (!opt_pkt_count) return opt_batch_size; if (pkt_cnt + opt_batch_size <= opt_pkt_count) return opt_batch_size; return opt_pkt_count - pkt_cnt; } static void complete_tx_only_all(void) { bool pending; int i; do { pending = false; for (i = 0; i < num_socks; i++) { if (xsks[i]->outstanding_tx) { complete_tx_only(xsks[i], opt_batch_size); pending = !!xsks[i]->outstanding_tx; } } } while (pending); } static void tx_only_all(void) { struct pollfd fds[MAX_SOCKS] = {}; u32 frame_nb[MAX_SOCKS] = {}; int pkt_cnt = 0; int i, ret; for (i = 0; i < num_socks; i++) { fds[0].fd = xsk_socket__fd(xsks[i]->xsk); fds[0].events = POLLOUT; } while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { int batch_size = get_batch_size(pkt_cnt); if (opt_poll) { ret = poll(fds, num_socks, 
opt_timeout); if (ret <= 0) continue; if (!(fds[0].revents & POLLOUT)) continue; } for (i = 0; i < num_socks; i++) tx_only(xsks[i], &frame_nb[i], batch_size); pkt_cnt += batch_size; if (benchmark_done) break; } if (opt_pkt_count) complete_tx_only_all(); } static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) { unsigned int rcvd, i; u32 idx_rx = 0, idx_tx = 0; int ret; complete_tx_l2fwd(xsk, fds); rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); if (!rcvd) { if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) ret = poll(fds, num_socks, opt_timeout); return; } ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); while (ret != rcvd) { if (ret < 0) exit_with_error(-ret); complete_tx_l2fwd(xsk, fds); if (xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); } for (i = 0; i < rcvd; i++) { u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; u64 orig = addr; addr = xsk_umem__add_offset_to_addr(addr); char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); swap_mac_addresses(pkt); hex_dump(pkt, len, addr); xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig; xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; } xsk_ring_prod__submit(&xsk->tx, rcvd); xsk_ring_cons__release(&xsk->rx, rcvd); xsk->ring_stats.rx_npkts += rcvd; xsk->outstanding_tx += rcvd; } static void l2fwd_all(void) { struct pollfd fds[MAX_SOCKS] = {}; int i, ret; for (i = 0; i < num_socks; i++) { fds[i].fd = xsk_socket__fd(xsks[i]->xsk); fds[i].events = POLLOUT | POLLIN; } for (;;) { if (opt_poll) { ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) continue; } for (i = 0; i < num_socks; i++) l2fwd(xsks[i], fds); if (benchmark_done) break; } } static void load_xdp_program(char **argv, struct bpf_object **obj) { struct bpf_prog_load_attr prog_load_attr = { .prog_type = BPF_PROG_TYPE_XDP, }; char xdp_filename[256]; int prog_fd; snprintf(xdp_filename, 
sizeof(xdp_filename), "%s_kern.o", argv[0]); prog_load_attr.file = xdp_filename; if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd)) exit(EXIT_FAILURE); if (prog_fd < 0) { fprintf(stderr, "ERROR: no program found: %s\n", strerror(prog_fd)); exit(EXIT_FAILURE); } if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { fprintf(stderr, "ERROR: link set xdp fd failed\n"); exit(EXIT_FAILURE); } } static void enter_xsks_into_map(struct bpf_object *obj) { struct bpf_map *map; int i, xsks_map; map = bpf_object__find_map_by_name(obj, "xsks_map"); xsks_map = bpf_map__fd(map); if (xsks_map < 0) { fprintf(stderr, "ERROR: no xsks map found: %s\n", strerror(xsks_map)); exit(EXIT_FAILURE); } for (i = 0; i < num_socks; i++) { int fd = xsk_socket__fd(xsks[i]->xsk); int key, ret; key = i; ret = bpf_map_update_elem(xsks_map, &key, &fd, 0); if (ret) { fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); exit(EXIT_FAILURE); } } } int main(int argc, char **argv) { struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; bool rx = false, tx = false; struct xsk_umem_info *umem; struct bpf_object *obj; pthread_t pt; int i, ret; void *bufs; parse_command_line(argc, argv); if (setrlimit(RLIMIT_MEMLOCK, &r)) { fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", strerror(errno)); exit(EXIT_FAILURE); } if (opt_num_xsks > 1) load_xdp_program(argv, &obj); /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */ bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0); if (bufs == MAP_FAILED) { printf("ERROR: mmap failed\n"); exit(EXIT_FAILURE); } /* Create sockets... 
*/ umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) { rx = true; xsk_populate_fill_ring(umem); } if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY) tx = true; for (i = 0; i < opt_num_xsks; i++) xsks[num_socks++] = xsk_configure_socket(umem, rx, tx); if (opt_bench == BENCH_TXONLY) { gen_eth_hdr_data(); for (i = 0; i < NUM_FRAMES; i++) gen_eth_frame(umem, i * opt_xsk_frame_size); } if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY) enter_xsks_into_map(obj); signal(SIGINT, int_exit); signal(SIGTERM, int_exit); signal(SIGABRT, int_exit); setlocale(LC_ALL, ""); if (!opt_quiet) { ret = pthread_create(&pt, NULL, poller, NULL); if (ret) exit_with_error(ret); } prev_time = get_nsecs(); start_time = prev_time; if (opt_bench == BENCH_RXDROP) rx_drop_all(); else if (opt_bench == BENCH_TXONLY) tx_only_all(); else l2fwd_all(); benchmark_done = true; if (!opt_quiet) pthread_join(pt, NULL); xdpsock_cleanup(); return 0; }
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
You can’t perform that action at this time.