From b61a28cf11d61f512172e673b8f8c4a6c789b425 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Wed, 28 Jul 2021 18:47:41 +0200 Subject: [PATCH 01/31] bpf: Fix off-by-one in tail call count limiting Before, the interpreter allowed up to MAX_TAIL_CALL_CNT + 1 tail calls. Now precisely MAX_TAIL_CALL_CNT is allowed, which is in line with the behavior of the x86 JITs. Signed-off-by: Johan Almbladh Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210728164741.350370-1-johan.almbladh@anyfinetworks.com --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b1a5fc04492bd..fe807b203a6fd 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1562,7 +1562,7 @@ static u64 ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn) if (unlikely(index >= array->map.max_entries)) goto out; - if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT)) + if (unlikely(tail_call_cnt >= MAX_TAIL_CALL_CNT)) goto out; tail_call_cnt++; From 83f31535565c63ac4f62c7b8592210929a630d3d Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 31 Jul 2021 12:50:38 -0700 Subject: [PATCH 02/31] bpf, unix: Check socket type in unix_bpf_update_proto() As of now, only AF_UNIX datagram socket supports sockmap. But unix_proto is shared for all kinds of AF_UNIX sockets, so we have to check the socket type in unix_bpf_update_proto() to explicitly reject other types, otherwise they could be added into sockmap, too. 
Fixes: c63829182c37 ("af_unix: Implement ->psock_update_sk_prot()") Reported-by: Jakub Sitnicki Signed-off-by: Cong Wang Signed-off-by: Daniel Borkmann Acked-by: Jakub Sitnicki Link: https://lore.kernel.org/bpf/20210731195038.8084-1-xiyou.wangcong@gmail.com --- net/unix/unix_bpf.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index 177e883f451e8..20f53575b5c9f 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -105,6 +105,9 @@ static void unix_bpf_check_needs_rebuild(struct proto *ops) int unix_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore) { + if (sk->sk_type != SOCK_DGRAM) + return -EOPNOTSUPP; + if (restore) { sk->sk_write_space = psock->saved_write_space; WRITE_ONCE(sk->sk_prot, psock->sk_proto); From 6d4eb36d65979ad48f4b05c3309e0c74f04e5ac6 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 4 Aug 2021 08:37:50 -0700 Subject: [PATCH 03/31] bpf: Fix bpf_prog_test_run_xdp logic after incorrect merge resolution During recent net into net-next merge ([0]) a piece of old logic ([1]) got reintroduced accidentally while resolving merge conflict between bpf's [2] and bpf-next's [3]. This check was removed in bpf-next tree to allow extra ctx_in parameter passed for XDP test runs. Reinstating the check breaks bpf_prog_test_run_xdp logic and causes a corresponding xdp_context_test_run selftest failure. Fix by removing the check and allow ctx_in for XDP test runs. 
[0] 5af84df962dd ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") [1] 947e8b595b82 ("bpf: explicitly prohibit ctx_{in, out} in non-skb BPF_PROG_TEST_RUN") [2] 5e21bb4e8125 ("bpf, test: fix NULL pointer dereference on invalid expected_attach_type") [3] 47316f4a3053 ("bpf: Support input xdp_md context in BPF_PROG_TEST_RUN") Fixes: 5af84df962dd ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Daniel Borkmann --- net/bpf/test_run.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 695449088e42e..335e1d8c17f8a 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -763,8 +763,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, if (prog->expected_attach_type == BPF_XDP_DEVMAP || prog->expected_attach_type == BPF_XDP_CPUMAP) return -EINVAL; - if (kattr->test.ctx_in || kattr->test.ctx_out) - return -EINVAL; + ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md)); if (IS_ERR(ctx)) return PTR_ERR(ctx); From 34ad6d9d8c27293e1895b448af7d6cf5d351ad8d Mon Sep 17 00:00:00 2001 From: Matthew Cover Date: Fri, 30 Jul 2021 17:56:32 -0700 Subject: [PATCH 04/31] bpf, samples: Add missing mprog-disable to xdp_redirect_cpu's optstring Commit ce4dade7f12a ("samples/bpf: xdp_redirect_cpu: Load a eBPF program on cpumap") added the following option, but missed adding it to optstring: - mprog-disable: disable loading XDP program on cpumap entries Fix it and add the missing option character. 
Fixes: ce4dade7f12a ("samples/bpf: xdp_redirect_cpu: Load a eBPF program on cpumap") Signed-off-by: Matthew Cover Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210731005632.13228-1-matthew.cover@stackpath.com --- samples/bpf/xdp_redirect_cpu_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index d3ecdc18b9c1f..9e225c96b77ec 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -841,7 +841,7 @@ int main(int argc, char **argv) memset(cpu, 0, n_cpus * sizeof(int)); /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:", + while ((opt = getopt_long(argc, argv, "hSd:s:p:q:c:xzFf:e:r:m:n", long_options, &longindex)) != -1) { switch (opt) { case 'd': From 372642ea83ff1c71a5d567a704c912359eb59776 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Wed, 4 Aug 2021 13:55:24 -0700 Subject: [PATCH 05/31] selftests/bpf: Move netcnt test under test_progs Rewrite to skel and ASSERT macros as well while we are at it. v3: - replace -f with -A to make it work with busybox ping. -A is available on both busybox and iputils, from the man page: On networks with low RTT this mode is essentially equivalent to flood mode. 
v2: - don't check result of bpf_map__fd (Yonghong Song) - remove from .gitignore (Andrii Nakryiko) - move ping_command into network_helpers (Andrii Nakryiko) - remove assert() (Andrii Nakryiko) Signed-off-by: Stanislav Fomichev Signed-off-by: Andrii Nakryiko Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210804205524.3748709-1-sdf@google.com --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/network_helpers.c | 12 ++ tools/testing/selftests/bpf/network_helpers.h | 1 + .../testing/selftests/bpf/prog_tests/netcnt.c | 82 ++++++++++ .../selftests/bpf/prog_tests/tc_redirect.c | 12 -- tools/testing/selftests/bpf/test_netcnt.c | 148 ------------------ 7 files changed, 96 insertions(+), 163 deletions(-) create mode 100644 tools/testing/selftests/bpf/prog_tests/netcnt.c delete mode 100644 tools/testing/selftests/bpf/test_netcnt.c diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index addcfd8b615eb..433f8bef261e2 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -23,7 +23,6 @@ test_skb_cgroup_id_user test_cgroup_storage test_flow_dissector flow_dissector_load -test_netcnt test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f405b20c1e6c5..2a58b7b5aea48 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -38,7 +38,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_verifier_log test_dev_cgroup \ test_sock test_sockmap get_cgroup_id_user \ test_cgroup_storage \ - test_netcnt test_tcpnotify_user test_sysctl \ + test_tcpnotify_user test_sysctl \ test_progs-no_alu32 # Also test bpf-gcc, if present @@ -197,7 +197,6 @@ $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c 
$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c -$(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 26468a8f44f37..d6857683397fb 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -310,3 +310,15 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, } return -1; } + +char *ping_command(int family) +{ + if (family == AF_INET6) { + /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ + if (!system("which ping6 >/dev/null 2>&1")) + return "ping6"; + else + return "ping -6"; + } + return "ping"; +} diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index d60bc2897770e..c59a8f6d770b7 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -46,5 +46,6 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int timeout_ms); int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); +char *ping_command(int family); #endif diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c new file mode 100644 index 0000000000000..6ede48bde91b7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "network_helpers.h" +#include "netcnt_prog.skel.h" +#include "netcnt_common.h" + +#define CG_NAME "/netcnt" + +void test_netcnt(void) +{ + union percpu_net_cnt *percpu_netcnt = NULL; + struct bpf_cgroup_storage_key key; + int map_fd, percpu_map_fd; + struct netcnt_prog *skel; + unsigned long packets; + union net_cnt 
netcnt; + unsigned long bytes; + int cpu, nproc; + int cg_fd = -1; + char cmd[128]; + + skel = netcnt_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load")) + return; + + nproc = get_nprocs_conf(); + percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); + if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)")) + goto err; + + cg_fd = test__join_cgroup(CG_NAME); + if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup")) + goto err; + + skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd); + if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt, + "attach_cgroup(bpf_nextcnt)")) + goto err; + + snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6)); + ASSERT_OK(system(cmd), cmd); + + map_fd = bpf_map__fd(skel->maps.netcnt); + if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key")) + goto err; + + if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)")) + goto err; + + percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt); + if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]), + "bpf_map_lookup_elem(percpu_netcnt)")) + goto err; + + /* Some packets can be still in per-cpu cache, but not more than + * MAX_PERCPU_PACKETS. + */ + packets = netcnt.packets; + bytes = netcnt.bytes; + for (cpu = 0; cpu < nproc; cpu++) { + ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS"); + + packets += percpu_netcnt[cpu].packets; + bytes += percpu_netcnt[cpu].bytes; + } + + /* No packets should be lost */ + ASSERT_EQ(packets, 10000, "packets"); + + /* Let's check that bytes counter matches the number of packets + * multiplied by the size of ipv6 ICMP packet. 
+ */ + ASSERT_EQ(bytes, packets * 104, "bytes"); + +err: + if (cg_fd != -1) + close(cg_fd); + free(percpu_netcnt); + netcnt_prog__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 932e4ee3f97c1..e7201ba29ccd6 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -390,18 +390,6 @@ static void test_tcp(int family, const char *addr, __u16 port) close(client_fd); } -static char *ping_command(int family) -{ - if (family == AF_INET6) { - /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */ - if (!system("which ping6 >/dev/null 2>&1")) - return "ping6"; - else - return "ping -6"; - } - return "ping"; -} - static int test_ping(int family, const char *addr) { SYS("ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr); diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c deleted file mode 100644 index 4990a99e73818..0000000000000 --- a/tools/testing/selftests/bpf/test_netcnt.c +++ /dev/null @@ -1,148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "cgroup_helpers.h" -#include "bpf_rlimit.h" -#include "netcnt_common.h" - -#define BPF_PROG "./netcnt_prog.o" -#define TEST_CGROUP "/test-network-counters/" - -static int bpf_find_map(const char *test, struct bpf_object *obj, - const char *name) -{ - struct bpf_map *map; - - map = bpf_object__find_map_by_name(obj, name); - if (!map) { - printf("%s:FAIL:map '%s' not found\n", test, name); - return -1; - } - return bpf_map__fd(map); -} - -int main(int argc, char **argv) -{ - union percpu_net_cnt *percpu_netcnt; - struct bpf_cgroup_storage_key key; - int map_fd, percpu_map_fd; - int error = EXIT_FAILURE; - struct bpf_object *obj; - int prog_fd, 
cgroup_fd; - unsigned long packets; - union net_cnt netcnt; - unsigned long bytes; - int cpu, nproc; - __u32 prog_cnt; - - nproc = get_nprocs_conf(); - percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc); - if (!percpu_netcnt) { - printf("Not enough memory for per-cpu area (%d cpus)\n", nproc); - goto err; - } - - if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB, - &obj, &prog_fd)) { - printf("Failed to load bpf program\n"); - goto out; - } - - cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); - if (cgroup_fd < 0) - goto err; - - /* Attach bpf program */ - if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) { - printf("Failed to attach bpf program"); - goto err; - } - - if (system("which ping6 &>/dev/null") == 0) - assert(!system("ping6 ::1 -c 10000 -f -q > /dev/null")); - else - assert(!system("ping -6 ::1 -c 10000 -f -q > /dev/null")); - - if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL, - &prog_cnt)) { - printf("Failed to query attached programs"); - goto err; - } - - map_fd = bpf_find_map(__func__, obj, "netcnt"); - if (map_fd < 0) { - printf("Failed to find bpf map with net counters"); - goto err; - } - - percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt"); - if (percpu_map_fd < 0) { - printf("Failed to find bpf map with percpu net counters"); - goto err; - } - - if (bpf_map_get_next_key(map_fd, NULL, &key)) { - printf("Failed to get key in cgroup storage\n"); - goto err; - } - - if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) { - printf("Failed to lookup cgroup storage\n"); - goto err; - } - - if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) { - printf("Failed to lookup percpu cgroup storage\n"); - goto err; - } - - /* Some packets can be still in per-cpu cache, but not more than - * MAX_PERCPU_PACKETS. 
- */ - packets = netcnt.packets; - bytes = netcnt.bytes; - for (cpu = 0; cpu < nproc; cpu++) { - if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) { - printf("Unexpected percpu value: %llu\n", - percpu_netcnt[cpu].packets); - goto err; - } - - packets += percpu_netcnt[cpu].packets; - bytes += percpu_netcnt[cpu].bytes; - } - - /* No packets should be lost */ - if (packets != 10000) { - printf("Unexpected packet count: %lu\n", packets); - goto err; - } - - /* Let's check that bytes counter matches the number of packets - * multiplied by the size of ipv6 ICMP packet. - */ - if (bytes != packets * 104) { - printf("Unexpected bytes count: %lu\n", bytes); - goto err; - } - - error = 0; - printf("test_netcnt:PASS\n"); - -err: - cleanup_cgroup_environment(); - free(percpu_netcnt); - -out: - return error; -} From 277b134057036df8c657079ca92c3e5e7d10aeaf Mon Sep 17 00:00:00 2001 From: Jose Blanquicet Date: Thu, 5 Aug 2021 18:40:36 +0200 Subject: [PATCH 06/31] selftests/bpf: Fix bpf-iter-tcp4 test to print correctly the dest IP Currently, this test is incorrectly printing the destination port in place of the destination IP. 
Fixes: 2767c97765cb ("selftests/bpf: Implement sample tcp/tcp6 bpf_iter programs") Signed-off-by: Jose Blanquicet Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210805164044.527903-1-josebl@microsoft.com --- tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 2e4775c354149..92267abb462fc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -121,7 +121,7 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, } BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", - seq_num, src, srcp, destp, destp); + seq_num, src, srcp, dest, destp); BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", state, tp->write_seq - tp->snd_una, rx_queue, From 579345e7f2190c1ee97f44154526dcd458ea790d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 5 Aug 2021 16:07:34 -0700 Subject: [PATCH 07/31] selftests/bpf: Rename reference_tracking BPF programs BPF programs for reference_tracking selftest use "fail_" prefix to notify that they are expected to fail. This is really confusing and inconvenient when trying to grep through test_progs output to find *actually* failed tests. So rename the prefix from "fail_" to "err_". 
Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210805230734.437914-1-andrii@kernel.org --- .../selftests/bpf/prog_tests/reference_tracking.c | 4 ++-- .../selftests/bpf/progs/test_sk_lookup_kern.c | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index de26881666962..4e91f4d6466c6 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -34,8 +34,8 @@ void test_reference_tracking(void) if (!test__start_subtest(title)) continue; - /* Expect verifier failure if test name has 'fail' */ - if (strstr(title, "fail") != NULL) { + /* Expect verifier failure if test name has 'err' */ + if (strstr(title, "err_") != NULL) { libbpf_print_fn_t old_print_fn; old_print_fn = libbpf_set_print(NULL); diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index e83d0b48d80ca..8249075f088f2 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -91,7 +91,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_use_after_free") +SEC("classifier/err_use_after_free") int bpf_sk_lookup_uaf(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -106,7 +106,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) return family; } -SEC("classifier/fail_modify_sk_pointer") +SEC("classifier/err_modify_sk_pointer") int bpf_sk_lookup_modptr(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -121,7 +121,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_modify_sk_or_null_pointer") +SEC("classifier/err_modify_sk_or_null_pointer") int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) { 
struct bpf_sock_tuple tuple = {}; @@ -135,7 +135,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_no_release") +SEC("classifier/err_no_release") int bpf_sk_lookup_test2(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -144,7 +144,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_release_twice") +SEC("classifier/err_release_twice") int bpf_sk_lookup_test3(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -156,7 +156,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) return 0; } -SEC("classifier/fail_release_unchecked") +SEC("classifier/err_release_unchecked") int bpf_sk_lookup_test4(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -173,7 +173,7 @@ void lookup_no_release(struct __sk_buff *skb) bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } -SEC("classifier/fail_no_release_subcall") +SEC("classifier/err_no_release_subcall") int bpf_sk_lookup_test5(struct __sk_buff *skb) { lookup_no_release(skb); From 29f24c43cbe09b83162776a370848d5a782dc3b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Fri, 6 Aug 2021 14:28:54 +0200 Subject: [PATCH 08/31] samples/bpf: xdpsock: Make the sample more useful outside the tree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The xdpsock sample application is a useful base for experiments around AF_XDP sockets. Compiling the sample outside of the kernel tree is made harder than it has to be as the sample includes two headers, asm/barrier.h and linux/compiler.h, that are not installed by 'make install_header' nor are usually part of distributions' kernel headers. The first header asm/barrier.h is not used and can just be dropped. The second, linux/compiler.h, is only needed for the decorator __force, which is only used in ip_fast_csum(), csum_fold() and csum_tcpudp_nofold(). These functions are copied verbatim from include/asm-generic/checksum.h and lib/checksum.c. 
While it's fine to copy and use these functions in the sample application the decorator brings no value and can be dropped together with the include. With this change it's trivial to compile the xdpsock sample outside the kernel tree from xdpsock_user.c and xdpsock.h. $ gcc -o xdpsock xdpsock_user.c -lbpf -lpthread Signed-off-by: Niklas Söderlund Signed-off-by: Simon Horman Signed-off-by: Andrii Nakryiko Reviewed-by: Louis Peens Link: https://lore.kernel.org/bpf/20210806122855.26115-2-simon.horman@corigine.com --- samples/bpf/xdpsock_user.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 33d0bdebbed81..7c56a7a784e13 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1,12 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2017 - 2018 Intel Corporation. */ -#include #include #include #include #include -#include #include #include #include @@ -663,7 +661,7 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl); */ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { - return (__force __sum16)~do_csum(iph, ihl * 4); + return (__sum16)~do_csum(iph, ihl * 4); } /* @@ -673,11 +671,11 @@ __sum16 ip_fast_csum(const void *iph, unsigned int ihl) */ static inline __sum16 csum_fold(__wsum csum) { - u32 sum = (__force u32)csum; + u32 sum = (u32)csum; sum = (sum & 0xffff) + (sum >> 16); sum = (sum & 0xffff) + (sum >> 16); - return (__force __sum16)~sum; + return (__sum16)~sum; } /* @@ -703,16 +701,16 @@ __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __wsum sum) { - unsigned long long s = (__force u32)sum; + unsigned long long s = (u32)sum; - s += (__force u32)saddr; - s += (__force u32)daddr; + s += (u32)saddr; + s += (u32)daddr; #ifdef __BIG_ENDIAN__ s += proto + len; #else s += (proto + len) << 8; #endif - return (__force __wsum)from64to32(s); + return 
(__wsum)from64to32(s); } /* From f4700a62c27161e364f66fdce527e8b04083c444 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Niklas=20S=C3=B6derlund?= Date: Fri, 6 Aug 2021 14:28:55 +0200 Subject: [PATCH 09/31] samples/bpf: xdpsock: Remove forward declaration of ip_fast_csum() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a forward declaration of ip_fast_csum() just before its implementation, remove the unneeded forward declaration. While at it mark the implementation as static inline. Signed-off-by: Niklas Söderlund Signed-off-by: Simon Horman Signed-off-by: Andrii Nakryiko Reviewed-by: Louis Peens Link: https://lore.kernel.org/bpf/20210806122855.26115-3-simon.horman@corigine.com --- samples/bpf/xdpsock_user.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index 7c56a7a784e13..49d7a6ad7e397 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -651,15 +651,13 @@ static unsigned int do_csum(const unsigned char *buff, int len) return result; } -__sum16 ip_fast_csum(const void *iph, unsigned int ihl); - /* * This is a version of ip_compute_csum() optimized for IP headers, * which always checksum on 4 octet boundaries. * This function code has been taken from * Linux kernel lib/checksum.c */ -__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) { return (__sum16)~do_csum(iph, ihl * 4); } From a815bde56b15ce626caaacc952ab12501671e45d Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Sat, 31 Jul 2021 05:57:32 +0000 Subject: [PATCH 10/31] net, bonding: Refactor bond_xmit_hash for use with xdp_buff In preparation for adding XDP support to the bonding driver refactor the packet hashing functions to be able to work with any linear data buffer without an skb. 
Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Link: https://lore.kernel.org/bpf/20210731055738.16820-2-joamaki@gmail.com --- drivers/net/bonding/bond_main.c | 147 +++++++++++++++++++------------- 1 file changed, 90 insertions(+), 57 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 616ebbb08ca6b..487c71fcec586 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3614,55 +3614,80 @@ static struct notifier_block bond_netdev_notifier = { /*---------------------------- Hashing Policies -----------------------------*/ +/* Helper to access data in a packet, with or without a backing skb. + * If skb is given the data is linearized if necessary via pskb_may_pull. + */ +static inline const void *bond_pull_data(struct sk_buff *skb, + const void *data, int hlen, int n) +{ + if (likely(n <= hlen)) + return data; + else if (skb && likely(pskb_may_pull(skb, n))) + return skb->head; + + return NULL; +} + /* L2 hash helper */ -static inline u32 bond_eth_hash(struct sk_buff *skb) +static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen) { - struct ethhdr *ep, hdr_tmp; + struct ethhdr *ep; - ep = skb_header_pointer(skb, 0, sizeof(hdr_tmp), &hdr_tmp); - if (ep) - return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto; - return 0; + data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr)); + if (!data) + return 0; + + ep = (struct ethhdr *)(data + mhoff); + return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto; } -static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, - int *noff, int *proto, bool l34) +static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data, + int hlen, __be16 l2_proto, int *nhoff, int *ip_proto, bool l34) { const struct ipv6hdr *iph6; const struct iphdr *iph; - if (skb->protocol == htons(ETH_P_IP)) { - if 
(unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph)))) + if (l2_proto == htons(ETH_P_IP)) { + data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph)); + if (!data) return false; - iph = (const struct iphdr *)(skb->data + *noff); + + iph = (const struct iphdr *)(data + *nhoff); iph_to_flow_copy_v4addrs(fk, iph); - *noff += iph->ihl << 2; + *nhoff += iph->ihl << 2; if (!ip_is_fragment(iph)) - *proto = iph->protocol; - } else if (skb->protocol == htons(ETH_P_IPV6)) { - if (unlikely(!pskb_may_pull(skb, *noff + sizeof(*iph6)))) + *ip_proto = iph->protocol; + } else if (l2_proto == htons(ETH_P_IPV6)) { + data = bond_pull_data(skb, data, hlen, *nhoff + sizeof(*iph6)); + if (!data) return false; - iph6 = (const struct ipv6hdr *)(skb->data + *noff); + + iph6 = (const struct ipv6hdr *)(data + *nhoff); iph_to_flow_copy_v6addrs(fk, iph6); - *noff += sizeof(*iph6); - *proto = iph6->nexthdr; + *nhoff += sizeof(*iph6); + *ip_proto = iph6->nexthdr; } else { return false; } - if (l34 && *proto >= 0) - fk->ports.ports = skb_flow_get_ports(skb, *noff, *proto); + if (l34 && *ip_proto >= 0) + fk->ports.ports = __skb_flow_get_ports(skb, *nhoff, *ip_proto, data, hlen); return true; } -static u32 bond_vlan_srcmac_hash(struct sk_buff *skb) +static u32 bond_vlan_srcmac_hash(struct sk_buff *skb, const void *data, int mhoff, int hlen) { - struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb); + struct ethhdr *mac_hdr; u32 srcmac_vendor = 0, srcmac_dev = 0; u16 vlan; int i; + data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr)); + if (!data) + return 0; + mac_hdr = (struct ethhdr *)(data + mhoff); + for (i = 0; i < 3; i++) srcmac_vendor = (srcmac_vendor << 8) | mac_hdr->h_source[i]; @@ -3678,26 +3703,25 @@ static u32 bond_vlan_srcmac_hash(struct sk_buff *skb) } /* Extract the appropriate headers based on bond's xmit policy */ -static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, - struct flow_keys *fk) +static bool bond_flow_dissect(struct 
bonding *bond, struct sk_buff *skb, const void *data, + __be16 l2_proto, int nhoff, int hlen, struct flow_keys *fk) { bool l34 = bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34; - int noff, proto = -1; + int ip_proto = -1; switch (bond->params.xmit_policy) { case BOND_XMIT_POLICY_ENCAP23: case BOND_XMIT_POLICY_ENCAP34: memset(fk, 0, sizeof(*fk)); return __skb_flow_dissect(NULL, skb, &flow_keys_bonding, - fk, NULL, 0, 0, 0, 0); + fk, data, l2_proto, nhoff, hlen, 0); default: break; } fk->ports.ports = 0; memset(&fk->icmp, 0, sizeof(fk->icmp)); - noff = skb_network_offset(skb); - if (!bond_flow_ip(skb, fk, &noff, &proto, l34)) + if (!bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34)) return false; /* ICMP error packets contains at least 8 bytes of the header @@ -3705,22 +3729,20 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb, * to correlate ICMP error packets within the same flow which * generated the error. */ - if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { - skb_flow_get_icmp_tci(skb, &fk->icmp, skb->data, - skb_transport_offset(skb), - skb_headlen(skb)); - if (proto == IPPROTO_ICMP) { + if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) { + skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen); + if (ip_proto == IPPROTO_ICMP) { if (!icmp_is_err(fk->icmp.type)) return true; - noff += sizeof(struct icmphdr); - } else if (proto == IPPROTO_ICMPV6) { + nhoff += sizeof(struct icmphdr); + } else if (ip_proto == IPPROTO_ICMPV6) { if (!icmpv6_is_err(fk->icmp.type)) return true; - noff += sizeof(struct icmp6hdr); + nhoff += sizeof(struct icmp6hdr); } - return bond_flow_ip(skb, fk, &noff, &proto, l34); + return bond_flow_ip(skb, fk, data, hlen, l2_proto, &nhoff, &ip_proto, l34); } return true; @@ -3736,33 +3758,26 @@ static u32 bond_ip_hash(u32 hash, struct flow_keys *flow) return hash >> 1; } -/** - * bond_xmit_hash - generate a hash value based on the xmit policy - * @bond: bonding device - * @skb: 
buffer to use for headers - * - * This function will extract the necessary headers from the skb buffer and use - * them to generate a hash based on the xmit_policy set in the bonding device +/* Generate hash based on xmit policy. If @skb is given it is used to linearize + * the data as required, but this function can be used without it if the data is + * known to be linear (e.g. with xdp_buff). */ -u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) +static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, const void *data, + __be16 l2_proto, int mhoff, int nhoff, int hlen) { struct flow_keys flow; u32 hash; - if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && - skb->l4_hash) - return skb->hash; - if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC) - return bond_vlan_srcmac_hash(skb); + return bond_vlan_srcmac_hash(skb, data, mhoff, hlen); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER2 || - !bond_flow_dissect(bond, skb, &flow)) - return bond_eth_hash(skb); + !bond_flow_dissect(bond, skb, data, l2_proto, nhoff, hlen, &flow)) + return bond_eth_hash(skb, data, mhoff, hlen); if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 || bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23) { - hash = bond_eth_hash(skb); + hash = bond_eth_hash(skb, data, mhoff, hlen); } else { if (flow.icmp.id) memcpy(&hash, &flow.icmp, sizeof(hash)); @@ -3773,6 +3788,25 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) return bond_ip_hash(hash, &flow); } +/** + * bond_xmit_hash - generate a hash value based on the xmit policy + * @bond: bonding device + * @skb: buffer to use for headers + * + * This function will extract the necessary headers from the skb buffer and use + * them to generate a hash based on the xmit_policy set in the bonding device + */ +u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) +{ + if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 && + skb->l4_hash) + return skb->hash; + + 
return __bond_xmit_hash(bond, skb, skb->head, skb->protocol, + skb->mac_header, skb->network_header, + skb_headlen(skb)); +} + /*-------------------------- Device entry points ----------------------------*/ void bond_work_init_all(struct bonding *bond) @@ -4434,8 +4468,7 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, return bond_tx_drop(bond_dev, skb); } -static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond, - struct sk_buff *skb) +static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond) { return rcu_dereference(bond->curr_active_slave); } @@ -4449,7 +4482,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb, struct bonding *bond = netdev_priv(bond_dev); struct slave *slave; - slave = bond_xmit_activebackup_slave_get(bond, skb); + slave = bond_xmit_activebackup_slave_get(bond); if (slave) return bond_dev_queue_xmit(bond, skb, slave->dev); @@ -4747,7 +4780,7 @@ static struct net_device *bond_xmit_get_slave(struct net_device *master_dev, slave = bond_xmit_roundrobin_slave_get(bond, skb); break; case BOND_MODE_ACTIVEBACKUP: - slave = bond_xmit_activebackup_slave_get(bond, skb); + slave = bond_xmit_activebackup_slave_get(bond); break; case BOND_MODE_8023AD: case BOND_MODE_XOR: From 879af96ffd72706c6e3278ea6b45b0b0e37ec5d7 Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Sat, 31 Jul 2021 05:57:33 +0000 Subject: [PATCH 11/31] net, core: Add support for XDP redirection to slave device This adds the ndo_xdp_get_xmit_slave hook for transforming XDP_TX into XDP_REDIRECT after BPF program run when the ingress device is a bond slave. The dev_xdp_prog_count is exposed so that slave devices can be checked for loaded XDP programs in order to avoid the situation where both bond master and slave have programs loaded according to xdp_state. 
Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Link: https://lore.kernel.org/bpf/20210731055738.16820-3-joamaki@gmail.com --- include/linux/filter.h | 13 ++++++++++++- include/linux/netdevice.h | 6 ++++++ net/core/dev.c | 13 ++++++++++++- net/core/filter.c | 25 +++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index ff698c9d1c946..1797e8506929d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -776,6 +776,10 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, DECLARE_BPF_DISPATCHER(xdp) +DECLARE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); + +u32 xdp_master_redirect(struct xdp_buff *xdp); + static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, struct xdp_buff *xdp) { @@ -783,7 +787,14 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, * under local_bh_disable(), which provides the needed RCU protection * for accessing map entries. */ - return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + u32 act = __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp)); + + if (static_branch_unlikely(&bpf_master_redirect_enabled_key)) { + if (act == XDP_TX && netif_is_bond_slave(xdp->rxq->dev)) + act = xdp_master_redirect(xdp); + } + + return act; } void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d63a94ecbf3bb..02c6e8e10c86e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1330,6 +1330,9 @@ struct netdev_net_notifier { * that got dropped are freed/returned via xdp_return_frame(). * Returns negative number, means general error invoking ndo, meaning * no frames were xmit'ed and core-caller will free all frames. 
+ * struct net_device *(*ndo_xdp_get_xmit_slave)(struct net_device *dev, + * struct xdp_buff *xdp); + * Get the xmit slave of master device based on the xdp_buff. * int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); * This function is used to wake up the softirq, ksoftirqd or kthread * responsible for sending and/or receiving packets on a specific @@ -1557,6 +1560,8 @@ struct net_device_ops { int (*ndo_xdp_xmit)(struct net_device *dev, int n, struct xdp_frame **xdp, u32 flags); + struct net_device * (*ndo_xdp_get_xmit_slave)(struct net_device *dev, + struct xdp_buff *xdp); int (*ndo_xsk_wakeup)(struct net_device *dev, u32 queue_id, u32 flags); struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev); @@ -4087,6 +4092,7 @@ typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, int expected_fd, u32 flags); int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); +u8 dev_xdp_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 64e1a5f63f93b..9eb6dc9e02b31 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9380,7 +9380,7 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev, return dev->xdp_state[mode].prog; } -static u8 dev_xdp_prog_count(struct net_device *dev) +u8 dev_xdp_prog_count(struct net_device *dev) { u8 count = 0; int i; @@ -9390,6 +9390,7 @@ static u8 dev_xdp_prog_count(struct net_device *dev) count++; return count; } +EXPORT_SYMBOL_GPL(dev_xdp_prog_count); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { @@ -9483,6 +9484,8 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack { unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); struct bpf_prog *cur_prog; + struct 
net_device *upper; + struct list_head *iter; enum bpf_xdp_mode mode; bpf_op_t bpf_op; int err; @@ -9521,6 +9524,14 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack return -EBUSY; } + /* don't allow if an upper device already has a program */ + netdev_for_each_upper_dev_rcu(dev, upper, iter) { + if (dev_xdp_prog_count(upper) > 0) { + NL_SET_ERR_MSG(extack, "Cannot attach when an upper device already has a program"); + return -EEXIST; + } + } + cur_prog = dev_xdp_prog(dev, mode); /* can't replace attached prog with link */ if (link && cur_prog) { diff --git a/net/core/filter.c b/net/core/filter.c index faf29fd822767..ff62cd39046dc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3950,6 +3950,31 @@ void bpf_clear_redirect_map(struct bpf_map *map) } } +DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key); +EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key); + +u32 xdp_master_redirect(struct xdp_buff *xdp) +{ + struct net_device *master, *slave; + struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + + master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); + slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); + if (slave && slave != xdp->rxq->dev) { + /* The target device is different from the receiving device, so + * redirect it to the new device. + * Using XDP_REDIRECT gets the correct behaviour from XDP enabled + * drivers to unmap the packet from their rx ring. 
+ */ + ri->tgt_index = slave->ifindex; + ri->map_id = INT_MAX; + ri->map_type = BPF_MAP_TYPE_UNSPEC; + return XDP_REDIRECT; + } + return XDP_TX; +} +EXPORT_SYMBOL_GPL(xdp_master_redirect); + int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { From 9e2ee5c7e7c35d195e2aa0692a7241d47a433d1e Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Sat, 31 Jul 2021 05:57:34 +0000 Subject: [PATCH 12/31] net, bonding: Add XDP support to the bonding driver XDP is implemented in the bonding driver by transparently delegating the XDP program loading, removal and xmit operations to the bonding slave devices. The overall goal of this work is that XDP programs can be attached to a bond device *without* any further changes (or awareness) necessary to the program itself, meaning the same XDP program can be attached to a native device but also a bonding device. Semantics of XDP_TX when attached to a bond are equivalent in such setting to the case when a tc/BPF program would be attached to the bond, meaning transmitting the packet out of the bond itself using one of the bond's configured xmit methods to select a slave device (rather than XDP_TX on the slave itself). Handling of XDP_TX to transmit using the configured bonding mechanism is therefore implemented by rewriting the BPF program return value in bpf_prog_run_xdp. To avoid performance impact this check is guarded by a static key, which is incremented when a XDP program is loaded onto a bond device. This approach was chosen to avoid changes to drivers implementing XDP. If the slave device does not match the receive device, then XDP_REDIRECT is transparently used to perform the redirection in order to have the network driver release the packet from its RX ring. The bonding driver hashing functions have been refactored to allow reuse with xdp_buff's to avoid code duplication. 
The motivation for this change is to enable use of bonding (and 802.3ad) in hairpinning L4 load-balancers such as [1] implemented with XDP and also to transparently support bond devices for projects that use XDP given most modern NICs have dual port adapters. An alternative to this approach would be to implement 802.3ad in user-space and implement the bonding load-balancing in the XDP program itself, but is rather a cumbersome endeavor in terms of slave device management (e.g. by watching netlink) and requires separate programs for native vs bond cases for the orchestrator. A native in-kernel implementation overcomes these issues and provides more flexibility. Below are benchmark results done on two machines with 100Gbit Intel E810 (ice) NIC and with 32-core 3970X on sending machine, and 16-core 3950X on receiving machine. 64 byte packets were sent with pktgen-dpdk at full rate. Two issues [2, 3] were identified with the ice driver, so the tests were performed with iommu=off and patch [2] applied. Additionally the bonding round robin algorithm was modified to use per-cpu tx counters as high CPU load (50% vs 10%) and high rate of cache misses were caused by the shared rr_tx_counter (see patch 2/3). The statistics were collected using "sar -n dev -u 1 10". On top of that, for ice, further work is in progress on improving the XDP_TX numbers [4]. 
-----------------------| CPU |--| rxpck/s |--| txpck/s |---- without patch (1 dev): XDP_DROP: 3.15% 48.6Mpps XDP_TX: 3.12% 18.3Mpps 18.3Mpps XDP_DROP (RSS): 9.47% 116.5Mpps XDP_TX (RSS): 9.67% 25.3Mpps 24.2Mpps ----------------------- with patch, bond (1 dev): XDP_DROP: 3.14% 46.7Mpps XDP_TX: 3.15% 13.9Mpps 13.9Mpps XDP_DROP (RSS): 10.33% 117.2Mpps XDP_TX (RSS): 10.64% 25.1Mpps 24.0Mpps ----------------------- with patch, bond (2 devs): XDP_DROP: 6.27% 92.7Mpps XDP_TX: 6.26% 17.6Mpps 17.5Mpps XDP_DROP (RSS): 11.38% 117.2Mpps XDP_TX (RSS): 14.30% 28.7Mpps 27.4Mpps -------------------------------------------------------------- RSS: Receive Side Scaling, e.g. the packets were sent to a range of destination IPs. [1]: https://cilium.io/blog/2021/05/20/cilium-110#standalonelb [2]: https://lore.kernel.org/bpf/20210601113236.42651-1-maciej.fijalkowski@intel.com/T/#t [3]: https://lore.kernel.org/bpf/CAHn8xckNXci+X_Eb2WMv4uVYjO2331UWB2JLtXr_58z0Av8+8A@mail.gmail.com/ [4]: https://lore.kernel.org/bpf/20210805230046.28715-1-maciej.fijalkowski@intel.com/T/#t Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Cc: Maciej Fijalkowski Cc: Magnus Karlsson Link: https://lore.kernel.org/bpf/20210731055738.16820-4-joamaki@gmail.com --- drivers/net/bonding/bond_main.c | 309 +++++++++++++++++++++++++++++++- include/net/bonding.h | 1 + 2 files changed, 309 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 487c71fcec586..04cf78fa1721e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -317,6 +317,19 @@ bool bond_sk_check(struct bonding *bond) } } +static bool bond_xdp_check(struct bonding *bond) +{ + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + case BOND_MODE_ACTIVEBACKUP: + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + return true; + default: + return false; + } +} + /*---------------------------------- VLAN 
-----------------------------------*/ /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, @@ -2133,6 +2146,41 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, bond_update_slave_arr(bond, NULL); + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + if (bond->xdp_prog) { + NL_SET_ERR_MSG(extack, "Slave does not support XDP"); + slave_err(bond_dev, slave_dev, "Slave does not support XDP\n"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + } else { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = bond->xdp_prog, + .extack = extack, + }; + + if (dev_xdp_prog_count(slave_dev) > 0) { + NL_SET_ERR_MSG(extack, + "Slave has XDP program loaded, please unload before enslaving"); + slave_err(bond_dev, slave_dev, + "Slave has XDP program loaded, please unload before enslaving\n"); + res = -EOPNOTSUPP; + goto err_sysfs_del; + } + + res = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (res < 0) { + /* ndo_bpf() sets extack error message */ + slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res); + goto err_sysfs_del; + } + if (bond->xdp_prog) + bpf_prog_inc(bond->xdp_prog); + } + slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n", bond_is_active_slave(new_slave) ? "an active" : "a backup", new_slave->link != BOND_LINK_DOWN ? "an up" : "a down"); @@ -2252,6 +2300,17 @@ static int __bond_release_one(struct net_device *bond_dev, /* recompute stats just before removing the slave */ bond_get_stats(bond->dev, &bond->bond_stats); + if (bond->xdp_prog) { + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = NULL, + .extack = NULL, + }; + if (slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp)) + slave_warn(bond_dev, slave_dev, "failed to unload XDP program\n"); + } + /* unregister rx_handler early so bond_handle_frame wouldn't be called * for this slave anymore. 
*/ @@ -3638,7 +3697,7 @@ static inline u32 bond_eth_hash(struct sk_buff *skb, const void *data, int mhoff return 0; ep = (struct ethhdr *)(data + mhoff); - return ep->h_dest[5] ^ ep->h_source[5] ^ ep->h_proto; + return ep->h_dest[5] ^ ep->h_source[5] ^ be16_to_cpu(ep->h_proto); } static bool bond_flow_ip(struct sk_buff *skb, struct flow_keys *fk, const void *data, @@ -3807,6 +3866,26 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb) skb_headlen(skb)); } +/** + * bond_xmit_hash_xdp - generate a hash value based on the xmit policy + * @bond: bonding device + * @xdp: buffer to use for headers + * + * The XDP variant of bond_xmit_hash. + */ +static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp) +{ + struct ethhdr *eth; + + if (xdp->data + sizeof(struct ethhdr) > xdp->data_end) + return 0; + + eth = (struct ethhdr *)xdp->data; + + return __bond_xmit_hash(bond, NULL, xdp->data, eth->h_proto, 0, + sizeof(struct ethhdr), xdp->data_end - xdp->data); +} + /*-------------------------- Device entry points ----------------------------*/ void bond_work_init_all(struct bonding *bond) @@ -4455,6 +4534,47 @@ static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond, return NULL; } +static struct slave *bond_xdp_xmit_roundrobin_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct slave *slave; + int slave_cnt; + u32 slave_id; + const struct ethhdr *eth; + void *data = xdp->data; + + if (data + sizeof(struct ethhdr) > xdp->data_end) + goto non_igmp; + + eth = (struct ethhdr *)data; + data += sizeof(struct ethhdr); + + /* See comment on IGMP in bond_xmit_roundrobin_slave_get() */ + if (eth->h_proto == htons(ETH_P_IP)) { + const struct iphdr *iph; + + if (data + sizeof(struct iphdr) > xdp->data_end) + goto non_igmp; + + iph = (struct iphdr *)data; + + if (iph->protocol == IPPROTO_IGMP) { + slave = rcu_dereference(bond->curr_active_slave); + if (slave) + return slave; + return bond_get_slave_by_id(bond, 0); + } + } + 
+non_igmp: + slave_cnt = READ_ONCE(bond->slave_cnt); + if (likely(slave_cnt)) { + slave_id = bond_rr_gen_slave_id(bond) % slave_cnt; + return bond_get_slave_by_id(bond, slave_id); + } + return NULL; +} + static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb, struct net_device *bond_dev) { @@ -4670,6 +4790,22 @@ static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond, return slave; } +static struct slave *bond_xdp_xmit_3ad_xor_slave_get(struct bonding *bond, + struct xdp_buff *xdp) +{ + struct bond_up_slave *slaves; + unsigned int count; + u32 hash; + + hash = bond_xmit_hash_xdp(bond, xdp); + slaves = rcu_dereference(bond->usable_slaves); + count = slaves ? READ_ONCE(slaves->count) : 0; + if (unlikely(!count)) + return NULL; + + return slaves->arr[hash % count]; +} + /* Use this Xmit function for 3AD as well as XOR modes. The current * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out. @@ -4954,6 +5090,174 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev) return ret; } +static struct net_device * +bond_xdp_get_xmit_slave(struct net_device *bond_dev, struct xdp_buff *xdp) +{ + struct bonding *bond = netdev_priv(bond_dev); + struct slave *slave; + + /* Caller needs to hold rcu_read_lock() */ + + switch (BOND_MODE(bond)) { + case BOND_MODE_ROUNDROBIN: + slave = bond_xdp_xmit_roundrobin_slave_get(bond, xdp); + break; + + case BOND_MODE_ACTIVEBACKUP: + slave = bond_xmit_activebackup_slave_get(bond); + break; + + case BOND_MODE_8023AD: + case BOND_MODE_XOR: + slave = bond_xdp_xmit_3ad_xor_slave_get(bond, xdp); + break; + + default: + /* Should never happen. 
Mode guarded by bond_xdp_check() */ + netdev_err(bond_dev, "Unknown bonding mode %d for xdp xmit\n", BOND_MODE(bond)); + WARN_ON_ONCE(1); + return NULL; + } + + if (slave) + return slave->dev; + + return NULL; +} + +static int bond_xdp_xmit(struct net_device *bond_dev, + int n, struct xdp_frame **frames, u32 flags) +{ + int nxmit, err = -ENXIO; + + rcu_read_lock(); + + for (nxmit = 0; nxmit < n; nxmit++) { + struct xdp_frame *frame = frames[nxmit]; + struct xdp_frame *frames1[] = {frame}; + struct net_device *slave_dev; + struct xdp_buff xdp; + + xdp_convert_frame_to_buff(frame, &xdp); + + slave_dev = bond_xdp_get_xmit_slave(bond_dev, &xdp); + if (!slave_dev) { + err = -ENXIO; + break; + } + + err = slave_dev->netdev_ops->ndo_xdp_xmit(slave_dev, 1, frames1, flags); + if (err < 1) + break; + } + + rcu_read_unlock(); + + /* If error happened on the first frame then we can pass the error up, otherwise + * report the number of frames that were xmitted. + */ + if (err < 0) + return (nxmit == 0 ? 
err : nxmit); + + return nxmit; +} + +static int bond_xdp_set(struct net_device *dev, struct bpf_prog *prog, + struct netlink_ext_ack *extack) +{ + struct bonding *bond = netdev_priv(dev); + struct list_head *iter; + struct slave *slave, *rollback_slave; + struct bpf_prog *old_prog; + struct netdev_bpf xdp = { + .command = XDP_SETUP_PROG, + .flags = 0, + .prog = prog, + .extack = extack, + }; + int err; + + ASSERT_RTNL(); + + if (!bond_xdp_check(bond)) + return -EOPNOTSUPP; + + old_prog = bond->xdp_prog; + bond->xdp_prog = prog; + + bond_for_each_slave(bond, slave, iter) { + struct net_device *slave_dev = slave->dev; + + if (!slave_dev->netdev_ops->ndo_bpf || + !slave_dev->netdev_ops->ndo_xdp_xmit) { + NL_SET_ERR_MSG(extack, "Slave device does not support XDP"); + slave_err(dev, slave_dev, "Slave does not support XDP\n"); + err = -EOPNOTSUPP; + goto err; + } + + if (dev_xdp_prog_count(slave_dev) > 0) { + NL_SET_ERR_MSG(extack, + "Slave has XDP program loaded, please unload before enslaving"); + slave_err(dev, slave_dev, + "Slave has XDP program loaded, please unload before enslaving\n"); + err = -EOPNOTSUPP; + goto err; + } + + err = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err < 0) { + /* ndo_bpf() sets extack error message */ + slave_err(dev, slave_dev, "Error %d calling ndo_bpf\n", err); + goto err; + } + if (prog) + bpf_prog_inc(prog); + } + + if (old_prog) + bpf_prog_put(old_prog); + + if (prog) + static_branch_inc(&bpf_master_redirect_enabled_key); + else + static_branch_dec(&bpf_master_redirect_enabled_key); + + return 0; + +err: + /* unwind the program changes */ + bond->xdp_prog = old_prog; + xdp.prog = old_prog; + xdp.extack = NULL; /* do not overwrite original error */ + + bond_for_each_slave(bond, rollback_slave, iter) { + struct net_device *slave_dev = rollback_slave->dev; + int err_unwind; + + if (slave == rollback_slave) + break; + + err_unwind = slave_dev->netdev_ops->ndo_bpf(slave_dev, &xdp); + if (err_unwind < 0) + slave_err(dev, 
slave_dev, + "Error %d when unwinding XDP program change\n", err_unwind); + else if (xdp.prog) + bpf_prog_inc(xdp.prog); + } + return err; +} + +static int bond_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return bond_xdp_set(dev, xdp->prog, xdp->extack); + default: + return -EINVAL; + } +} + static u32 bond_mode_bcast_speed(struct slave *slave, u32 speed) { if (speed == 0 || speed == SPEED_UNKNOWN) @@ -5042,6 +5346,9 @@ static const struct net_device_ops bond_netdev_ops = { .ndo_features_check = passthru_features_check, .ndo_get_xmit_slave = bond_xmit_get_slave, .ndo_sk_get_lower_dev = bond_sk_get_lower_dev, + .ndo_bpf = bond_xdp, + .ndo_xdp_xmit = bond_xdp_xmit, + .ndo_xdp_get_xmit_slave = bond_xdp_get_xmit_slave, }; static const struct device_type bond_type = { diff --git a/include/net/bonding.h b/include/net/bonding.h index 625d9c72dee37..b91c365e4e957 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -258,6 +258,7 @@ struct bonding { /* protecting ipsec_list */ spinlock_t ipsec_lock; #endif /* CONFIG_XFRM_OFFLOAD */ + struct bpf_prog *xdp_prog; }; #define bond_slave_get_rcu(dev) \ From aeea1b86f9363f3feabb496534d886f082a89f21 Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Sat, 31 Jul 2021 05:57:35 +0000 Subject: [PATCH 13/31] bpf, devmap: Exclude XDP broadcast to master device If the ingress device is bond slave, do not broadcast back through it or the bond master. 
Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210731055738.16820-5-joamaki@gmail.com --- kernel/bpf/devmap.c | 69 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 542e94fa30b40..f02d04540c0c0 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -534,10 +534,9 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, return __xdp_enqueue(dev, xdp, dev_rx, dst->xdp_prog); } -static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp, - int exclude_ifindex) +static bool is_valid_dst(struct bpf_dtab_netdev *obj, struct xdp_buff *xdp) { - if (!obj || obj->dev->ifindex == exclude_ifindex || + if (!obj || !obj->dev->netdev_ops->ndo_xdp_xmit) return false; @@ -562,17 +561,48 @@ static int dev_map_enqueue_clone(struct bpf_dtab_netdev *obj, return 0; } +static inline bool is_ifindex_excluded(int *excluded, int num_excluded, int ifindex) +{ + while (num_excluded--) { + if (ifindex == excluded[num_excluded]) + return true; + } + return false; +} + +/* Get ifindex of each upper device. 'indexes' must be able to hold at + * least MAX_NEST_DEV elements. + * Returns the number of ifindexes added. + */ +static int get_upper_ifindexes(struct net_device *dev, int *indexes) +{ + struct net_device *upper; + struct list_head *iter; + int n = 0; + + netdev_for_each_upper_dev_rcu(dev, upper, iter) { + indexes[n++] = upper->ifindex; + } + return n; +} + int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, struct bpf_map *map, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int exclude_ifindex = exclude_ingress ? 
dev_rx->ifindex : 0; struct bpf_dtab_netdev *dst, *last_dst = NULL; + int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; struct xdp_frame *xdpf; + int num_excluded = 0; unsigned int i; int err; + if (exclude_ingress) { + num_excluded = get_upper_ifindexes(dev_rx, excluded_devices); + excluded_devices[num_excluded++] = dev_rx->ifindex; + } + xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) return -EOVERFLOW; @@ -581,7 +611,10 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!is_valid_dst(dst, xdp, exclude_ifindex)) + if (!is_valid_dst(dst, xdp)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -601,7 +634,11 @@ int dev_map_enqueue_multi(struct xdp_buff *xdp, struct net_device *dev_rx, head = dev_map_index_hash(dtab, i); hlist_for_each_entry_rcu(dst, head, index_hlist, lockdep_is_held(&dtab->index_lock)) { - if (!is_valid_dst(dst, xdp, exclude_ifindex)) + if (!is_valid_dst(dst, xdp)) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, + dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -675,18 +712,27 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, bool exclude_ingress) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int exclude_ifindex = exclude_ingress ? 
dev->ifindex : 0; struct bpf_dtab_netdev *dst, *last_dst = NULL; + int excluded_devices[1+MAX_NEST_DEV]; struct hlist_head *head; struct hlist_node *next; + int num_excluded = 0; unsigned int i; int err; + if (exclude_ingress) { + num_excluded = get_upper_ifindexes(dev, excluded_devices); + excluded_devices[num_excluded++] = dev->ifindex; + } + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { for (i = 0; i < map->max_entries; i++) { dst = rcu_dereference_check(dtab->netdev_map[i], rcu_read_lock_bh_held()); - if (!dst || dst->dev->ifindex == exclude_ifindex) + if (!dst) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ @@ -700,12 +746,17 @@ int dev_map_redirect_multi(struct net_device *dev, struct sk_buff *skb, return err; last_dst = dst; + } } else { /* BPF_MAP_TYPE_DEVMAP_HASH */ for (i = 0; i < dtab->n_buckets; i++) { head = dev_map_index_hash(dtab, i); hlist_for_each_entry_safe(dst, next, head, index_hlist) { - if (!dst || dst->dev->ifindex == exclude_ifindex) + if (!dst) + continue; + + if (is_ifindex_excluded(excluded_devices, num_excluded, + dst->dev->ifindex)) continue; /* we only need n-1 clones; last_dst enqueued below */ From 689186699931313c7a42462602bd5c03eef77f9f Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Sat, 31 Jul 2021 05:57:36 +0000 Subject: [PATCH 14/31] net, core: Allow netdev_lower_get_next_private_rcu in bh context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For the XDP bonding slave lookup to work in the NAPI poll context in which the redundant rcu_read_lock() has been removed we have to follow the same approach as in 694cea395fde ("bpf: Allow RCU-protected lookups to happen from bh context") and modify the WARN_ON to also check rcu_read_lock_bh_held(). 
Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Cc: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20210731055738.16820-6-joamaki@gmail.com --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index 9eb6dc9e02b31..e5045b628dec2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7615,7 +7615,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, { struct netdev_adjacent *lower; - WARN_ON_ONCE(!rcu_read_lock_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held()); lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); From 95413846cca37f20000dd095cf6d91f8777129d7 Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Sat, 31 Jul 2021 05:57:37 +0000 Subject: [PATCH 15/31] selftests/bpf: Fix xdp_tx.c prog section name The program type cannot be deduced from 'tx' which causes an invalid argument error when trying to load xdp_tx.o using the skeleton. Rename the section name to "xdp" so that libbpf can deduce the type. 
Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210731055738.16820-7-joamaki@gmail.com --- tools/testing/selftests/bpf/progs/xdp_tx.c | 2 +- tools/testing/selftests/bpf/test_xdp_veth.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c index 94e6c2b281cb6..5f725c720e008 100644 --- a/tools/testing/selftests/bpf/progs/xdp_tx.c +++ b/tools/testing/selftests/bpf/progs/xdp_tx.c @@ -3,7 +3,7 @@ #include #include -SEC("tx") +SEC("xdp") int xdp_tx(struct xdp_md *xdp) { return XDP_TX; diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh index ba8ffcdaac302..995278e684b6e 100755 --- a/tools/testing/selftests/bpf/test_xdp_veth.sh +++ b/tools/testing/selftests/bpf/test_xdp_veth.sh @@ -108,7 +108,7 @@ ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2 ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy -ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx +ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy trap cleanup EXIT From 6aab1c81b98a90a9289a4d5256b6f7374872cc3f Mon Sep 17 00:00:00 2001 From: Jussi Maki Date: Sat, 31 Jul 2021 05:57:38 +0000 Subject: [PATCH 16/31] selftests/bpf: Add tests for XDP bonding Add a test suite to test XDP bonding implementation over a pair of veth devices. 
Signed-off-by: Jussi Maki Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20210731055738.16820-8-joamaki@gmail.com --- .../selftests/bpf/prog_tests/xdp_bonding.c | 520 ++++++++++++++++++ 1 file changed, 520 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_bonding.c diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c new file mode 100644 index 0000000000000..6b186b4238d08 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -0,0 +1,520 @@ +// SPDX-License-Identifier: GPL-2.0 + +/** + * Test XDP bonding support + * + * Sets up two bonded veth pairs between two fresh namespaces + * and verifies that XDP_TX program loaded on a bond device + * are correctly loaded onto the slave devices and XDP_TX'd + * packets are balanced using bonding. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include "test_progs.h" +#include "network_helpers.h" +#include +#include +#include + +#include "xdp_dummy.skel.h" +#include "xdp_redirect_multi_kern.skel.h" +#include "xdp_tx.skel.h" + +#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55} +#define BOND1_MAC_STR "00:11:22:33:44:55" +#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66} +#define BOND2_MAC_STR "00:22:33:44:55:66" +#define NPACKETS 100 + +static int root_netns_fd = -1; + +static void restore_root_netns(void) +{ + ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns"); +} + +static int setns_by_name(char *name) +{ + int nsfd, err; + char nspath[PATH_MAX]; + + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); + nsfd = open(nspath, O_RDONLY | O_CLOEXEC); + if (nsfd < 0) + return -1; + + err = setns(nsfd, CLONE_NEWNET); + close(nsfd); + return err; +} + +static int get_rx_packets(const char *iface) +{ + FILE *f; + char line[512]; + int iface_len = strlen(iface); + + f = fopen("/proc/net/dev", "r"); + if (!f) + return -1; + + 
while (fgets(line, sizeof(line), f)) { + char *p = line; + + while (*p == ' ') + p++; /* skip whitespace */ + if (!strncmp(p, iface, iface_len)) { + p += iface_len; + if (*p++ != ':') + continue; + while (*p == ' ') + p++; /* skip whitespace */ + while (*p && *p != ' ') + p++; /* skip rx bytes */ + while (*p == ' ') + p++; /* skip whitespace */ + fclose(f); + return atoi(p); + } + } + fclose(f); + return -1; +} + +#define MAX_BPF_LINKS 8 + +struct skeletons { + struct xdp_dummy *xdp_dummy; + struct xdp_tx *xdp_tx; + struct xdp_redirect_multi_kern *xdp_redirect_multi_kern; + + int nlinks; + struct bpf_link *links[MAX_BPF_LINKS]; +}; + +static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface) +{ + struct bpf_link *link; + int ifindex; + + ifindex = if_nametoindex(iface); + if (!ASSERT_GT(ifindex, 0, "get ifindex")) + return -1; + + if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached")) + return -1; + + link = bpf_program__attach_xdp(prog, ifindex); + if (!ASSERT_OK_PTR(link, "attach xdp program")) + return -1; + + skeletons->links[skeletons->nlinks++] = link; + return 0; +} + +enum { + BOND_ONE_NO_ATTACH = 0, + BOND_BOTH_AND_ATTACH, +}; + +static const char * const mode_names[] = { + [BOND_MODE_ROUNDROBIN] = "balance-rr", + [BOND_MODE_ACTIVEBACKUP] = "active-backup", + [BOND_MODE_XOR] = "balance-xor", + [BOND_MODE_BROADCAST] = "broadcast", + [BOND_MODE_8023AD] = "802.3ad", + [BOND_MODE_TLB] = "balance-tlb", + [BOND_MODE_ALB] = "balance-alb", +}; + +static const char * const xmit_policy_names[] = { + [BOND_XMIT_POLICY_LAYER2] = "layer2", + [BOND_XMIT_POLICY_LAYER34] = "layer3+4", + [BOND_XMIT_POLICY_LAYER23] = "layer2+3", + [BOND_XMIT_POLICY_ENCAP23] = "encap2+3", + [BOND_XMIT_POLICY_ENCAP34] = "encap3+4", +}; + +static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy, + int bond_both_attach) +{ +#define SYS(fmt, ...) 
\ + ({ \ + char cmd[1024]; \ + snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \ + if (!ASSERT_OK(system(cmd), cmd)) \ + return -1; \ + }) + + SYS("ip netns add ns_dst"); + SYS("ip link add veth1_1 type veth peer name veth2_1 netns ns_dst"); + SYS("ip link add veth1_2 type veth peer name veth2_2 netns ns_dst"); + + SYS("ip link add bond1 type bond mode %s xmit_hash_policy %s", + mode_names[mode], xmit_policy_names[xmit_policy]); + SYS("ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none"); + SYS("ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s", + mode_names[mode], xmit_policy_names[xmit_policy]); + SYS("ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none"); + + SYS("ip link set veth1_1 master bond1"); + if (bond_both_attach == BOND_BOTH_AND_ATTACH) { + SYS("ip link set veth1_2 master bond1"); + } else { + SYS("ip link set veth1_2 up addrgenmode none"); + + if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2")) + return -1; + } + + SYS("ip -netns ns_dst link set veth2_1 master bond2"); + + if (bond_both_attach == BOND_BOTH_AND_ATTACH) + SYS("ip -netns ns_dst link set veth2_2 master bond2"); + else + SYS("ip -netns ns_dst link set veth2_2 up addrgenmode none"); + + /* Load a dummy program on sending side as with veth peer needs to have a + * XDP program loaded as well. 
+ */ + if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1")) + return -1; + + if (bond_both_attach == BOND_BOTH_AND_ATTACH) { + if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst")) + return -1; + + if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2")) + return -1; + + restore_root_netns(); + } + + return 0; + +#undef SYS +} + +static void bonding_cleanup(struct skeletons *skeletons) +{ + restore_root_netns(); + while (skeletons->nlinks) { + skeletons->nlinks--; + bpf_link__destroy(skeletons->links[skeletons->nlinks]); + } + ASSERT_OK(system("ip link delete bond1"), "delete bond1"); + ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1"); + ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2"); + ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst"); +} + +static int send_udp_packets(int vary_dst_ip) +{ + struct ethhdr eh = { + .h_source = BOND1_MAC, + .h_dest = BOND2_MAC, + .h_proto = htons(ETH_P_IP), + }; + uint8_t buf[128] = {}; + struct iphdr *iph = (struct iphdr *)(buf + sizeof(eh)); + struct udphdr *uh = (struct udphdr *)(buf + sizeof(eh) + sizeof(*iph)); + int i, s = -1; + int ifindex; + + s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW); + if (!ASSERT_GE(s, 0, "socket")) + goto err; + + ifindex = if_nametoindex("bond1"); + if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex")) + goto err; + + memcpy(buf, &eh, sizeof(eh)); + iph->ihl = 5; + iph->version = 4; + iph->tos = 16; + iph->id = 1; + iph->ttl = 64; + iph->protocol = IPPROTO_UDP; + iph->saddr = 1; + iph->daddr = 2; + iph->tot_len = htons(sizeof(buf) - ETH_HLEN); + iph->check = 0; + + for (i = 1; i <= NPACKETS; i++) { + int n; + struct sockaddr_ll saddr_ll = { + .sll_ifindex = ifindex, + .sll_halen = ETH_ALEN, + .sll_addr = BOND2_MAC, + }; + + /* vary the UDP destination port for even distribution with roundrobin/xor modes */ + uh->dest++; + + if (vary_dst_ip) + iph->daddr++; + + n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr 
*)&saddr_ll, sizeof(saddr_ll)); + if (!ASSERT_EQ(n, sizeof(buf), "sendto")) + goto err; + } + + return 0; + +err: + if (s >= 0) + close(s); + return -1; +} + +static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy) +{ + int bond1_rx; + + if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH)) + goto out; + + if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34)) + goto out; + + bond1_rx = get_rx_packets("bond1"); + ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets"); + + switch (mode) { + case BOND_MODE_ROUNDROBIN: + case BOND_MODE_XOR: { + int veth1_rx = get_rx_packets("veth1_1"); + int veth2_rx = get_rx_packets("veth1_2"); + int diff = abs(veth1_rx - veth2_rx); + + ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets"); + + switch (xmit_policy) { + case BOND_XMIT_POLICY_LAYER2: + ASSERT_GE(diff, NPACKETS, + "expected packets on only one of the interfaces"); + break; + case BOND_XMIT_POLICY_LAYER23: + case BOND_XMIT_POLICY_LAYER34: + ASSERT_LT(diff, NPACKETS/2, + "expected even distribution of packets"); + break; + default: + PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy); + break; + } + break; + } + case BOND_MODE_ACTIVEBACKUP: { + int veth1_rx = get_rx_packets("veth1_1"); + int veth2_rx = get_rx_packets("veth1_2"); + int diff = abs(veth1_rx - veth2_rx); + + ASSERT_GE(diff, NPACKETS, + "expected packets on only one of the interfaces"); + break; + } + default: + PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy); + break; + } + +out: + bonding_cleanup(skeletons); +} + +/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding + * all the interfaces to it and checking that broadcasting won't send the packet + * to neither the ingress bond device (bond2) or its slave (veth2_1). 
+ */ +static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons) +{ + static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"}; + int veth1_1_rx, veth1_2_rx; + int err; + + if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, + BOND_ONE_NO_ATTACH)) + goto out; + + + if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst")) + goto out; + + /* populate the devmap with the relevant interfaces */ + for (int i = 0; i < ARRAY_SIZE(ifaces); i++) { + int ifindex = if_nametoindex(ifaces[i]); + int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all); + + if (!ASSERT_GT(ifindex, 0, "could not get interface index")) + goto out; + + err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0); + if (!ASSERT_OK(err, "add interface to map_all")) + goto out; + } + + if (xdp_attach(skeletons, + skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog, + "bond2")) + goto out; + + restore_root_netns(); + + if (send_udp_packets(BOND_MODE_ROUNDROBIN)) + goto out; + + veth1_1_rx = get_rx_packets("veth1_1"); + veth1_2_rx = get_rx_packets("veth1_2"); + + ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1"); + ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2"); + +out: + restore_root_netns(); + bonding_cleanup(skeletons); +} + +/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */ +static void test_xdp_bonding_attach(struct skeletons *skeletons) +{ + struct bpf_link *link = NULL; + struct bpf_link *link2 = NULL; + int veth, bond; + int err; + + if (!ASSERT_OK(system("ip link add veth type veth"), "add veth")) + goto out; + if (!ASSERT_OK(system("ip link add bond type bond"), "add bond")) + goto out; + + veth = if_nametoindex("veth"); + if (!ASSERT_GE(veth, 0, "if_nametoindex veth")) + goto out; + bond = if_nametoindex("bond"); + if (!ASSERT_GE(bond, 0, "if_nametoindex bond")) + goto out; + + /* enslaving with a XDP program loaded fails 
*/ + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth); + if (!ASSERT_OK_PTR(link, "attach program to veth")) + goto out; + + err = system("ip link set veth master bond"); + if (!ASSERT_NEQ(err, 0, "attaching slave with xdp program expected to fail")) + goto out; + + bpf_link__destroy(link); + link = NULL; + + err = system("ip link set veth master bond"); + if (!ASSERT_OK(err, "set veth master")) + goto out; + + /* attaching to slave when master has no program is allowed */ + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth); + if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved")) + goto out; + + /* attaching to master not allowed when slave has program loaded */ + link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond); + if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program")) + goto out; + + bpf_link__destroy(link); + link = NULL; + + /* attaching XDP program to master allowed when slave has no program */ + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond); + if (!ASSERT_OK_PTR(link, "attach program to master")) + goto out; + + /* attaching to slave not allowed when master has program loaded */ + link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond); + ASSERT_ERR_PTR(link2, "attach program to slave when master has program"); + +out: + bpf_link__destroy(link); + bpf_link__destroy(link2); + + system("ip link del veth"); + system("ip link del bond"); +} + +static int libbpf_debug_print(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level != LIBBPF_WARN) + vprintf(format, args); + return 0; +} + +struct bond_test_case { + char *name; + int mode; + int xmit_policy; +}; + +static struct bond_test_case bond_test_cases[] = { + { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, }, + { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, 
BOND_XMIT_POLICY_LAYER23 }, + + { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, }, + { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, }, + { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, }, +}; + +void test_xdp_bonding(void) +{ + libbpf_print_fn_t old_print_fn; + struct skeletons skeletons = {}; + int i; + + old_print_fn = libbpf_set_print(libbpf_debug_print); + + root_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net")) + goto out; + + skeletons.xdp_dummy = xdp_dummy__open_and_load(); + if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load")) + goto out; + + skeletons.xdp_tx = xdp_tx__open_and_load(); + if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load")) + goto out; + + skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load(); + if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern, + "xdp_redirect_multi_kern__open_and_load")) + goto out; + + if (!test__start_subtest("xdp_bonding_attach")) + test_xdp_bonding_attach(&skeletons); + + for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) { + struct bond_test_case *test_case = &bond_test_cases[i]; + + if (!test__start_subtest(test_case->name)) + test_xdp_bonding_with_mode( + &skeletons, + test_case->mode, + test_case->xmit_policy); + } + + if (!test__start_subtest("xdp_bonding_redirect_multi")) + test_xdp_bonding_redirect_multi(&skeletons); + +out: + xdp_dummy__destroy(skeletons.xdp_dummy); + xdp_tx__destroy(skeletons.xdp_tx); + xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern); + + libbpf_set_print(old_print_fn); + if (root_netns_fd >= 0) + close(root_netns_fd); +} From d692a637b4c5151a064f1eabd404944b31e28336 Mon Sep 17 00:00:00 2001 From: Muhammad Falak R Wani Date: Mon, 9 Aug 2021 12:30:46 +0530 Subject: [PATCH 17/31] samples, bpf: Add an explict comment to handle nested vlan tagging. 
A code block for handling nested VLAN tags can mislead newcomers into thinking it is duplicate code. Explicitly add a comment to clarify. Signed-off-by: Muhammad Falak R Wani Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210809070046.32142-1-falakreyaz@gmail.com --- samples/bpf/xdp1_kern.c | 2 ++ samples/bpf/xdp2_kern.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c index 34b64394ed9ca..f0c5d95084dec 100644 --- a/samples/bpf/xdp1_kern.c +++ b/samples/bpf/xdp1_kern.c @@ -57,6 +57,7 @@ int xdp_prog1(struct xdp_md *ctx) h_proto = eth->h_proto; + /* Handle VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; @@ -66,6 +67,7 @@ int xdp_prog1(struct xdp_md *ctx) return rc; h_proto = vhdr->h_vlan_encapsulated_proto; } + /* Handle double VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c index c787f4b496462..d8a64ab077b00 100644 --- a/samples/bpf/xdp2_kern.c +++ b/samples/bpf/xdp2_kern.c @@ -73,6 +73,7 @@ int xdp_prog1(struct xdp_md *ctx) h_proto = eth->h_proto; + /* Handle VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; @@ -82,6 +83,7 @@ int xdp_prog1(struct xdp_md *ctx) return rc; h_proto = vhdr->h_vlan_encapsulated_proto; } + /* Handle double VLAN tagged packet */ if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { struct vlan_hdr *vhdr; From b55dfa850015453144c969208a7518e7095259a4 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:16 +0200 Subject: [PATCH 18/31] bpf, tests: Add BPF_JMP32 test cases An eBPF JIT may implement JMP32 operations in a different way than JMP, especially on 32-bit architectures. This patch adds a series of tests for JMP32 operations, mainly for testing JITs.
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210809091829.810076-2-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 511 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 511 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index f6d5d30d01bf2..377e866764cbe 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4398,6 +4398,517 @@ static struct bpf_test tests[] = { { { 0, 4134 } }, .fill_helper = bpf_fill_stxdw, }, + /* BPF_JMP32 | BPF_JEQ | BPF_K */ + { + "JMP32_JEQ_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JEQ, R0, 321, 1), + BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JEQ_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 12345678), + BPF_JMP32_IMM(BPF_JEQ, R0, 12345678 & 0xffff, 1), + BPF_JMP32_IMM(BPF_JEQ, R0, 12345678, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 12345678 } } + }, + { + "JMP32_JEQ_K: negative immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JEQ, R0, 123, 1), + BPF_JMP32_IMM(BPF_JEQ, R0, -123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + /* BPF_JMP32 | BPF_JEQ | BPF_X */ + { + "JMP32_JEQ_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1234), + BPF_ALU32_IMM(BPF_MOV, R1, 4321), + BPF_JMP32_REG(BPF_JEQ, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 1234), + BPF_JMP32_REG(BPF_JEQ, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1234 } } + }, + /* BPF_JMP32 | BPF_JNE | BPF_K */ + { + "JMP32_JNE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JNE, R0, 123, 1), + BPF_JMP32_IMM(BPF_JNE, R0, 321, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + 
BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JNE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 12345678), + BPF_JMP32_IMM(BPF_JNE, R0, 12345678, 1), + BPF_JMP32_IMM(BPF_JNE, R0, 12345678 & 0xffff, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 12345678 } } + }, + { + "JMP32_JNE_K: negative immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JNE, R0, -123, 1), + BPF_JMP32_IMM(BPF_JNE, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + /* BPF_JMP32 | BPF_JNE | BPF_X */ + { + "JMP32_JNE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1234), + BPF_ALU32_IMM(BPF_MOV, R1, 1234), + BPF_JMP32_REG(BPF_JNE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 4321), + BPF_JMP32_REG(BPF_JNE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1234 } } + }, + /* BPF_JMP32 | BPF_JSET | BPF_K */ + { + "JMP32_JSET_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_JMP32_IMM(BPF_JSET, R0, 2, 1), + BPF_JMP32_IMM(BPF_JSET, R0, 3, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "JMP32_JSET_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x40000000), + BPF_JMP32_IMM(BPF_JSET, R0, 0x3fffffff, 1), + BPF_JMP32_IMM(BPF_JSET, R0, 0x60000000, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x40000000 } } + }, + { + "JMP32_JSET_K: negative immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSET, R0, -1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + /* BPF_JMP32 | BPF_JSET | BPF_X */ + { + "JMP32_JSET_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 8), + BPF_ALU32_IMM(BPF_MOV, R1, 7), + BPF_JMP32_REG(BPF_JSET, R0, R1, 2), + 
BPF_ALU32_IMM(BPF_MOV, R1, 8 | 2), + BPF_JMP32_REG(BPF_JNE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 8 } } + }, + /* BPF_JMP32 | BPF_JGT | BPF_K */ + { + "JMP32_JGT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JGT, R0, 123, 1), + BPF_JMP32_IMM(BPF_JGT, R0, 122, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JGT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JGT, R0, 0xffffffff, 1), + BPF_JMP32_IMM(BPF_JGT, R0, 0xfffffffd, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JGT | BPF_X */ + { + "JMP32_JGT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_JMP32_REG(BPF_JGT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd), + BPF_JMP32_REG(BPF_JGT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JGE | BPF_K */ + { + "JMP32_JGE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JGE, R0, 124, 1), + BPF_JMP32_IMM(BPF_JGE, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JGE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JGE, R0, 0xffffffff, 1), + BPF_JMP32_IMM(BPF_JGE, R0, 0xfffffffe, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JGE | BPF_X */ + { + "JMP32_JGE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_JMP32_REG(BPF_JGE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe), + 
BPF_JMP32_REG(BPF_JGE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLT | BPF_K */ + { + "JMP32_JLT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JLT, R0, 123, 1), + BPF_JMP32_IMM(BPF_JLT, R0, 124, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JLT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JLT, R0, 0xfffffffd, 1), + BPF_JMP32_IMM(BPF_JLT, R0, 0xffffffff, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLT | BPF_X */ + { + "JMP32_JLT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd), + BPF_JMP32_REG(BPF_JLT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xffffffff), + BPF_JMP32_REG(BPF_JLT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLE | BPF_K */ + { + "JMP32_JLE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 123), + BPF_JMP32_IMM(BPF_JLE, R0, 122, 1), + BPF_JMP32_IMM(BPF_JLE, R0, 123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 123 } } + }, + { + "JMP32_JLE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffd, 1), + BPF_JMP32_IMM(BPF_JLE, R0, 0xfffffffe, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JLE | BPF_X */ + { + "JMP32_JLE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xfffffffe), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffd), + BPF_JMP32_REG(BPF_JLE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, 0xfffffffe), + BPF_JMP32_REG(BPF_JLE, R0, R1, 1), + 
BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfffffffe } } + }, + /* BPF_JMP32 | BPF_JSGT | BPF_K */ + { + "JMP32_JSGT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSGT, R0, -123, 1), + BPF_JMP32_IMM(BPF_JSGT, R0, -124, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSGT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSGT, R0, -12345678, 1), + BPF_JMP32_IMM(BPF_JSGT, R0, -12345679, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSGT | BPF_X */ + { + "JMP32_JSGT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSGT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345679), + BPF_JMP32_REG(BPF_JSGT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSGE | BPF_K */ + { + "JMP32_JSGE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSGE, R0, -122, 1), + BPF_JMP32_IMM(BPF_JSGE, R0, -123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSGE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSGE, R0, -12345677, 1), + BPF_JMP32_IMM(BPF_JSGE, R0, -12345678, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSGE | BPF_X */ + { + "JMP32_JSGE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345677), + BPF_JMP32_REG(BPF_JSGE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSGE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), 
+ BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLT | BPF_K */ + { + "JMP32_JSLT_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSLT, R0, -123, 1), + BPF_JMP32_IMM(BPF_JSLT, R0, -122, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSLT_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSLT, R0, -12345678, 1), + BPF_JMP32_IMM(BPF_JSLT, R0, -12345677, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLT | BPF_X */ + { + "JMP32_JSLT_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSLT, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345677), + BPF_JMP32_REG(BPF_JSLT, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLE | BPF_K */ + { + "JMP32_JSLE_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_JMP32_IMM(BPF_JSLE, R0, -124, 1), + BPF_JMP32_IMM(BPF_JSLE, R0, -123, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "JMP32_JSLE_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_JMP32_IMM(BPF_JSLE, R0, -12345679, 1), + BPF_JMP32_IMM(BPF_JSLE, R0, -12345678, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -12345678 } } + }, + /* BPF_JMP32 | BPF_JSLE | BPF_X */ + { + "JMP32_JSLE_X", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -12345678), + BPF_ALU32_IMM(BPF_MOV, R1, -12345679), + BPF_JMP32_REG(BPF_JSLE, R0, R1, 2), + BPF_ALU32_IMM(BPF_MOV, R1, -12345678), + BPF_JMP32_REG(BPF_JSLE, R0, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_EXIT_INSN(), + }, +
INTERNAL, + { }, + { { 0, -12345678 } } + }, /* BPF_JMP | BPF_EXIT */ { "JMP_EXIT", From 565731acfcf28ffdaeeae3f03f3ced719f30bd99 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:17 +0200 Subject: [PATCH 19/31] bpf, tests: Add BPF_MOV tests for zero and sign extension Tests for ALU32 and ALU64 MOV with different sizes of the immediate value. Depending on the immediate field width of the native CPU instructions, a JIT may generate code differently depending on the immediate value. Test that zero or sign extension is performed as expected. Mainly for JIT testing. Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210809091829.810076-3-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 377e866764cbe..4509844331409 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -2360,6 +2360,48 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU_MOV_K: small negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "ALU_MOV_K: small negative zero extension", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU_MOV_K: large negative", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123456789), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123456789 } } + }, + { + "ALU_MOV_K: large negative zero extension", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -123456789), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, { "ALU64_MOV_K: dst = 2", .u.insns_int = { @@ -2412,6 +2454,48 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_MOV_K: small negative", + .u.insns_int 
= { + BPF_ALU64_IMM(BPF_MOV, R0, -123), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123 } } + }, + { + "ALU64_MOV_K: small negative sign extension", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } } + }, + { + "ALU64_MOV_K: large negative", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123456789), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -123456789 } } + }, + { + "ALU64_MOV_K: large negative sign extension", + .u.insns_int = { + BPF_ALU64_IMM(BPF_MOV, R0, -123456789), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xffffffff } } + }, /* BPF_ALU | BPF_ADD | BPF_X */ { "ALU_ADD_X: 1 + 2 = 3", From e92c813bf1193248dd9f938e76af545fa9cf7361 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:18 +0200 Subject: [PATCH 20/31] bpf, tests: Fix typos in test case descriptions This patch corrects the test description in a number of cases where the description differed from what was actually tested and expected. 
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210809091829.810076-4-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 4509844331409..ec36a8bfa3f9f 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -3537,7 +3537,7 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { - "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000ffff00000000", + "ALU64_AND_K: 0x0000ffffffff0000 & 0x0 = 0x0000000000000000", .u.insns_int = { BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), BPF_LD_IMM64(R3, 0x0000000000000000LL), @@ -3553,7 +3553,7 @@ static struct bpf_test tests[] = { { { 0, 0x1 } }, }, { - "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffffffff", + "ALU64_AND_K: 0x0000ffffffff0000 & -1 = 0x0000ffffffff0000", .u.insns_int = { BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), BPF_LD_IMM64(R3, 0x0000ffffffff0000LL), @@ -3679,7 +3679,7 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, }, { - "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffff00000000", + "ALU64_OR_K: 0x0000ffffffff0000 | 0x0 = 0x0000ffffffff0000", .u.insns_int = { BPF_LD_IMM64(R2, 0x0000ffffffff0000LL), BPF_LD_IMM64(R3, 0x0000ffffffff0000LL), @@ -3810,7 +3810,7 @@ static struct bpf_test tests[] = { { { 0, 3 } }, }, { - "ALU64_XOR_K: 1 & 0xffffffff = 0xfffffffe", + "ALU64_XOR_K: 1 ^ 0xffffffff = 0xfffffffe", .u.insns_int = { BPF_LD_IMM64(R0, 1), BPF_ALU64_IMM(BPF_XOR, R0, 0xffffffff), From ba89bcf78fba8ff99d84b762c56fbfdabc97731c Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:19 +0200 Subject: [PATCH 21/31] bpf, tests: Add more tests of ALU32 and ALU64 bitwise operations This patch adds tests of BPF_AND, BPF_OR and BPF_XOR with different magnitude of the immediate value. Mainly checking 32-bit JIT sub-word handling and zero/sign extension. 
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210809091829.810076-5-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 210 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index ec36a8bfa3f9f..73c2ea0cb13b7 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -3514,6 +3514,44 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffffffff } }, }, + { + "ALU_AND_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_AND, R0, 15), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 4 } } + }, + { + "ALU_AND_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4), + BPF_ALU32_IMM(BPF_AND, R0, 0xafbfcfdf), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xa1b2c3d4 } } + }, + { + "ALU_AND_K: Zero extension", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x0000000080a0c0e0LL), + BPF_ALU32_IMM(BPF_AND, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, { "ALU64_AND_K: 3 & 2 = 2", .u.insns_int = { @@ -3584,6 +3622,38 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_AND_K: Sign extension 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x00000000090b0d0fLL), + BPF_ALU64_IMM(BPF_AND, R0, 0x0f0f0f0f), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "ALU64_AND_K: Sign extension 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x0123456780a0c0e0LL), + BPF_ALU64_IMM(BPF_AND, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + 
BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, /* BPF_ALU | BPF_OR | BPF_X */ { "ALU_OR_X: 1 | 2 = 3", @@ -3656,6 +3726,44 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffffffff } }, }, + { + "ALU_OR_K: Small immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_OR, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01020305 } } + }, + { + "ALU_OR_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_OR, R0, 0xa0b0c0d0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xa1b2c3d4 } } + }, + { + "ALU_OR_K: Zero extension", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x00000000f9fbfdffLL), + BPF_ALU32_IMM(BPF_OR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, { "ALU64_OR_K: 1 | 2 = 3", .u.insns_int = { @@ -3726,6 +3834,38 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_OR_K: Sign extension 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x012345678fafcfefLL), + BPF_ALU64_IMM(BPF_OR, R0, 0x0f0f0f0f), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "ALU64_OR_K: Sign extension 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0xfffffffff9fbfdffLL), + BPF_ALU64_IMM(BPF_OR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, /* BPF_ALU | BPF_XOR | BPF_X */ { "ALU_XOR_X: 5 ^ 6 = 3", @@ -3798,6 +3938,44 @@ static struct bpf_test tests[] = { { }, { { 0, 0xfffffffe } }, }, + { + "ALU_XOR_K: Small immediate", + .u.insns_int 
= { + BPF_ALU32_IMM(BPF_MOV, R0, 0x01020304), + BPF_ALU32_IMM(BPF_XOR, R0, 15), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x0102030b } } + }, + { + "ALU_XOR_K: Large immediate", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0xf1f2f3f4), + BPF_ALU32_IMM(BPF_XOR, R0, 0xafbfcfdf), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x5e4d3c2b } } + }, + { + "ALU_XOR_K: Zero extension", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x00000000795b3d1fLL), + BPF_ALU32_IMM(BPF_XOR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, { "ALU64_XOR_K: 5 ^ 6 = 3", .u.insns_int = { @@ -3868,6 +4046,38 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_XOR_K: Sign extension 1", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0x0123456786a4c2e0LL), + BPF_ALU64_IMM(BPF_XOR, R0, 0x0f0f0f0f), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "ALU64_XOR_K: Sign extension 2", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_LD_IMM64(R1, 0xfedcba98795b3d1fLL), + BPF_ALU64_IMM(BPF_XOR, R0, 0xf0f0f0f0), + BPF_JMP_REG(BPF_JEQ, R0, R1, 2), + BPF_MOV32_IMM(R0, 2), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, /* BPF_ALU | BPF_LSH | BPF_X */ { "ALU_LSH_X: 1 << 1 = 2", From 0f2fca1ab18319dcb47f6b15b7c5d3f29da84b6d Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:20 +0200 Subject: [PATCH 22/31] bpf, tests: Add more ALU32 tests for BPF_LSH/RSH/ARSH This patch adds more tests of ALU32 shift operations BPF_LSH and BPF_RSH, including the special case of a zero immediate. Also add corresponding BPF_ARSH tests which were missing for ALU32. 
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210809091829.810076-6-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 73c2ea0cb13b7..8694b1fb8ff2c 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4103,6 +4103,18 @@ static struct bpf_test tests[] = { { }, { { 0, 0x80000000 } }, }, + { + "ALU_LSH_X: 0x12345678 << 12 = 0x45678000", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU32_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x45678000 } } + }, { "ALU64_LSH_X: 1 << 1 = 2", .u.insns_int = { @@ -4150,6 +4162,28 @@ static struct bpf_test tests[] = { { }, { { 0, 0x80000000 } }, }, + { + "ALU_LSH_K: 0x12345678 << 12 = 0x45678000", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_LSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x45678000 } } + }, + { + "ALU_LSH_K: 0x12345678 << 0 = 0x12345678", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_LSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x12345678 } } + }, { "ALU64_LSH_K: 1 << 1 = 2", .u.insns_int = { @@ -4197,6 +4231,18 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "ALU_RSH_X: 0x12345678 >> 20 = 0x123", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_MOV, R1, 20), + BPF_ALU32_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x123 } } + }, { "ALU64_RSH_X: 2 >> 1 = 1", .u.insns_int = { @@ -4244,6 +4290,28 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "ALU_RSH_K: 0x12345678 >> 20 = 0x123", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_RSH, R0, 20), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + 
{ { 0, 0x123 } } + }, + { + "ALU_RSH_K: 0x12345678 >> 0 = 0x12345678", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0x12345678), + BPF_ALU32_IMM(BPF_RSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x12345678 } } + }, { "ALU64_RSH_K: 2 >> 1 = 1", .u.insns_int = { @@ -4267,6 +4335,18 @@ static struct bpf_test tests[] = { { { 0, 1 } }, }, /* BPF_ALU | BPF_ARSH | BPF_X */ + { + "ALU32_ARSH_X: -1234 >> 7 = -10", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1234), + BPF_ALU32_IMM(BPF_MOV, R1, 7), + BPF_ALU32_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -10 } } + }, { "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", .u.insns_int = { @@ -4280,6 +4360,28 @@ static struct bpf_test tests[] = { { { 0, 0xffff00ff } }, }, /* BPF_ALU | BPF_ARSH | BPF_K */ + { + "ALU32_ARSH_K: -1234 >> 7 = -10", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1234), + BPF_ALU32_IMM(BPF_ARSH, R0, 7), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -10 } } + }, + { + "ALU32_ARSH_K: -1234 >> 0 = -1234", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, -1234), + BPF_ALU32_IMM(BPF_ARSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1234 } } + }, { "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", .u.insns_int = { From 3b9890ef80f4285d32f2274d20db108e064e5e9e Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:21 +0200 Subject: [PATCH 23/31] bpf, tests: Add more BPF_LSH/RSH/ARSH tests for ALU64 This patch adds a number of tests for BPF_LSH, BPF_RSH and BPF_ARSH ALU64 operations with values that may trigger different JIT code paths. Mainly testing 32-bit JITs that implement ALU64 operations with two 32-bit CPU registers per operand. 
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210809091829.810076-7-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 544 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 542 insertions(+), 2 deletions(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8694b1fb8ff2c..b95bed03ab1b3 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -4139,6 +4139,106 @@ static struct bpf_test tests[] = { { }, { { 0, 0x80000000 } }, }, + { + "ALU64_LSH_X: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xbcdef000 } } + }, + { + "ALU64_LSH_X: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x3456789a } } + }, + { + "ALU64_LSH_X: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_X: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x9abcdef0 } } + }, + { + "ALU64_LSH_X: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_X: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + 
}, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_LSH_X: Zero shift, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_LSH_X: Zero shift, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_LSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01234567 } } + }, /* BPF_ALU | BPF_LSH | BPF_K */ { "ALU_LSH_K: 1 << 1 = 2", @@ -4206,6 +4306,86 @@ static struct bpf_test tests[] = { { }, { { 0, 0x80000000 } }, }, + { + "ALU64_LSH_K: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xbcdef000 } } + }, + { + "ALU64_LSH_K: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 12), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x3456789a } } + }, + { + "ALU64_LSH_K: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_K: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 36), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x9abcdef0 } } + }, + { + "ALU64_LSH_K: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_LSH_K: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 32), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + 
BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_LSH_K: Zero shift", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_LSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, /* BPF_ALU | BPF_RSH | BPF_X */ { "ALU_RSH_X: 2 >> 1 = 1", @@ -4267,6 +4447,106 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "ALU64_RSH_X: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_RSH_X: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x00081234 } } + }, + { + "ALU64_RSH_X: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x08123456 } } + }, + { + "ALU64_RSH_X: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_X: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_RSH_X: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_X: Zero shift, low 
word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_RSH_X: Zero shift, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_RSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, /* BPF_ALU | BPF_RSH | BPF_K */ { "ALU_RSH_K: 2 >> 1 = 1", @@ -4334,6 +4614,86 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { + "ALU64_RSH_K: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_RSH_K: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 12), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x00081234 } } + }, + { + "ALU64_RSH_K: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x08123456 } } + }, + { + "ALU64_RSH_K: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 36), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_K: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_RSH_K: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } } + }, + { + "ALU64_RSH_K: Zero 
shift", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, /* BPF_ALU | BPF_ARSH | BPF_X */ { "ALU32_ARSH_X: -1234 >> 7 = -10", @@ -4348,7 +4708,7 @@ static struct bpf_test tests[] = { { { 0, -10 } } }, { - "ALU_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + "ALU64_ARSH_X: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", .u.insns_int = { BPF_LD_IMM64(R0, 0xff00ff0000000000LL), BPF_ALU32_IMM(BPF_MOV, R1, 40), @@ -4359,6 +4719,106 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffff00ff } }, }, + { + "ALU64_ARSH_X: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_ARSH_X: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 12), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfff81234 } } + }, + { + "ALU64_ARSH_X: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xf8123456 } } + }, + { + "ALU64_ARSH_X: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 36), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_X: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_ARSH_X: Shift == 32, high word", + .u.insns_int = { + 
BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 32), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_X: Zero shift, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, + { + "ALU64_ARSH_X: Zero shift, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU32_IMM(BPF_MOV, R1, 0), + BPF_ALU64_REG(BPF_ARSH, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, /* BPF_ALU | BPF_ARSH | BPF_K */ { "ALU32_ARSH_K: -1234 >> 7 = -10", @@ -4383,7 +4843,7 @@ static struct bpf_test tests[] = { { { 0, -1234 } } }, { - "ALU_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", + "ALU64_ARSH_K: 0xff00ff0000000000 >> 40 = 0xffffffffffff00ff", .u.insns_int = { BPF_LD_IMM64(R0, 0xff00ff0000000000LL), BPF_ALU64_IMM(BPF_ARSH, R0, 40), @@ -4393,6 +4853,86 @@ static struct bpf_test tests[] = { { }, { { 0, 0xffff00ff } }, }, + { + "ALU64_ARSH_K: Shift < 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_RSH, R0, 12), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x56789abc } } + }, + { + "ALU64_ARSH_K: Shift < 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 12), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xfff81234 } } + }, + { + "ALU64_ARSH_K: Shift > 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xf8123456 } } + }, + { + "ALU64_ARSH_K: Shift > 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0xf123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 36), + 
BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_K: Shift == 32, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x81234567 } } + }, + { + "ALU64_ARSH_K: Shift == 32, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 32), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, -1 } } + }, + { + "ALU64_ARSH_K: Zero shoft", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x8123456789abcdefLL), + BPF_ALU64_IMM(BPF_ARSH, R0, 0), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } } + }, /* BPF_ALU | BPF_NEG */ { "ALU_NEG: -(3) = -3", From faa576253d5fe757f0c573ef7e183b3416c58dae Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:22 +0200 Subject: [PATCH 24/31] bpf, tests: Add more ALU64 BPF_MUL tests This patch adds BPF_MUL tests for 64x32 and 64x64 multiply. Mainly testing 32-bit JITs that implement ALU64 operations with two 32-bit CPU registers per operand. 
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210809091829.810076-8-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index b95bed03ab1b3..072f9c51bd9bc 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -3051,6 +3051,31 @@ static struct bpf_test tests[] = { { }, { { 0, 2147483647 } }, }, + { + "ALU64_MUL_X: 64x64 multiply, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0fedcba987654321LL), + BPF_LD_IMM64(R1, 0x123456789abcdef0LL), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xe5618cf0 } } + }, + { + "ALU64_MUL_X: 64x64 multiply, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0fedcba987654321LL), + BPF_LD_IMM64(R1, 0x123456789abcdef0LL), + BPF_ALU64_REG(BPF_MUL, R0, R1), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x2236d88f } } + }, /* BPF_ALU | BPF_MUL | BPF_K */ { "ALU_MUL_K: 2 * 3 = 6", @@ -3161,6 +3186,29 @@ static struct bpf_test tests[] = { { }, { { 0, 0x1 } }, }, + { + "ALU64_MUL_K: 64x32 multiply, low word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xe242d208 } } + }, + { + "ALU64_MUL_K: 64x32 multiply, high word", + .u.insns_int = { + BPF_LD_IMM64(R0, 0x0123456789abcdefLL), + BPF_ALU64_IMM(BPF_MUL, R0, 0x12345678), + BPF_ALU64_IMM(BPF_RSH, R0, 32), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0xc28f5c28 } } + }, /* BPF_ALU | BPF_DIV | BPF_X */ { "ALU_DIV_X: 6 / 2 = 3", From 84024a4e86d9b2085f3444190b30d5f88c76e07b Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:23 +0200 Subject: [PATCH 25/31] bpf, tests: Add tests for ALU operations implemented with function calls 32-bit JITs may implement complex ALU64 instructions using function calls. 
The new tests check aspects related to this, such as register clobbering and register argument re-ordering. Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210809091829.810076-9-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 072f9c51bd9bc..e3c2569630209 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1916,6 +1916,147 @@ static struct bpf_test tests[] = { { }, { { 0, -1 } } }, + { + /* + * Register (non-)clobbering test, in the case where a 32-bit + * JIT implements complex ALU64 operations via function calls. + * If so, the function call must be invisible in the eBPF + * registers. The JIT must then save and restore relevant + * registers during the call. The following tests check that + * the eBPF registers retain their values after such a call. + */ + "INT: Register clobbering, R1 updated", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 123456789), + BPF_ALU32_IMM(BPF_MOV, R2, 2), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_DIV, R1, 123456789), + BPF_JMP_IMM(BPF_JNE, R0, 0, 10), + BPF_JMP_IMM(BPF_JNE, R1, 1, 9), + BPF_JMP_IMM(BPF_JNE, R2, 2, 8), + BPF_JMP_IMM(BPF_JNE, R3, 3, 7), + BPF_JMP_IMM(BPF_JNE, R4, 4, 6), + BPF_JMP_IMM(BPF_JNE, R5, 5, 5), + BPF_JMP_IMM(BPF_JNE, R6, 6, 4), + BPF_JMP_IMM(BPF_JNE, R7, 7, 3), + BPF_JMP_IMM(BPF_JNE, R8, 8, 2), + BPF_JMP_IMM(BPF_JNE, R9, 9, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + "INT: Register clobbering, R2 updated", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_ALU32_IMM(BPF_MOV, R1, 1), + BPF_ALU32_IMM(BPF_MOV, R2, 
2 * 123456789), + BPF_ALU32_IMM(BPF_MOV, R3, 3), + BPF_ALU32_IMM(BPF_MOV, R4, 4), + BPF_ALU32_IMM(BPF_MOV, R5, 5), + BPF_ALU32_IMM(BPF_MOV, R6, 6), + BPF_ALU32_IMM(BPF_MOV, R7, 7), + BPF_ALU32_IMM(BPF_MOV, R8, 8), + BPF_ALU32_IMM(BPF_MOV, R9, 9), + BPF_ALU64_IMM(BPF_DIV, R2, 123456789), + BPF_JMP_IMM(BPF_JNE, R0, 0, 10), + BPF_JMP_IMM(BPF_JNE, R1, 1, 9), + BPF_JMP_IMM(BPF_JNE, R2, 2, 8), + BPF_JMP_IMM(BPF_JNE, R3, 3, 7), + BPF_JMP_IMM(BPF_JNE, R4, 4, 6), + BPF_JMP_IMM(BPF_JNE, R5, 5, 5), + BPF_JMP_IMM(BPF_JNE, R6, 6, 4), + BPF_JMP_IMM(BPF_JNE, R7, 7, 3), + BPF_JMP_IMM(BPF_JNE, R8, 8, 2), + BPF_JMP_IMM(BPF_JNE, R9, 9, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, + { + /* + * Test 32-bit JITs that implement complex ALU64 operations as + * function calls R0 = f(R1, R2), and must re-arrange operands. + */ +#define NUMER 0xfedcba9876543210ULL +#define DENOM 0x0123456789abcdefULL + "ALU64_DIV X: Operand register permutations", + .u.insns_int = { + /* R0 / R2 */ + BPF_LD_IMM64(R0, NUMER), + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R0, R2), + BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R0 */ + BPF_LD_IMM64(R1, NUMER), + BPF_LD_IMM64(R0, DENOM), + BPF_ALU64_REG(BPF_DIV, R1, R0), + BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R0 / R1 */ + BPF_LD_IMM64(R0, NUMER), + BPF_LD_IMM64(R1, DENOM), + BPF_ALU64_REG(BPF_DIV, R0, R1), + BPF_JMP_IMM(BPF_JEQ, R0, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R2 / R0 */ + BPF_LD_IMM64(R2, NUMER), + BPF_LD_IMM64(R0, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R0), + BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R2 / R1 */ + BPF_LD_IMM64(R2, NUMER), + BPF_LD_IMM64(R1, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R1), + BPF_JMP_IMM(BPF_JEQ, R2, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* R1 / R2 */ + BPF_LD_IMM64(R1, NUMER), + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R1, R2), + BPF_JMP_IMM(BPF_JEQ, R1, NUMER / DENOM, 
1), + BPF_EXIT_INSN(), + /* R1 / R1 */ + BPF_LD_IMM64(R1, NUMER), + BPF_ALU64_REG(BPF_DIV, R1, R1), + BPF_JMP_IMM(BPF_JEQ, R1, 1, 1), + BPF_EXIT_INSN(), + /* R2 / R2 */ + BPF_LD_IMM64(R2, DENOM), + BPF_ALU64_REG(BPF_DIV, R2, R2), + BPF_JMP_IMM(BPF_JEQ, R2, 1, 1), + BPF_EXIT_INSN(), + /* R3 / R4 */ + BPF_LD_IMM64(R3, NUMER), + BPF_LD_IMM64(R4, DENOM), + BPF_ALU64_REG(BPF_DIV, R3, R4), + BPF_JMP_IMM(BPF_JEQ, R3, NUMER / DENOM, 1), + BPF_EXIT_INSN(), + /* Successful return */ + BPF_LD_IMM64(R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } }, +#undef NUMER +#undef DENOM + }, { "check: missing ret", .u.insns = { From e5009b4636cb593c06243197fd0742ed2e6ac510 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:24 +0200 Subject: [PATCH 26/31] bpf, tests: Add word-order tests for load/store of double words A double word (64-bit) load/store may be implemented as two successive 32-bit operations, one for each word. Check that the order of those operations is consistent with the machine endianness. 
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210809091829.810076-10-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index e3c2569630209..402c199cc119d 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5420,6 +5420,42 @@ static struct bpf_test tests[] = { { { 0, 0xffffffff } }, .stack_depth = 40, }, + { + "STX_MEM_DW: Store double word: first word in memory", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0x0123456789abcdefLL), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, +#ifdef __BIG_ENDIAN + { { 0, 0x01234567 } }, +#else + { { 0, 0x89abcdef } }, +#endif + .stack_depth = 40, + }, + { + "STX_MEM_DW: Store double word: second word in memory", + .u.insns_int = { + BPF_LD_IMM64(R0, 0), + BPF_LD_IMM64(R1, 0x0123456789abcdefLL), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -36), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, +#ifdef __BIG_ENDIAN + { { 0, 0x89abcdef } }, +#else + { { 0, 0x01234567 } }, +#endif + .stack_depth = 40, + }, /* BPF_STX | BPF_ATOMIC | BPF_W/DW */ { "STX_XADD_W: Test: 0x12 + 0x10 = 0x22", From 66e5eb8474554bc021e8a221c336bdaef13f7a69 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:25 +0200 Subject: [PATCH 27/31] bpf, tests: Add branch conversion JIT test Some JITs may need to convert a conditional jump instruction to a short PC-relative branch and a long unconditional jump, if the PC-relative offset exceeds offset field width in the CPU instruction. This test triggers such branch conversion on the 32-bit MIPS JIT. 
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210809091829.810076-11-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 402c199cc119d..896d37f4f4b3b 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -461,6 +461,41 @@ static int bpf_fill_stxdw(struct bpf_test *self) return __bpf_fill_stxdw(self, BPF_DW); } +static int bpf_fill_long_jmp(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct bpf_insn *insn; + int i; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[0] = BPF_ALU64_IMM(BPF_MOV, R0, 1); + insn[1] = BPF_JMP_IMM(BPF_JEQ, R0, 1, len - 2 - 1); + + /* + * Fill with a complex 64-bit operation that expands to a lot of + * instructions on 32-bit JITs. The large jump offset can then + * overflow the conditional branch field size, triggering a branch + * conversion mechanism in some JITs. + * + * Note: BPF_MAXINSNS of ALU64 MUL is enough to trigger such branch + * conversion on the 32-bit MIPS JIT. For other JITs, the instruction + * count and/or operation may need to be modified to trigger the + * branch conversion. + */ + for (i = 2; i < len - 1; i++) + insn[i] = BPF_ALU64_IMM(BPF_MUL, R0, (i << 16) + i); + + insn[len - 1] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + static struct bpf_test tests[] = { { "TAX", @@ -6895,6 +6930,14 @@ static struct bpf_test tests[] = { { }, { { 0, 1 } }, }, + { /* Mainly checking JIT here. 
*/ + "BPF_MAXINSNS: Very long conditional jump", + { }, + INTERNAL | FLAG_NO_DATA, + { }, + { { 0, 1 } }, + .fill_helper = bpf_fill_long_jmp, + }, { "JMP_JA: Jump, gap, jump, ...", { }, From 53e33f9928cd61272e8e7902a876cb8cdf3f5c07 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:26 +0200 Subject: [PATCH 28/31] bpf, tests: Add test for 32-bit context pointer argument passing On a 32-bit architecture, the context pointer will occupy the low half of R1, and the other half will be zero. Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210809091829.810076-12-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 896d37f4f4b3b..fcfaf45ae58a6 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -2092,6 +2092,22 @@ static struct bpf_test tests[] = { #undef NUMER #undef DENOM }, +#ifdef CONFIG_32BIT + { + "INT: 32-bit context pointer word order and zero-extension", + .u.insns_int = { + BPF_ALU32_IMM(BPF_MOV, R0, 0), + BPF_JMP32_IMM(BPF_JEQ, R1, 0, 3), + BPF_ALU64_IMM(BPF_RSH, R1, 32), + BPF_JMP32_IMM(BPF_JNE, R1, 0, 1), + BPF_ALU32_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, +#endif { "check: missing ret", .u.insns = { From e4517b3637c648b215307e3343900ec675fde607 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:27 +0200 Subject: [PATCH 29/31] bpf, tests: Add tests for atomic operations Tests for each atomic arithmetic operation and BPF_XCHG, derived from old BPF_XADD tests. The tests include BPF_W/DW and BPF_FETCH variants. 
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210809091829.810076-13-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 252 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 166 insertions(+), 86 deletions(-) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index fcfaf45ae58a6..855f64093ca79 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5508,49 +5508,6 @@ static struct bpf_test tests[] = { .stack_depth = 40, }, /* BPF_STX | BPF_ATOMIC | BPF_W/DW */ - { - "STX_XADD_W: Test: 0x12 + 0x10 = 0x22", - .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), - BPF_LDX_MEM(BPF_W, R0, R10, -40), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0x22 } }, - .stack_depth = 40, - }, - { - "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22", - .u.insns_int = { - BPF_ALU64_REG(BPF_MOV, R1, R10), - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), - BPF_ALU64_REG(BPF_MOV, R0, R10), - BPF_ALU64_REG(BPF_SUB, R0, R1), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 40, - }, - { - "STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22", - .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_W, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0x12 } }, - .stack_depth = 40, - }, { "STX_XADD_W: X + 1 + 1 + 1 + ...", { }, @@ -5559,49 +5516,6 @@ static struct bpf_test tests[] = { { { 0, 4134 } }, .fill_helper = bpf_fill_stxw, }, - { - "STX_XADD_DW: Test: 0x12 + 0x10 = 0x22", - .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), - BPF_LDX_MEM(BPF_DW, R0, R10, -40), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0x22 } }, - .stack_depth = 40, 
- }, - { - "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22", - .u.insns_int = { - BPF_ALU64_REG(BPF_MOV, R1, R10), - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), - BPF_ALU64_REG(BPF_MOV, R0, R10), - BPF_ALU64_REG(BPF_SUB, R0, R1), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0 } }, - .stack_depth = 40, - }, - { - "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22", - .u.insns_int = { - BPF_ALU32_IMM(BPF_MOV, R0, 0x12), - BPF_ST_MEM(BPF_DW, R10, -40, 0x10), - BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40), - BPF_EXIT_INSN(), - }, - INTERNAL, - { }, - { { 0, 0x12 } }, - .stack_depth = 40, - }, { "STX_XADD_DW: X + 1 + 1 + 1 + ...", { }, @@ -5610,6 +5524,172 @@ static struct bpf_test tests[] = { { { 0, 4134 } }, .fill_helper = bpf_fill_stxdw, }, + /* + * Exhaustive tests of atomic operation variants. + * Individual tests are expanded from template macros for all + * combinations of ALU operation, word size and fetching. 
+ */ +#define BPF_ATOMIC_OP_TEST1(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU32_IMM(BPF_MOV, R5, update), \ + BPF_ST_MEM(width, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R5, -40), \ + BPF_LDX_MEM(width, R0, R10, -40), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, result } }, \ + .stack_depth = 40, \ +} +#define BPF_ATOMIC_OP_TEST2(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test side effects, r10: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU64_REG(BPF_MOV, R1, R10), \ + BPF_ALU32_IMM(BPF_MOV, R0, update), \ + BPF_ST_MEM(BPF_W, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R0, -40), \ + BPF_ALU64_REG(BPF_MOV, R0, R10), \ + BPF_ALU64_REG(BPF_SUB, R0, R1), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, 0 } }, \ + .stack_depth = 40, \ +} +#define BPF_ATOMIC_OP_TEST3(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test side effects, r0: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU64_REG(BPF_MOV, R0, R10), \ + BPF_ALU32_IMM(BPF_MOV, R1, update), \ + BPF_ST_MEM(width, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R1, -40), \ + BPF_ALU64_REG(BPF_SUB, R0, R10), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, 0 } }, \ + .stack_depth = 40, \ +} +#define BPF_ATOMIC_OP_TEST4(width, op, logic, old, update, result) \ +{ \ + "BPF_ATOMIC | " #width ", " #op ": Test fetch: " \ + #old " " #logic " " #update " = " #result, \ + .u.insns_int = { \ + BPF_ALU32_IMM(BPF_MOV, R3, update), \ + BPF_ST_MEM(width, R10, -40, old), \ + BPF_ATOMIC_OP(width, op, R10, R3, -40), \ + BPF_ALU64_REG(BPF_MOV, R0, R3), \ + BPF_EXIT_INSN(), \ + }, \ + INTERNAL, \ + { }, \ + { { 0, (op) & BPF_FETCH ? 
old : update } }, \ + .stack_depth = 40, \ +} + /* BPF_ATOMIC | BPF_W: BPF_ADD */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_W: BPF_ADD | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_DW: BPF_ADD */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_DW: BPF_ADD | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_ADD | BPF_FETCH, +, 0x12, 0xab, 0xbd), + /* BPF_ATOMIC | BPF_W: BPF_AND */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_W: BPF_AND | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_DW: BPF_AND */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + 
BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_DW: BPF_AND | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_AND | BPF_FETCH, &, 0x12, 0xab, 0x02), + /* BPF_ATOMIC | BPF_W: BPF_OR */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_W: BPF_OR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_DW: BPF_OR */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_DW: BPF_OR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_OR | BPF_FETCH, |, 0x12, 0xab, 0xbb), + /* BPF_ATOMIC | BPF_W: BPF_XOR */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR, ^, 0x12, 
0xab, 0xb9), + /* BPF_ATOMIC | BPF_W: BPF_XOR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_DW: BPF_XOR */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_DW: BPF_XOR | BPF_FETCH */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XOR | BPF_FETCH, ^, 0x12, 0xab, 0xb9), + /* BPF_ATOMIC | BPF_W: BPF_XCHG */ + BPF_ATOMIC_OP_TEST1(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST2(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST3(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST4(BPF_W, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + /* BPF_ATOMIC | BPF_DW: BPF_XCHG */ + BPF_ATOMIC_OP_TEST1(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST2(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST3(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), + BPF_ATOMIC_OP_TEST4(BPF_DW, BPF_XCHG, xchg, 0x12, 0xab, 0xab), +#undef BPF_ATOMIC_OP_TEST1 +#undef BPF_ATOMIC_OP_TEST2 +#undef BPF_ATOMIC_OP_TEST3 +#undef BPF_ATOMIC_OP_TEST4 /* BPF_JMP32 | BPF_JEQ | BPF_K */ { "JMP32_JEQ_K: Small immediate", From 6a3b24ca489ea01d5b4d5a2539e75dfb5e1e18be Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:28 +0200 Subject: [PATCH 30/31] bpf, tests: Add tests for BPF_CMPXCHG Tests for BPF_CMPXCHG with both word and double word operands. 
As with the tests for other atomic operations, these tests only check the result of the arithmetic operation. The atomicity of the operations is not tested. Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210809091829.810076-14-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 166 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 166 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 855f64093ca79..d05fe7b4a9cb1 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5690,6 +5690,172 @@ static struct bpf_test tests[] = { #undef BPF_ATOMIC_OP_TEST2 #undef BPF_ATOMIC_OP_TEST3 #undef BPF_ATOMIC_OP_TEST4 + /* BPF_ATOMIC | BPF_W, BPF_CMPXCHG */ + { + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful return", + .u.insns_int = { + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01234567 } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test successful store", + .u.insns_int = { + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure return", + .u.insns_int = { + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x76543210), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01234567 } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test failure store", + .u.insns_int = { + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, 
R0, 0x76543210), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_LDX_MEM(BPF_W, R0, R10, -40), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x01234567 } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_W, BPF_CMPXCHG: Test side effects", + .u.insns_int = { + BPF_ST_MEM(BPF_W, R10, -40, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R0, 0x01234567), + BPF_ALU32_IMM(BPF_MOV, R3, 0x89abcdef), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, R10, R3, -40), + BPF_ALU32_REG(BPF_MOV, R0, R3), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0x89abcdef } }, + .stack_depth = 40, + }, + /* BPF_ATOMIC | BPF_DW, BPF_CMPXCHG */ + { + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful return", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test successful store", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_STX_MEM(BPF_DW, R10, R0, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_REG(BPF_SUB, R0, R2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure return", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_ALU64_IMM(BPF_ADD, R0, 1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_JMP_REG(BPF_JNE, R0, 
R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test failure store", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_ALU64_IMM(BPF_ADD, R0, 1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_LDX_MEM(BPF_DW, R0, R10, -40), + BPF_JMP_REG(BPF_JNE, R0, R1, 1), + BPF_ALU64_REG(BPF_SUB, R0, R1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 40, + }, + { + "BPF_ATOMIC | BPF_DW, BPF_CMPXCHG: Test side effects", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x0123456789abcdefULL), + BPF_LD_IMM64(R2, 0xfecdba9876543210ULL), + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_STX_MEM(BPF_DW, R10, R1, -40), + BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, R10, R2, -40), + BPF_LD_IMM64(R0, 0xfecdba9876543210ULL), + BPF_JMP_REG(BPF_JNE, R0, R2, 1), + BPF_ALU64_REG(BPF_SUB, R0, R2), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 0 } }, + .stack_depth = 40, + }, /* BPF_JMP32 | BPF_JEQ | BPF_K */ { "JMP32_JEQ_K: Small immediate", From 874be05f525e87768daf0f47b494dc83b9537243 Mon Sep 17 00:00:00 2001 From: Johan Almbladh Date: Mon, 9 Aug 2021 11:18:29 +0200 Subject: [PATCH 31/31] bpf, tests: Add tail call test suite While BPF_CALL instructions were tested implicitly by the cBPF-to-eBPF translation, there have not been any tests for BPF_TAIL_CALL instructions. The new test suite includes tests for tail call chaining, tail call count tracking and error paths. It is mainly intended for JIT development and testing.
Signed-off-by: Johan Almbladh Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20210809091829.810076-15-johan.almbladh@anyfinetworks.com --- lib/test_bpf.c | 248 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 248 insertions(+) diff --git a/lib/test_bpf.c b/lib/test_bpf.c index d05fe7b4a9cb1..44d8197bbffb2 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -8989,8 +8989,248 @@ static __init int test_bpf(void) return err_cnt ? -EINVAL : 0; } +struct tail_call_test { + const char *descr; + struct bpf_insn insns[MAX_INSNS]; + int result; + int stack_depth; +}; + +/* + * Magic marker used in test snippets for tail calls below. + * BPF_LD/MOV to R2 and R3 with this immediate value is replaced + * with the proper values by the test runner. + */ +#define TAIL_CALL_MARKER 0x7a11ca11 + +/* Special offset to indicate a NULL call target */ +#define TAIL_CALL_NULL 0x7fff + +/* Special offset to indicate an out-of-range index */ +#define TAIL_CALL_INVALID 0x7ffe + +#define TAIL_CALL(offset) \ + BPF_LD_IMM64(R2, TAIL_CALL_MARKER), \ + BPF_RAW_INSN(BPF_ALU | BPF_MOV | BPF_K, R3, 0, \ + offset, TAIL_CALL_MARKER), \ + BPF_JMP_IMM(BPF_TAIL_CALL, 0, 0, 0) + +/* + * Tail call tests. Each test case may call any other test in the table, + * including itself, specified as a relative index offset from the calling + * test. The index TAIL_CALL_NULL can be used to specify a NULL target + * function to test the JIT error path. Similarly, the index TAIL_CALL_INVALID + * results in a target index that is out of range.
+ */ +static struct tail_call_test tail_call_tests[] = { + { + "Tail call leaf", + .insns = { + BPF_ALU64_REG(BPF_MOV, R0, R1), + BPF_ALU64_IMM(BPF_ADD, R0, 1), + BPF_EXIT_INSN(), + }, + .result = 1, + }, + { + "Tail call 2", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 2), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 3, + }, + { + "Tail call 3", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 3), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 6, + }, + { + "Tail call 4", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 4), + TAIL_CALL(-1), + BPF_ALU64_IMM(BPF_MOV, R0, -1), + BPF_EXIT_INSN(), + }, + .result = 10, + }, + { + "Tail call error path, max count reached", + .insns = { + BPF_ALU64_IMM(BPF_ADD, R1, 1), + BPF_ALU64_REG(BPF_MOV, R0, R1), + TAIL_CALL(0), + BPF_EXIT_INSN(), + }, + .result = MAX_TAIL_CALL_CNT + 1, + }, + { + "Tail call error path, NULL target", + .insns = { + BPF_ALU64_IMM(BPF_MOV, R0, -1), + TAIL_CALL(TAIL_CALL_NULL), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + .result = 1, + }, + { + "Tail call error path, index out of range", + .insns = { + BPF_ALU64_IMM(BPF_MOV, R0, -1), + TAIL_CALL(TAIL_CALL_INVALID), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + .result = 1, + }, +}; + +static void __init destroy_tail_call_tests(struct bpf_array *progs) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) + if (progs->ptrs[i]) + bpf_prog_free(progs->ptrs[i]); + kfree(progs); +} + +static __init int prepare_tail_call_tests(struct bpf_array **pprogs) +{ + int ntests = ARRAY_SIZE(tail_call_tests); + struct bpf_array *progs; + int which, err; + + /* Allocate the table of programs to be used for tail calls */ + progs = kzalloc(sizeof(*progs) + (ntests + 1) * sizeof(progs->ptrs[0]), + GFP_KERNEL); + if (!progs) + goto out_nomem; + + /* Create all eBPF programs and populate the table */ + for (which = 0; which < ntests; which++) { + struct tail_call_test
*test = &tail_call_tests[which]; + struct bpf_prog *fp; + int len, i; + + /* Compute the number of program instructions */ + for (len = 0; len < MAX_INSNS; len++) { + struct bpf_insn *insn = &test->insns[len]; + + if (len < MAX_INSNS - 1 && + insn->code == (BPF_LD | BPF_DW | BPF_IMM)) + len++; + if (insn->code == 0) + break; + } + + /* Allocate and initialize the program */ + fp = bpf_prog_alloc(bpf_prog_size(len), 0); + if (!fp) + goto out_nomem; + + fp->len = len; + fp->type = BPF_PROG_TYPE_SOCKET_FILTER; + fp->aux->stack_depth = test->stack_depth; + memcpy(fp->insnsi, test->insns, len * sizeof(struct bpf_insn)); + + /* Relocate runtime tail call offsets and addresses */ + for (i = 0; i < len; i++) { + struct bpf_insn *insn = &fp->insnsi[i]; + + if (insn->imm != TAIL_CALL_MARKER) + continue; + + switch (insn->code) { + case BPF_LD | BPF_DW | BPF_IMM: + insn[0].imm = (u32)(long)progs; + insn[1].imm = ((u64)(long)progs) >> 32; + break; + + case BPF_ALU | BPF_MOV | BPF_K: + if (insn->off == TAIL_CALL_NULL) + insn->imm = ntests; + else if (insn->off == TAIL_CALL_INVALID) + insn->imm = ntests + 1; + else + insn->imm = which + insn->off; + insn->off = 0; + } + } + + fp = bpf_prog_select_runtime(fp, &err); + if (err) + goto out_err; + + progs->ptrs[which] = fp; + } + + /* The last entry contains a NULL program pointer */ + progs->map.max_entries = ntests + 1; + *pprogs = progs; + return 0; + +out_nomem: + err = -ENOMEM; + +out_err: + if (progs) + destroy_tail_call_tests(progs); + return err; +} + +static __init int test_tail_calls(struct bpf_array *progs) +{ + int i, err_cnt = 0, pass_cnt = 0; + int jit_cnt = 0, run_cnt = 0; + + for (i = 0; i < ARRAY_SIZE(tail_call_tests); i++) { + struct tail_call_test *test = &tail_call_tests[i]; + struct bpf_prog *fp = progs->ptrs[i]; + u64 duration; + int ret; + + cond_resched(); + + pr_info("#%d %s ", i, test->descr); + if (!fp) { + err_cnt++; + continue; + } + pr_cont("jited:%u ", fp->jited); + + run_cnt++; + if (fp->jited) + 
jit_cnt++; + + ret = __run_one(fp, NULL, MAX_TESTRUNS, &duration); + if (ret == test->result) { + pr_cont("%lld PASS", duration); + pass_cnt++; + } else { + pr_cont("ret %d != %d FAIL", ret, test->result); + err_cnt++; + } + } + + pr_info("%s: Summary: %d PASSED, %d FAILED, [%d/%d JIT'ed]\n", + __func__, pass_cnt, err_cnt, jit_cnt, run_cnt); + + return err_cnt ? -EINVAL : 0; +} + static int __init test_bpf_init(void) { + struct bpf_array *progs = NULL; int ret; ret = prepare_bpf_tests(); @@ -9002,6 +9242,14 @@ static int __init test_bpf_init(void) if (ret) return ret; + ret = prepare_tail_call_tests(&progs); + if (ret) + return ret; + ret = test_tail_calls(progs); + destroy_tail_call_tests(progs); + if (ret) + return ret; + return test_skb_segment(); }