From 6715df8d5d24655b9fd368e904028112b54c7de1 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sun, 19 Feb 2023 22:04:26 +0200 Subject: [PATCH 1/8] bpf: Allow reads from uninit stack This commit updates the following functions to allow reads from uninitialized stack locations when env->allow_uninit_stack option is enabled: - check_stack_read_fixed_off() - check_stack_range_initialized(), called from: - check_stack_read_var_off() - check_helper_mem_access() Such a change allows relaxing the logic in stacksafe() to treat STACK_MISC and STACK_INVALID in the same way and to make the following stack slot configurations equivalent: | Cached state | Current state | | stack slot | stack slot | |------------------+------------------| | STACK_INVALID or | STACK_INVALID or | | STACK_MISC | STACK_SPILL or | | | STACK_MISC or | | | STACK_ZERO or | | | STACK_DYNPTR | This leads to significant verification speed gains (see below). The idea was suggested by Andrii Nakryiko [1] and the initial patch was created by Alexei Starovoitov [2]. Currently the env->allow_uninit_stack is allowed for programs loaded by users with CAP_PERFMON or CAP_SYS_ADMIN capabilities. A number of test cases from verifier/*.c were expecting uninitialized stack access to be an error. These test cases were updated to execute in unprivileged mode (thus preserving the tests). The test progs/test_global_func10.c expected "invalid indirect read from stack" error message because of the access to uninitialized memory region. This error is no longer possible in privileged mode. The test is updated to provoke an error "invalid indirect access to stack" because of access to invalid stack address (such error is not verified by progs/test_global_func*.c series of tests). The following tests had to be removed because these can't be made unprivileged: - verifier/sock.c: - "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value" BPF_PROG_TYPE_SCHED_CLS programs are not executed in unprivileged mode. 
- verifier/var_off.c: - "indirect variable-offset stack access, max_off+size > max_initialized" - "indirect variable-offset stack access, uninitialized" These tests verify that access to uninitialized stack values is detected when stack offset is not a constant. However, variable stack access is prohibited in unprivileged mode, thus these tests are no longer valid. * * * Here is veristat log comparing this patch with current master on a set of selftest binaries listed in tools/testing/selftests/bpf/veristat.cfg and cilium BPF binaries (see [3]): $ ./veristat -e file,prog,states -C -f 'states_pct<-30' master.log current.log File Program States (A) States (B) States (DIFF) -------------------------- -------------------------- ---------- ---------- ---------------- bpf_host.o tail_handle_ipv6_from_host 349 244 -105 (-30.09%) bpf_host.o tail_handle_nat_fwd_ipv4 1320 895 -425 (-32.20%) bpf_lxc.o tail_handle_nat_fwd_ipv4 1320 895 -425 (-32.20%) bpf_sock.o cil_sock4_connect 70 48 -22 (-31.43%) bpf_sock.o cil_sock4_sendmsg 68 46 -22 (-32.35%) bpf_xdp.o tail_handle_nat_fwd_ipv4 1554 803 -751 (-48.33%) bpf_xdp.o tail_lb_ipv4 6457 2473 -3984 (-61.70%) bpf_xdp.o tail_lb_ipv6 7249 3908 -3341 (-46.09%) pyperf600_bpf_loop.bpf.o on_event 287 145 -142 (-49.48%) strobemeta.bpf.o on_event 15915 4772 -11143 (-70.02%) strobemeta_nounroll2.bpf.o on_event 17087 3820 -13267 (-77.64%) xdp_synproxy_kern.bpf.o syncookie_tc 21271 6635 -14636 (-68.81%) xdp_synproxy_kern.bpf.o syncookie_xdp 23122 6024 -17098 (-73.95%) -------------------------- -------------------------- ---------- ---------- ---------------- Note: I limited selection by states_pct<-30%. 
Inspection of differences in pyperf600_bpf_loop behavior shows that the following patch for the test removes almost all differences: - a/tools/testing/selftests/bpf/progs/pyperf.h + b/tools/testing/selftests/bpf/progs/pyperf.h @ -266,8 +266,8 @ int __on_event(struct bpf_raw_tracepoint_args *ctx) } if (event->pthread_match || !pidData->use_tls) { - void* frame_ptr; - FrameData frame; + void* frame_ptr = 0; + FrameData frame = {}; Symbol sym = {}; int cur_cpu = bpf_get_smp_processor_id(); W/o this patch the difference comes from the following pattern (for different variables): static bool get_frame_data(... FrameData *frame ...) { ... bpf_probe_read_user(&frame->f_code, ...); if (!frame->f_code) return false; ... bpf_probe_read_user(&frame->co_name, ...); if (frame->co_name) ...; } int __on_event(struct bpf_raw_tracepoint_args *ctx) { FrameData frame; ... get_frame_data(... &frame ...) // indirectly via a bpf_loop & callback ... } SEC("raw_tracepoint/kfree_skb") int on_event(struct bpf_raw_tracepoint_args* ctx) { ... ret |= __on_event(ctx); ret |= __on_event(ctx); ... } With regard to the value `frame->co_name` the following is important: - Because of the conditional `if (!frame->f_code)` each call to __on_event() produces two states, one with `frame->co_name` marked as STACK_MISC, another with it as is (and marked STACK_INVALID on the first call). - The call to bpf_probe_read_user() does not mark stack slots corresponding to `&frame->co_name` as REG_LIVE_WRITTEN but it marks these slots as STACK_MISC, this happens because of the following loop in the check_helper_call(): for (i = 0; i < meta.access_size; i++) { err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1, false); if (err) return err; } Note the size of the write, it is a one byte write for each byte touched by a helper. The BPF_B write does not lead to write marks for the target stack slot. 
- Which means that w/o this patch when second __on_event() call is verified `if (frame->co_name)` will propagate read marks first to a stack slot with STACK_MISC marks and second to a stack slot with STACK_INVALID marks and these states would be considered different. [1] https://lore.kernel.org/bpf/CAEf4BzY3e+ZuC6HUa8dCiUovQRg2SzEk7M-dSkqNZyn=xEmnPA@mail.gmail.com/ [2] https://lore.kernel.org/bpf/CAADnVQKs2i1iuZ5SUGuJtxWVfGYR9kDgYKhq3rNV+kBLQCu7rA@mail.gmail.com/ [3] git@github.com:anakryiko/cilium.git Suggested-by: Andrii Nakryiko Co-developed-by: Alexei Starovoitov Signed-off-by: Eduard Zingerman Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230219200427.606541-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 11 +- .../selftests/bpf/progs/test_global_func10.c | 8 +- tools/testing/selftests/bpf/verifier/calls.c | 13 ++- .../bpf/verifier/helper_access_var_len.c | 104 ++++++++++++------ .../testing/selftests/bpf/verifier/int_ptr.c | 9 +- .../selftests/bpf/verifier/search_pruning.c | 13 ++- tools/testing/selftests/bpf/verifier/sock.c | 27 ----- .../selftests/bpf/verifier/spill_fill.c | 7 +- .../testing/selftests/bpf/verifier/var_off.c | 52 --------- 9 files changed, 108 insertions(+), 136 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 272563a0b7702..d517d13878cfe 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3826,6 +3826,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, continue; if (type == STACK_MISC) continue; + if (type == STACK_INVALID && env->allow_uninit_stack) + continue; verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -3863,6 +3865,8 @@ static int check_stack_read_fixed_off(struct bpf_verifier_env *env, continue; if (type == STACK_ZERO) continue; + if (type == STACK_INVALID && env->allow_uninit_stack) + continue; verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size); return 
-EACCES; @@ -5754,7 +5758,8 @@ static int check_stack_range_initialized( stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE]; if (*stype == STACK_MISC) goto mark; - if (*stype == STACK_ZERO) { + if ((*stype == STACK_ZERO) || + (*stype == STACK_INVALID && env->allow_uninit_stack)) { if (clobber) { /* helper can write anything into the stack */ *stype = STACK_MISC; @@ -13936,6 +13941,10 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) continue; + if (env->allow_uninit_stack && + old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC) + continue; + /* explored stack has more populated slots than current stack * and these slots were used */ diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c index 98327bdbbfd24..8fba3f3649e22 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func10.c +++ b/tools/testing/selftests/bpf/progs/test_global_func10.c @@ -5,12 +5,12 @@ #include "bpf_misc.h" struct Small { - int x; + long x; }; struct Big { - int x; - int y; + long x; + long y; }; __noinline int foo(const struct Big *big) @@ -22,7 +22,7 @@ __noinline int foo(const struct Big *big) } SEC("cgroup_skb/ingress") -__failure __msg("invalid indirect read from stack") +__failure __msg("invalid indirect access to stack") int global_func10(struct __sk_buff *skb) { const struct Small small = {.x = skb->len }; diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 9d993926bf0ef..289ed202ec66a 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -2221,19 +2221,22 @@ * that fp-8 stack slot was unused in the fall-through * branch and will accept the program incorrectly */ - BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 2, 
2), BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), BPF_JMP_IMM(BPF_JA, 0, 0, 0), BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), BPF_LD_MAP_FD(BPF_REG_1, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .fixup_map_hash_48b = { 6 }, - .errstr = "invalid indirect read from stack R2 off -8+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_XDP, + .fixup_map_hash_48b = { 7 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "calls: ctx read at start of subprog", diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c index a6c869a7319cd..9c4885885aba0 100644 --- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c +++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c @@ -29,19 +29,30 @@ { "helper access to variable memory: stack, bitwise AND, zero included", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + /* use bitwise AND to limit r3 range to [0, 64] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 64), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * 
ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory at &fp[-64] is + * not initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 4 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: stack, bitwise AND + JMP, wrong max", @@ -183,20 +194,31 @@ { "helper access to variable memory: stack, JMP, no min check", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), - BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128), - BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + /* use JMP to limit r3 range to [0, 64] */ + BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 64, 6), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory at &fp[-64] is + * not initialized. 
+ */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+0 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 4 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+0 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: stack, JMP (signed), no min check", @@ -564,29 +586,41 @@ { "helper access to variable memory: 8 bytes leak", .insns = { - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8), - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64), + /* set max stack size */ + BPF_ST_MEM(BPF_DW, BPF_REG_10, -128, 0), + /* set r3 to a random value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40), + /* Note: fp[-32] left uninitialized */ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16), BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), - BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128), - BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128), - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), - BPF_MOV64_IMM(BPF_REG_3, 0), - BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16), + /* Limit r3 range to [1, 64] */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 63), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1), + BPF_MOV64_IMM(BPF_REG_4, 0), + /* Call bpf_ringbuf_output(), it is one of a few helper functions with + * 
ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode. + * For unpriv this should signal an error, because memory region [1, 64] + * at &fp[-64] is not fully initialized. + */ + BPF_EMIT_CALL(BPF_FUNC_ringbuf_output), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "invalid indirect read from stack R1 off -64+32 size 64", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .fixup_map_ringbuf = { 3 }, + .errstr_unpriv = "invalid indirect read from stack R2 off -64+32 size 64", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "helper access to variable memory: 8 bytes no leak (init memory)", diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c index 070893fb29007..02d9e004260b3 100644 --- a/tools/testing/selftests/bpf/verifier/int_ptr.c +++ b/tools/testing/selftests/bpf/verifier/int_ptr.c @@ -54,12 +54,13 @@ /* bpf_strtoul() */ BPF_EMIT_CALL(BPF_FUNC_strtoul), - BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL, - .errstr = "invalid indirect read from stack R4 off -16+4 size 8", + .result_unpriv = REJECT, + .errstr_unpriv = "invalid indirect read from stack R4 off -16+4 size 8", + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "ARG_PTR_TO_LONG misaligned", diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c index d63fd8991b03a..745d6b5842fd4 100644 --- a/tools/testing/selftests/bpf/verifier/search_pruning.c +++ b/tools/testing/selftests/bpf/verifier/search_pruning.c @@ -128,9 +128,10 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_8b = { 3 }, - .errstr = "invalid read from stack off -16+0 size 8", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .errstr_unpriv = "invalid read 
from stack off -16+0 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "precision tracking for u32 spill/fill", @@ -258,6 +259,8 @@ BPF_EXIT_INSN(), }, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "invalid read from stack off -8+1 size 8", - .result = REJECT, + .errstr_unpriv = "invalid read from stack off -8+1 size 8", + .result_unpriv = REJECT, + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index d11d0b28be416..108dd3ee1edda 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -530,33 +530,6 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, -{ - "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 0), - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8), - BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), - BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), - BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - BPF_MOV64_IMM(BPF_REG_4, 1), - BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_sk_storage_get), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_sk_storage_map = { 14 }, - .prog_type = BPF_PROG_TYPE_SCHED_CLS, - .result = REJECT, - .errstr = "invalid indirect read from stack", -}, { "bpf_map_lookup_elem(smap, &key)", .insns = { diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c index 9bb302dade237..d1463bf4949af 100644 --- 
a/tools/testing/selftests/bpf/verifier/spill_fill.c +++ b/tools/testing/selftests/bpf/verifier/spill_fill.c @@ -171,9 +171,10 @@ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .result = REJECT, - .errstr = "invalid read from stack off -4+0 size 4", - .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result_unpriv = REJECT, + .errstr_unpriv = "invalid read from stack off -4+0 size 4", + /* in privileged mode reads from uninitialized stack locations are permitted */ + .result = ACCEPT, }, { "Spill a u32 const scalar. Refill as u16. Offset to skb->data", diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index d37f512fad16e..b183e26c03f10 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -212,31 +212,6 @@ .result = REJECT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, -{ - "indirect variable-offset stack access, max_off+size > max_initialized", - .insns = { - /* Fill only the second from top 8 bytes of the stack. */ - BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16), - /* Add it to fp. We now have either fp-12 or fp-16, but we don't know - * which. fp-12 size 8 is partially uninitialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10), - /* Dereference it indirectly. 
*/ - BPF_LD_MAP_FD(BPF_REG_1, 0), - BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .fixup_map_hash_8b = { 5 }, - .errstr = "invalid indirect read from stack R2 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_LWT_IN, -}, { "indirect variable-offset stack access, min_off < min_initialized", .insns = { @@ -289,33 +264,6 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, -{ - "indirect variable-offset stack access, uninitialized", - .insns = { - BPF_MOV64_IMM(BPF_REG_2, 6), - BPF_MOV64_IMM(BPF_REG_3, 28), - /* Fill the top 16 bytes of the stack. */ - BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0), - BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), - /* Get an unknown value. */ - BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0), - /* Make it small and 4-byte aligned. */ - BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4), - BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16), - /* Add it to fp. We now have either fp-12 or fp-16, we don't know - * which, but either way it points to initialized stack. - */ - BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10), - BPF_MOV64_IMM(BPF_REG_5, 8), - /* Dereference it indirectly. */ - BPF_EMIT_CALL(BPF_FUNC_getsockopt), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }, - .errstr = "invalid indirect read from stack R4 var_off", - .result = REJECT, - .prog_type = BPF_PROG_TYPE_SOCK_OPS, -}, { "indirect variable-offset stack access, ok", .insns = { From 6338a94d5ab42a94e96ea36edc5f7df1fe73e68e Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Sun, 19 Feb 2023 22:04:27 +0200 Subject: [PATCH 2/8] selftests/bpf: Tests for uninitialized stack reads Three testcases to make sure that stack reads from uninitialized locations are accepted by verifier when executed in privileged mode: - read from a fixed offset; - read from a variable offset; - passing a pointer to stack to a helper converts STACK_INVALID to STACK_MISC. 
Signed-off-by: Eduard Zingerman Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/r/20230219200427.606541-3-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/uninit_stack.c | 9 ++ .../selftests/bpf/progs/uninit_stack.c | 87 +++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/uninit_stack.c create mode 100644 tools/testing/selftests/bpf/progs/uninit_stack.c diff --git a/tools/testing/selftests/bpf/prog_tests/uninit_stack.c b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c new file mode 100644 index 0000000000000..e64c71948491f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "uninit_stack.skel.h" + +void test_uninit_stack(void) +{ + RUN_TESTS(uninit_stack); +} diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c new file mode 100644 index 0000000000000..8a403470e557f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uninit_stack.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" + +/* Read an uninitialized value from stack at a fixed offset */ +SEC("socket") +__naked int read_uninit_stack_fixed_off(void *ctx) +{ + asm volatile (" \ + r0 = 0; \ + /* force stack depth to be 128 */ \ + *(u64*)(r10 - 128) = r1; \ + r1 = *(u8 *)(r10 - 8 ); \ + r0 += r1; \ + r1 = *(u8 *)(r10 - 11); \ + r1 = *(u8 *)(r10 - 13); \ + r1 = *(u8 *)(r10 - 15); \ + r1 = *(u16*)(r10 - 16); \ + r1 = *(u32*)(r10 - 32); \ + r1 = *(u64*)(r10 - 64); \ + /* read from a spill of a wrong size, it is a separate \ + * branch in check_stack_read_fixed_off() \ + */ \ + *(u32*)(r10 - 72) = r1; \ + r1 = *(u64*)(r10 - 72); \ + r0 = 0; \ + exit; \ +" + ::: __clobber_all); +} + +/* Read an uninitialized value from stack at a variable offset */ +SEC("socket") +__naked int 
read_uninit_stack_var_off(void *ctx) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + /* force stack depth to be 64 */ \ + *(u64*)(r10 - 64) = r0; \ + r0 = -r0; \ + /* give r0 a range [-31, -1] */ \ + if r0 s<= -32 goto exit_%=; \ + if r0 s>= 0 goto exit_%=; \ + /* access stack using r0 */ \ + r1 = r10; \ + r1 += r0; \ + r2 = *(u8*)(r1 + 0); \ +exit_%=: r0 = 0; \ + exit; \ +" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +static __noinline void dummy(void) {} + +/* Pass a pointer to uninitialized stack memory to a helper. + * Passed memory block should be marked as STACK_MISC after helper call. + */ +SEC("socket") +__log_level(7) __msg("fp-104=mmmmmmmm") +__naked int helper_uninit_to_misc(void *ctx) +{ + asm volatile (" \ + /* force stack depth to be 128 */ \ + *(u64*)(r10 - 128) = r1; \ + r1 = r10; \ + r1 += -128; \ + r2 = 32; \ + call %[bpf_trace_printk]; \ + /* Call to dummy() forces print_verifier_state(..., true), \ + * thus showing the stack state, matched by __msg(). \ + */ \ + call %[dummy]; \ + r0 = 0; \ + exit; \ +" + : + : __imm(bpf_trace_printk), + __imm(dummy) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; From d6f7ff9dd387861fa30cbc6375d15b586da17d33 Mon Sep 17 00:00:00 2001 From: Jesus Sanchez-Palencia Date: Wed, 8 Mar 2023 16:48:36 -0800 Subject: [PATCH 3/8] libbpf: Revert poisoning of strlcpy This reverts commit 6d0c4b11e743("libbpf: Poison strlcpy()"). It added the pragma poison directive to libbpf_internal.h to protect against accidental usage of strlcpy but ended up breaking the build for toolchains based on libcs which provide the strlcpy() declaration from string.h (e.g. uClibc-ng). 
The include order which causes the issue is: string.h, from libbpf_common.h:12, from libbpf.h:20, from libbpf_internal.h:26, from strset.c:9: Fixes: 6d0c4b11e743 ("libbpf: Poison strlcpy()") Signed-off-by: Jesus Sanchez-Palencia Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20230309004836.2808610-1-jesussanp@google.com Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/libbpf_internal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index fbaf683353945..e4d05662a96ce 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -20,8 +20,8 @@ /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 -/* prevent accidental re-addition of reallocarray()/strlcpy() */ -#pragma GCC poison reallocarray strlcpy +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray #include "libbpf.h" #include "btf.h" From 32513d40d908b267508d37994753d9bd1600914b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Mar 2023 12:41:18 -0800 Subject: [PATCH 4/8] selftests/bpf: Fix progs/find_vma_fail1.c build error. The commit 11e456cae91e ("selftests/bpf: Fix compilation errors: Assign a value to a constant") fixed the issue cleanly in bpf-next. This is an alternative fix in bpf tree to avoid merge conflict between bpf and bpf-next. 
Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/find_vma_fail1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c index b3b326b8e2d1c..6dab9cffda132 100644 --- a/tools/testing/selftests/bpf/progs/find_vma_fail1.c +++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include "vmlinux.h" #include +#define vm_flags vm_start char _license[] SEC("license") = "GPL"; From e8c8361cfdbf450f760e8a2bdbd4222d1947366b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 10 Mar 2023 12:47:51 -0800 Subject: [PATCH 5/8] selftests/bpf: Fix progs/test_deny_namespace.c issues. The following build error can be seen: progs/test_deny_namespace.c:22:19: error: call to undeclared function 'BIT_LL'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN); The struct kernel_cap_struct no longer exists in the kernel as well. Adjust bpf prog to fix both issues. 
Fixes: f122a08b197d ("capability: just use a 'u64' instead of a 'u32[2]' array") Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/test_deny_namespace.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c index 591104e79812e..e96b901a733c5 100644 --- a/tools/testing/selftests/bpf/progs/test_deny_namespace.c +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c @@ -5,12 +5,10 @@ #include #include -struct kernel_cap_struct { - __u64 val; -} __attribute__((preserve_access_index)); +typedef struct { unsigned long long val; } kernel_cap_t; struct cred { - struct kernel_cap_struct cap_effective; + kernel_cap_t cap_effective; } __attribute__((preserve_access_index)); char _license[] SEC("license") = "GPL"; @@ -18,8 +16,8 @@ char _license[] SEC("license") = "GPL"; SEC("lsm.s/userns_create") int BPF_PROG(test_userns_create, const struct cred *cred, int ret) { - struct kernel_cap_struct caps = cred->cap_effective; - __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN); + kernel_cap_t caps = cred->cap_effective; + __u64 cap_mask = 1ULL << CAP_SYS_ADMIN; if (ret) return 0; From c7df4813b149362248d6ef7be41a311e27bf75fe Mon Sep 17 00:00:00 2001 From: Kal Conley Date: Wed, 8 Mar 2023 18:40:13 +0100 Subject: [PATCH 6/8] xsk: Add missing overflow check in xdp_umem_reg The number of chunks can overflow u32. Make sure to return -EINVAL on overflow. Also remove a redundant u32 cast assigning umem->npgs. 
Fixes: bbff2f321a86 ("xsk: new descriptor addressing scheme") Signed-off-by: Kal Conley Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20230308174013.1114745-1-kal.conley@dectris.com --- net/xdp/xdp_umem.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 4681e8e8ad943..02207e852d796 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -150,10 +150,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { - u32 npgs_rem, chunk_size = mr->chunk_size, headroom = mr->headroom; bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; - u64 npgs, addr = mr->addr, size = mr->len; - unsigned int chunks, chunks_rem; + u32 chunk_size = mr->chunk_size, headroom = mr->headroom; + u64 addr = mr->addr, size = mr->len; + u32 chunks_rem, npgs_rem; + u64 chunks, npgs; int err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { @@ -188,8 +189,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (npgs > U32_MAX) return -EINVAL; - chunks = (unsigned int)div_u64_rem(size, chunk_size, &chunks_rem); - if (chunks == 0) + chunks = div_u64_rem(size, chunk_size, &chunks_rem); + if (!chunks || chunks > U32_MAX) return -EINVAL; if (!unaligned_chunks && chunks_rem) @@ -202,7 +203,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->headroom = headroom; umem->chunk_size = chunk_size; umem->chunks = chunks; - umem->npgs = (u32)npgs; + umem->npgs = npgs; umem->pgs = NULL; umem->user = NULL; umem->flags = mr->flags; From 10ec8ca8ec1a2f04c4ed90897225231c58c124a7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 20 Mar 2023 15:37:25 +0100 Subject: [PATCH 7/8] bpf: Adjust insufficient default bpf_jit_limit We've seen recent AWS EKS (Kubernetes) user reports like the following: After upgrading EKS nodes 
from v20230203 to v20230217 on our 1.24 EKS clusters after a few days a number of the nodes have containers stuck in ContainerCreating state or liveness/readiness probes reporting the following error: Readiness probe errored: rpc error: code = Unknown desc = failed to exec in container: failed to start exec "4a11039f730203ffc003b7[...]": OCI runtime exec failed: exec failed: unable to start container process: unable to init seccomp: error loading seccomp filter into kernel: error loading seccomp filter: errno 524: unknown However, we had not been seeing this issue on previous AMIs and it only started to occur on v20230217 (following the upgrade from kernel 5.4 to 5.10) with no other changes to the underlying cluster or workloads. We tried the suggestions from that issue (sysctl net.core.bpf_jit_limit=452534528) which helped to immediately allow containers to be created and probes to execute but after approximately a day the issue returned and the value returned by cat /proc/vmallocinfo | grep bpf_jit | awk '{s+=$2} END {print s}' was steadily increasing. I tested the bpf tree to observe bpf_jit_charge_modmem and bpf_jit_uncharge_modmem, the sizes passed to them, as well as bpf_jit_current under a tcpdump BPF filter, seccomp BPF and native (e)BPF programs, and the behavior all looks sane and expected, that is, nothing is "leaking" from an upstream perspective. The bpf_jit_limit knob was originally added in order to avoid a situation where unprivileged applications loading BPF programs (e.g. seccomp BPF policies) consume all the module memory space via BPF JIT such that loading of kernel modules would be prevented. The default limit was defined back in 2018 and while good enough back then, we are generally seeing far more BPF consumers today. Adjust the limit for the BPF JIT pool from the original 1/4 to 1/2 of the module memory space to better reflect today's needs and avoid more users running into potentially hard-to-debug issues.
Fixes: fdadd04931c2 ("bpf: fix bpf_jit_limit knob for PAGE_SIZE >= 64K") Reported-by: Stephen Haynes Reported-by: Lefteris Alexakis Signed-off-by: Daniel Borkmann Link: https://github.com/awslabs/amazon-eks-ami/issues/1179 Link: https://github.com/awslabs/amazon-eks-ami/issues/1219 Reviewed-by: Kuniyuki Iwashima Link: https://lore.kernel.org/r/20230320143725.8394-1-daniel@iogearbox.net Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b297e9f60ca10..e2d256c820723 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -972,7 +972,7 @@ static int __init bpf_jit_charge_init(void) { /* Only used as heuristic here to derive limit. */ bpf_jit_limit_max = bpf_jit_alloc_exec_limit(); - bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 2, + bpf_jit_limit = min_t(u64, round_up(bpf_jit_limit_max >> 1, PAGE_SIZE), LONG_MAX); return 0; } From 915efd8a446b74442039d31689d5d863caf82517 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 21 Mar 2023 14:52:31 +0100 Subject: [PATCH 8/8] xdp: bpf_xdp_metadata use EOPNOTSUPP for no driver support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a driver doesn't implement a bpf_xdp_metadata kfunc the fallback implementation returns EOPNOTSUPP, which indicates that the device driver doesn't implement this kfunc. Currently many drivers also return EOPNOTSUPP when the hint isn't available, which is ambiguous from an API point of view. Instead change drivers to return ENODATA in these cases. There can be natural reasons why a driver doesn't provide any hardware info for a specific hint, even on a frame-to-frame basis (e.g. PTP). Let's keep these cases as separate return codes. When describing the return values, adjust the function kernel-doc layout to get proper rendering for the return values.
Fixes: ab46182d0dcb ("net/mlx4_en: Support RX XDP metadata") Fixes: bc8d405b1ba9 ("net/mlx5e: Support RX XDP metadata") Fixes: 306531f0249f ("veth: Support RX XDP metadata") Fixes: 3d76a4d3d4e5 ("bpf: XDP metadata RX kfuncs") Signed-off-by: Jesper Dangaard Brouer Acked-by: Stanislav Fomichev Acked-by: Toke Høiland-Jørgensen Acked-by: Tariq Toukan Link: https://lore.kernel.org/r/167940675120.2718408.8176058626864184420.stgit@firesoul Signed-off-by: Alexei Starovoitov --- Documentation/networking/xdp-rx-metadata.rst | 7 +++++-- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 4 ++-- drivers/net/veth.c | 4 ++-- net/core/xdp.c | 10 ++++++++-- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index aac63fc2d08bd..25ce72af81c21 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -23,10 +23,13 @@ metadata is supported, this set will grow: An XDP program can use these kfuncs to read the metadata into stack variables for its own consumption. Or, to pass the metadata on to other consumers, an XDP program can store it into the metadata area carried -ahead of the packet. +ahead of the packet. Not all packets will necessary have the requested +metadata available in which case the driver returns ``-ENODATA``. Not all kfuncs have to be implemented by the device driver; when not -implemented, the default ones that return ``-EOPNOTSUPP`` will be used. +implemented, the default ones that return ``-EOPNOTSUPP`` will be used +to indicate the device driver have not implemented this kfunc. 
+ Within an XDP frame, the metadata layout (accessed via ``xdp_buff``) is as follows:: diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 0869d4fff17b1..4b5e459b6d49f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -674,7 +674,7 @@ int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) struct mlx4_en_xdp_buff *_ctx = (void *)ctx; if (unlikely(_ctx->ring->hwtstamp_rx_filter != HWTSTAMP_FILTER_ALL)) - return -EOPNOTSUPP; + return -ENODATA; *timestamp = mlx4_en_get_hwtstamp(_ctx->mdev, mlx4_en_get_cqe_ts(_ctx->cqe)); @@ -686,7 +686,7 @@ int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) struct mlx4_en_xdp_buff *_ctx = (void *)ctx; if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH))) - return -EOPNOTSUPP; + return -ENODATA; *hash = be32_to_cpu(_ctx->cqe->immed_rss_invalid); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index bcd6370de440f..c5dae48b7932f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -162,7 +162,7 @@ static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) const struct mlx5e_xdp_buff *_ctx = (void *)ctx; if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp))) - return -EOPNOTSUPP; + return -ENODATA; *timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time, _ctx->rq->clock, get_cqe_ts(_ctx->cqe)); @@ -174,7 +174,7 @@ static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) const struct mlx5e_xdp_buff *_ctx = (void *)ctx; if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))) - return -EOPNOTSUPP; + return -ENODATA; *hash = be32_to_cpu(_ctx->cqe->rss_hash_result); return 0; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 1bb54de7124d9..046461ee42ead 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1610,7 +1610,7 
@@ static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) struct veth_xdp_buff *_ctx = (void *)ctx; if (!_ctx->skb) - return -EOPNOTSUPP; + return -ENODATA; *timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp; return 0; @@ -1621,7 +1621,7 @@ static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash) struct veth_xdp_buff *_ctx = (void *)ctx; if (!_ctx->skb) - return -EOPNOTSUPP; + return -ENODATA; *hash = skb_get_hash(_ctx->skb); return 0; diff --git a/net/core/xdp.c b/net/core/xdp.c index 8c92fc5533177..247797168579c 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -720,7 +720,10 @@ __diag_ignore_all("-Wmissing-prototypes", * @ctx: XDP context pointer. * @timestamp: Return value pointer. * - * Returns 0 on success or ``-errno`` on error. + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : means device driver does not implement kfunc + * * ``-ENODATA`` : means no RX-timestamp available for this frame */ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp) { @@ -732,7 +735,10 @@ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *tim * @ctx: XDP context pointer. * @hash: Return value pointer. * - * Returns 0 on success or ``-errno`` on error. + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc + * * ``-ENODATA`` : means no RX-hash available for this frame */ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash) {