From bbbc02b7445ebfda13e4847f4f1413c6480a85a9 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:03:52 -0800
Subject: [PATCH 01/10] bpf: copy_verifier_state() should copy 'loop_entry'
 field

The bpf_verifier_state.loop_entry state should be copied by
copy_verifier_state(). Otherwise, .loop_entry values from unrelated
states would poison env->cur_state.

Additionally, env->stack should not contain any states with
.loop_entry != NULL. The states in env->stack are yet to be verified,
while .loop_entry is set for states that reached an equivalent state.
This means that env->cur_state->loop_entry should always be NULL
after pop_stack().

See the selftest in the next commit for an example of a program that
is not safe, yet is accepted by the verifier w/o this fix.

This change has some verification performance impact for selftests:

File                                Program                       Insns (A)  Insns (B)  Insns (DIFF)    States (A)  States (B)  States (DIFF)
----------------------------------  ----------------------------  ---------  ---------  --------------  ----------  ----------  -------------
arena_htab.bpf.o                    arena_htab_llvm                     717        426  -291 (-40.59%)          57          37  -20 (-35.09%)
arena_htab_asm.bpf.o                arena_htab_asm                      597        445  -152 (-25.46%)          47          37  -10 (-21.28%)
arena_list.bpf.o                    arena_list_del                      309        279  -30 (-9.71%)            23          14  -9 (-39.13%)
iters.bpf.o                         iter_subprog_check_stacksafe        155        141  -14 (-9.03%)            15          14  -1 (-6.67%)
iters.bpf.o                         iter_subprog_iters                 1094       1003  -91 (-8.32%)            88          83  -5 (-5.68%)
iters.bpf.o                         loop_state_deps2                    479        725  +246 (+51.36%)          46          63  +17 (+36.96%)
kmem_cache_iter.bpf.o               open_coded_iter                      63         59  -4 (-6.35%)              7           6  -1 (-14.29%)
verifier_bits_iter.bpf.o            max_words                            92         84  -8 (-8.70%)              8           7  -1 (-12.50%)
verifier_iterating_callbacks.bpf.o  cond_break2                         113        107  -6 (-5.31%)             12          12  +0 (+0.00%)

And a significant negative impact for sched_ext:

File               Program                 Insns (A)  Insns (B)  Insns (DIFF)          States (A)  States (B)  States (DIFF)
-----------------  ----------------------  ---------  ---------  --------------------  ----------  ----------  ------------------
bpf.bpf.o          lavd_init                    7039      14723  +7684 (+109.16%)             490        1139  +649 (+132.45%)
bpf.bpf.o          layered_dispatch            11485      10548  -937 (-8.16%)                848         762  -86 (-10.14%)
bpf.bpf.o          layered_dump                 7422    1000001  +992579 (+13373.47%)         681       31178  +30497 (+4478.27%)
bpf.bpf.o          layered_enqueue             16854      71127  +54273 (+322.02%)           1611        6450  +4839 (+300.37%)
bpf.bpf.o          p2dq_dispatch                 665        791  +126 (+18.95%)                68          78  +10 (+14.71%)
bpf.bpf.o          p2dq_init                    2343       2980  +637 (+27.19%)               201         237  +36 (+17.91%)
bpf.bpf.o          refresh_layer_cpumasks      16487     674760  +658273 (+3992.68%)         1770       65370  +63600 (+3593.22%)
bpf.bpf.o          rusty_select_cpu             1937      40872  +38935 (+2010.07%)           177        3210  +3033 (+1713.56%)
scx_central.bpf.o  central_dispatch              636       2687  +2051 (+322.48%)              63         227  +164 (+260.32%)
scx_nest.bpf.o     nest_init                     636        815  +179 (+28.14%)                60          73  +13 (+21.67%)
scx_qmap.bpf.o     qmap_dispatch                2393       3580  +1187 (+49.60%)              196         253  +57 (+29.08%)
scx_qmap.bpf.o     qmap_dump                     233        318  +85 (+36.48%)                 22          30  +8 (+36.36%)
scx_qmap.bpf.o     qmap_init                   16367      17436  +1069 (+6.53%)               603         669  +66 (+10.95%)

Note the 'layered_dump' program, which now hits the 1M instructions
limit. This impact is mitigated by the next patch.
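As an illustration (not part of the fix itself): the bug class here
is a manual field-by-field state copy that skips one pointer member,
so the destination silently keeps whatever stale value it held
before. A minimal userspace sketch, with names only loosely modeled
on the verifier structures:

    #include <stdio.h>

    struct state {
    	int insn_idx;
    	struct state *loop_entry; /* analogous to bpf_verifier_state.loop_entry */
    };

    /* Field-by-field copy that forgets loop_entry, mirroring the
     * omission fixed by this patch: dst keeps its previous pointer.
     */
    static void copy_state_buggy(struct state *dst, const struct state *src)
    {
    	dst->insn_idx = src->insn_idx;
    	/* dst->loop_entry = src->loop_entry;   <-- the missing assignment */
    }

    int main(void)
    {
    	struct state poison = { .insn_idx = 4 };   /* unrelated old state */
    	struct state dst = { .insn_idx = 0, .loop_entry = &poison };
    	struct state src = { .insn_idx = 20, .loop_entry = NULL };

    	copy_state_buggy(&dst, &src);
    	/* dst now mixes src's data with a stale loop_entry pointer */
    	printf("insn_idx=%d loop_entry=%p (expected (nil))\n",
    	       dst.insn_idx, (void *)dst.loop_entry);
    	return 0;
    }

The WARN_ON_ONCE() added to do_check() below guards against exactly
this kind of stale pointer surviving past pop_stack().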
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-2-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/verifier.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e7bc74171c99..9945466a983f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1670,6 +1670,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 	dst_state->callback_unroll_depth = src->callback_unroll_depth;
 	dst_state->used_as_loop_entry = src->used_as_loop_entry;
 	dst_state->may_goto_depth = src->may_goto_depth;
+	dst_state->loop_entry = src->loop_entry;
 	for (i = 0; i <= src->curframe; i++) {
 		dst = dst_state->frame[i];
 		if (!dst) {
@@ -19292,6 +19293,8 @@ static int do_check(struct bpf_verifier_env *env)
 				return err;
 			break;
 		} else {
+			if (WARN_ON_ONCE(env->cur_state->loop_entry))
+				env->cur_state->loop_entry = NULL;
 			do_print_state = true;
 			continue;
 		}

From 6da35da1a36c8c7a4cb9b25695c5e95f0c1620d8 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:03:53 -0800
Subject: [PATCH 02/10] selftests/bpf: test correct loop_entry update in
 copy_verifier_state

A somewhat cumbersome test case sensitive to correct copying of
bpf_verifier_state->loop_entry fields in
verifier.c:copy_verifier_state().

W/o the fix from the previous commit the following program is
accepted as safe:

    1:  /* poison block */
    2:  if (random() != 24) {  // assume false branch is placed first
    3:      i = iter_new();
    4:      while (iter_next(i));
    5:      iter_destroy(i);
    6:      return;
    7:  }
    8:
    9:  /* dfs_depth block */
    10: for (i = 10; i > 0; i--);
    11:
    12: /* main block */
    13: i = iter_new();  // fp[-16]
    14: b = -24;         // r8
    15: for (;;) {
    16:     if (iter_next(i))
    17:         break;
    18:     if (random() == 77) {  // assume false branch is placed first
    19:         *(u64 *)(r10 + b) = 7;  // this is not safe when b == -25
    20:         iter_destroy(i);
    21:         return;
    22:     }
    23:     if (random() == 42) {  // assume false branch is placed first
    24:         b = -25;
    25:     }
    26: }
    27: iter_destroy(i);

The goal of this example is to:
(a) poison env->cur_state->loop_entry with a state S, such that
    S->branches == 0;
(b) set state S as a loop_entry for all checkpoints in the
    /* main block */, thus forcing NOT_EXACT states comparisons;
(c) exploit the incorrect loop_entry set for the checkpoint at
    line 18 by first creating a checkpoint with b == -24 and then
    pruning the state with b == -25 using that checkpoint.

The /* poison block */ is responsible for goal (a). It forces the
verifier to first validate some unrelated iterator-based loop, which
leads to an update_loop_entry() call in is_state_visited(), which
places the checkpoint created at line 4 as
env->cur_state->loop_entry. Starting from line 8, the branch count
for that checkpoint is 0.

The /* dfs_depth block */ is responsible for goal (b). It abuses the
fact that update_loop_entry(cur, hdr) only updates cur->loop_entry
when hdr->dfs_depth <= cur->dfs_depth (a simplified sketch of this
guard follows below). After line 12 every state has a dfs_depth
bigger than the dfs_depth of the poisoned
env->cur_state->loop_entry. Thus the above condition is never true
for lines 12-27.

The /* main block */ is responsible for goal (c).
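To make the guard from goal (b) concrete, here is a simplified,
hypothetical userspace sketch of update_loop_entry(). It is heavily
reduced: the real verifier.c code resolves both arguments via
get_loop_entry() and also checks hdr->branches. The key point is
that the right-hand side of the comparison is effectively the
dfs_depth of cur's existing loop entry, if any:

    #include <stdio.h>

    struct state {
    	unsigned int dfs_depth;
    	struct state *loop_entry;
    };

    static void update_loop_entry(struct state *cur, struct state *hdr)
    {
    	/* compare against cur's existing entry, not cur itself */
    	struct state *cur1 = cur->loop_entry ? cur->loop_entry : cur;

    	/* the poisoned entry has a small dfs_depth while every hdr
    	 * in the main block has a bigger one, so this stays false
    	 * and the stale (branches == 0) entry is never replaced
    	 */
    	if (hdr->dfs_depth <= cur1->dfs_depth)
    		cur->loop_entry = hdr;
    }

    int main(void)
    {
    	struct state poison = { .dfs_depth = 3 };  /* from the poison block */
    	struct state cur = { .dfs_depth = 15, .loop_entry = &poison };
    	struct state hdr = { .dfs_depth = 14 };    /* main block checkpoint */

    	update_loop_entry(&cur, &hdr);
    	printf("loop_entry->dfs_depth = %u (still the poisoned 3)\n",
    	       cur.loop_entry->dfs_depth);
    	return 0;
    }

With the poisoned entry at depth 3, no checkpoint in the main block
can ever replace it, so comparisons there keep using the stale,
branches == 0 entry.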
Verification proceeds as follows:
- checkpoint {b=-24,i=active} created at line 16;
- jump 18->23 is verified first, jump to 19 pushed to stack;
- jump 23->26 is verified first, jump to 24 pushed to stack;
- checkpoint {b=-24,i=active} created at line 15;
- current state is pruned by checkpoint created at line 16,
  this sets the branches count for the checkpoint at line 15 to 0;
- jump to 24 is popped from stack;
- line 16 is reached in state {b=-25,i=active};
- this is pruned by the previous checkpoint {b=-24,i=active}:
  - the checkpoint's loop_entry is poisoned and has a branch count
    of 0, hence states are compared using NOT_EXACT rules;
  - b is not marked precise yet.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-3-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/progs/iters.c | 116 ++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 190822b2f08b..007831dc8c46 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1174,6 +1174,122 @@ __naked int loop_state_deps2(void)
 	);
 }
 
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps3(void)
+{
+	/* This is equivalent to the C program below.
+	 *
+	 * if (random() != 24) {       // assume false branch is placed first
+	 *	i = iter_new();        // fp[-8]
+	 *	while (iter_next(i));
+	 *	iter_destroy(i);
+	 *	return;
+	 * }
+	 *
+	 * for (i = 10; i > 0; i--);   // increase dfs_depth for child states
+	 *
+	 * i = iter_new();             // fp[-8]
+	 * b = -24;                    // r8
+	 * for (;;) {                  // checkpoint (L)
+	 *	if (iter_next(i))      // checkpoint (N)
+	 *		break;
+	 *	if (random() == 77) {  // assume false branch is placed first
+	 *		*(u64 *)(r10 + b) = 7;  // this is not safe when b == -25
+	 *		iter_destroy(i);
+	 *		return;
+	 *	}
+	 *	if (random() == 42) {  // assume false branch is placed first
+	 *		b = -25;
+	 *	}
+	 * }
+	 * iter_destroy(i);
+	 *
+	 * In case of a buggy verifier the first loop might poison
+	 * env->cur_state->loop_entry with a state having 0 branches
+	 * and a small dfs_depth. This would trigger NOT_EXACT states
+	 * comparison for some states within the second loop.
+	 * Specifically, checkpoint (L) might be problematic if:
+	 * - the branch with '*(u64 *)(r10 + b) = 7' is not explored yet;
+	 * - checkpoint (L) is first reached in state {b=-24};
+	 * - traversal is pruned at checkpoint (N) setting checkpoint's (L)
+	 *   branch count to 0, thus making it eligible for use in pruning;
+	 * - checkpoint (L) is next reached in state {b=-25},
+	 *   this would cause NOT_EXACT comparison with a state {b=-24}
+	 *   while 'b' is not marked precise yet.
+	 */
+	asm volatile (
+		"call %[bpf_get_prandom_u32];"
+		"if r0 == 24 goto 2f;"
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 5;"
+		"call %[bpf_iter_num_new];"
+	"1:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_next];"
+		"if r0 != 0 goto 1b;"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+	"2:"
+		/* loop to increase dfs_depth */
+		"r0 = 10;"
+	"3:"
+		"r0 -= 1;"
+		"if r0 != 0 goto 3b;"
+		/* end of loop */
+		"r1 = r10;"
+		"r1 += -8;"
+		"r2 = 0;"
+		"r3 = 10;"
+		"call %[bpf_iter_num_new];"
+		"r8 = -24;"
+	"main_loop_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_next];"
+		"if r0 == 0 goto main_loop_end_%=;"
+		/* first if */
+		"call %[bpf_get_prandom_u32];"
+		"if r0 == 77 goto unsafe_write_%=;"
+		/* second if */
+		"call %[bpf_get_prandom_u32];"
+		"if r0 == 42 goto poison_r8_%=;"
+		/* iterate */
+		"goto main_loop_%=;"
+	"main_loop_end_%=:"
+		"r1 = r10;"
+		"r1 += -8;"
+		"call %[bpf_iter_num_destroy];"
+		"r0 = 0;"
+		"exit;"
+
+	"unsafe_write_%=:"
+		"r0 = r10;"
+		"r0 += r8;"
+		"r1 = 7;"
+		"*(u64 *)(r0 + 0) = r1;"
+		"goto main_loop_end_%=;"
+
+	"poison_r8_%=:"
+		"r8 = -25;"
+		"goto main_loop_%=;"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_iter_num_new),
+	  __imm(bpf_iter_num_next),
+	  __imm(bpf_iter_num_destroy)
+	: __clobber_all
+	);
+}
+
 SEC("?raw_tp")
 __success
 __naked int triple_continue(void)

From 9e63fdb0cbdf3268c86638a8274f4d5549a82820 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:03:54 -0800
Subject: [PATCH 03/10] bpf: don't do clean_live_states when
 state->loop_entry->branches > 0

verifier.c:is_state_visited() uses the RANGE_WITHIN states comparison
rules for cached states that have a loop_entry with a non-zero
branches count (meaning that the loop_entry's verification is not
yet done).

The RANGE_WITHIN rules in regsafe()/stacksafe() require register and
stack object types to be identical in the current and old states.

verifier.c:clean_live_states() replaces registers and stack spills
with NOT_INIT/STACK_INVALID marks, if these registers/stack spills
are not read in any child state. This means that clean_live_states()
works against the loop convergence logic under some conditions. See
the selftest in the next patch for a specific example.

Mitigate this by prohibiting clean_verifier_state() when
state->loop_entry->branches > 0.

This undoes the negative verification performance impact of the
copy_verifier_state() fix from the previous patch.
Below is a comparison between master and the current patch.
selftests:

File                                Program                       Insns (A)  Insns (B)  Insns (DIFF)     States (A)  States (B)  States (DIFF)
----------------------------------  ----------------------------  ---------  ---------  ---------------  ----------  ----------  --------------
arena_htab.bpf.o                    arena_htab_llvm                     717        423  -294 (-41.00%)           57          37  -20 (-35.09%)
arena_htab_asm.bpf.o                arena_htab_asm                      597        445  -152 (-25.46%)           47          37  -10 (-21.28%)
arena_list.bpf.o                    arena_list_add                     1493       1822  +329 (+22.04%)           30          37  +7 (+23.33%)
arena_list.bpf.o                    arena_list_del                      309        261  -48 (-15.53%)            23          15  -8 (-34.78%)
iters.bpf.o                         checkpoint_states_deletion        18125      22154  +4029 (+22.23%)         818         918  +100 (+12.22%)
iters.bpf.o                         iter_nested_deeply_iters            593        367  -226 (-38.11%)           67          43  -24 (-35.82%)
iters.bpf.o                         iter_nested_iters                   813        772  -41 (-5.04%)             79          72  -7 (-8.86%)
iters.bpf.o                         iter_subprog_check_stacksafe        155        135  -20 (-12.90%)            15          14  -1 (-6.67%)
iters.bpf.o                         iter_subprog_iters                 1094        808  -286 (-26.14%)           88          68  -20 (-22.73%)
iters.bpf.o                         loop_state_deps2                    479        356  -123 (-25.68%)           46          35  -11 (-23.91%)
iters.bpf.o                         triple_continue                      35         31  -4 (-11.43%)              3           3  +0 (+0.00%)
kmem_cache_iter.bpf.o               open_coded_iter                      63         59  -4 (-6.35%)               7           6  -1 (-14.29%)
mptcp_subflow.bpf.o                 _getsockopt_subflow                 501        446  -55 (-10.98%)            25          23  -2 (-8.00%)
pyperf600_iter.bpf.o                on_event                          12339       6379  -5960 (-48.30%)         441         286  -155 (-35.15%)
verifier_bits_iter.bpf.o            max_words                            92         84  -8 (-8.70%)               8           7  -1 (-12.50%)
verifier_iterating_callbacks.bpf.o  cond_break2                         113        192  +79 (+69.91%)            12          21  +9 (+75.00%)

sched_ext:

File               Program                 Insns (A)  Insns (B)  Insns (DIFF)       States (A)  States (B)  States (DIFF)
-----------------  ----------------------  ---------  ---------  -----------------  ----------  ----------  ----------------
bpf.bpf.o          layered_dispatch            11485       9039  -2446 (-21.30%)           848         662  -186 (-21.93%)
bpf.bpf.o          layered_dump                 7422       5022  -2400 (-32.34%)           681         298  -383 (-56.24%)
bpf.bpf.o          layered_enqueue             16854      13753  -3101 (-18.40%)          1611        1308  -303 (-18.81%)
bpf.bpf.o          layered_init              1000001       5549  -994452 (-99.45%)       84672         523  -84149 (-99.38%)
bpf.bpf.o          layered_runnable             3149       1899  -1250 (-39.70%)           288         151  -137 (-47.57%)
bpf.bpf.o          p2dq_init                    2343       1936  -407 (-17.37%)            201         170  -31 (-15.42%)
bpf.bpf.o          refresh_layer_cpumasks      16487       1285  -15202 (-92.21%)         1770         120  -1650 (-93.22%)
bpf.bpf.o          rusty_select_cpu             1937       1386  -551 (-28.45%)            177         125  -52 (-29.38%)
scx_central.bpf.o  central_dispatch              636        600  -36 (-5.66%)               63          59  -4 (-6.35%)
scx_central.bpf.o  central_init                  913        632  -281 (-30.78%)             48          39  -9 (-18.75%)
scx_nest.bpf.o     nest_init                     636        601  -35 (-5.50%)               60          58  -2 (-3.33%)
scx_pair.bpf.o     pair_dispatch             1000001       1914  -998087 (-99.81%)       58169         142  -58027 (-99.76%)
scx_qmap.bpf.o     qmap_dispatch                2393       2187  -206 (-8.61%)             196         174  -22 (-11.22%)
scx_qmap.bpf.o     qmap_init                   16367      22777  +6410 (+39.16%)           603         768  +165 (+27.36%)

'layered_init' and 'pair_dispatch' hit 1M on master, but are verified
ok with this patch.
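To build intuition for the interaction described above, a small
illustrative userspace sketch (made-up enum and helper, not verifier
code) of the exact-comparison rule that cleaning violates:

    #include <stdbool.h>
    #include <stdio.h>

    /* reduced model of one stack slot; the real stacksafe()
     * compares struct bpf_stack_state entries
     */
    enum slot_type { STACK_INVALID, STACK_MISC, STACK_SPILL };

    /* RANGE_WITHIN-style rule: slot types must be identical */
    static bool slot_safe_exact(enum slot_type old, enum slot_type cur)
    {
    	return old == cur;
    }

    int main(void)
    {
    	enum slot_type cached = STACK_MISC; /* checkpoint inside a loop */
    	enum slot_type cur = STACK_MISC;    /* state reaching it again  */

    	printf("before cleaning: match=%d\n", slot_safe_exact(cached, cur));

    	/* clean_live_states() marks the slot invalid because no
    	 * child state read it ...
    	 */
    	cached = STACK_INVALID;

    	/* ... and the exact comparison that loop convergence
    	 * relies on now fails
    	 */
    	printf("after cleaning:  match=%d\n", slot_safe_exact(cached, cur));
    	return 0;
    }

Under NOT_EXACT rules a cleaned slot only widens the cached state and
helps pruning; that is why this patch merely skips cleaning while
state->loop_entry->branches > 0 instead of removing it.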
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-4-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/verifier.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9945466a983f..8963ec7e2032 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -17863,12 +17863,16 @@ static void clean_verifier_state(struct bpf_verifier_env *env,
 static void clean_live_states(struct bpf_verifier_env *env, int insn,
 			      struct bpf_verifier_state *cur)
 {
+	struct bpf_verifier_state *loop_entry;
 	struct bpf_verifier_state_list *sl;
 
 	sl = *explored_state(env, insn);
 	while (sl) {
 		if (sl->state.branches)
 			goto next;
+		loop_entry = get_loop_entry(&sl->state);
+		if (loop_entry && loop_entry->branches)
+			goto next;
 		if (sl->state.insn_idx != insn ||
 		    !same_callsites(&sl->state, cur))
 			goto next;

From bb7abf3049025f7e4ad91cff2d9fe8381a9278af Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:03:55 -0800
Subject: [PATCH 04/10] selftests/bpf: check states pruning for deeply nested
 iterator

A test case with ridiculously deep bpf_for() nesting and a
conditional update of a stack location.

Consider the innermost loop structure:

    1: bpf_for(o, 0, 10)
    2:     if (unlikely(bpf_get_prandom_u32()))
    3:         buf[0] = 42;
    4:

Assuming that verifier.c:clean_live_states() operates w/o the change
from the previous patch (e.g. as on current master), verification
would proceed as follows:
- at (1) state {buf[0]=?,o=drained}:
  - checkpoint
  - push visit to (2) for later
- at (4) {buf[0]=?,o=drained}
- pop (2) {buf[0]=?,o=active}, push visit to (3) for later
- at (1) {buf[0]=?,o=active}
  - checkpoint
  - push visit to (2) for later
- at (4) {buf[0]=?,o=drained}
- pop (2) {buf[0]=?,o=active}, push visit to (3) for later
- at (1) {buf[0]=?,o=active}:
  - checkpoint reached, checkpoint's branch count becomes 0
  - checkpoint is processed by clean_live_states() and
    becomes {o=active}
- pop (3) {buf[0]=42,o=active}
- at (1), {buf[0]=42,o=active}
  - checkpoint
  - push visit to (2) for later
- at (4) {buf[0]=42,o=drained}
- pop (2) {buf[0]=42,o=active}, push visit to (3) for later
- at (1) {buf[0]=42,o=active}, checkpoint reached
- pop (3) {buf[0]=42,o=active}
- at (1) {buf[0]=42,o=active}:
  - checkpoint reached, checkpoint's branch count becomes 0
  - checkpoint is processed by clean_live_states() and
    becomes {o=active}
- ...

Note how clean_live_states() converted the checkpoint
{buf[0]=42,o=active} to {o=active} and it can no longer be matched
against {buf[0]=42,o=active}, because iterator-based states are
compared using stacksafe(..., RANGE_WITHIN), which requires stack
slots to have the same types. At the same time there are still states
{buf[0]=42,o=active} pushed to the DFS stack.
This behaviour becomes exacerbated with multiple nesting levels;
here are the veristat results:
- nesting level 1: 69 insns
- nesting level 2: 258 insns
- nesting level 3: 900 insns
- nesting level 4: 4754 insns
- nesting level 5: 35944 insns
- nesting level 6: 312558 insns
- nesting level 7: 1M limit

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-5-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 tools/testing/selftests/bpf/progs/iters.c | 23 +++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 007831dc8c46..427b72954b87 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -7,6 +7,8 @@
 #include "bpf_misc.h"
 #include "bpf_compiler.h"
 
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
 static volatile int zero = 0;
 
 int my_pid;
@@ -1628,4 +1630,25 @@ int iter_destroy_bad_arg(const void *ctx)
 	return 0;
 }
 
+SEC("raw_tp")
+__success
+int clean_live_states(const void *ctx)
+{
+	char buf[1];
+	int i, j, k, l, m, n, o;
+
+	bpf_for(i, 0, 10)
+	bpf_for(j, 0, 10)
+	bpf_for(k, 0, 10)
+	bpf_for(l, 0, 10)
+	bpf_for(m, 0, 10)
+	bpf_for(n, 0, 10)
+	bpf_for(o, 0, 10) {
+		if (unlikely(bpf_get_prandom_u32()))
+			buf[0] = 42;
+		bpf_printk("%s", buf);
+	}
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";

From c1ce66357f8f7d73ff70353ec15f1fba164bc7e1 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:03:56 -0800
Subject: [PATCH 05/10] bpf: detect infinite loop in get_loop_entry()

Tejun Heo reported an infinite loop in get_loop_entry() when
verifying a sched_ext program layered_dispatch in [1]. After some
investigation I'm sure that the root cause is fixed by patches 1 and
3 in this patch set.

To err on the safe side, this commit modifies get_loop_entry() to
detect infinite loops and abort verification in such cases. The
number of steps get_loop_entry(S) can make while moving along the
bpf_verifier_state->loop_entry chain is bounded by the DFS depth of
state S. This fact is exploited to implement the check.

To avoid dealing with the potential error code returned from
get_loop_entry() in update_loop_entry(), remove the get_loop_entry()
calls there:
- This change does not affect correctness. Loop entries would still
  be updated during the backward DFS move in update_branch_counts().
- This change does not affect performance. Measurements show that
  get_loop_entry() performs at most 1 step on selftests and at most
  2 steps on sched_ext programs (1 step in 17 cases, 2 steps in
  3 cases, measured using "do-not-submit" patches from [2]).

[1] https://github.com/sched-ext/scx/
    commit f0b27038ea10 ("XXX - kernel stall")
[2] https://github.com/eddyz87/bpf/tree/get-loop-entry-hungup

Reported-by: Tejun Heo
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-6-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/verifier.c | 39 +++++++++++++++++++++------------------
 1 file changed, 21 insertions(+), 18 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8963ec7e2032..ddf6ed9f3482 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1803,12 +1803,9 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
  *         h = entries[h]
  *     return h
  *
- * # Update n's loop entry if h's outermost entry comes
- * # before n's outermost entry in current DFS path.
+ * # Update n's loop entry if h comes before n in current DFS path.
  * def update_loop_entry(n, h):
- *    n1 = get_loop_entry(n) or n
- *    h1 = get_loop_entry(h) or h
- *    if h1 in path and depths[h1] <= depths[n1]:
+ *    if h in path and depths[entries.get(n, n)] < depths[n]:
  *        entries[n] = h1
  *
  * def dfs(n, depth):
@@ -1820,7 +1817,7 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
  *            # Case A: explore succ and update cur's loop entry
  *            #         only if succ's entry is in current DFS path.
  *            dfs(succ, depth + 1)
- *            h = get_loop_entry(succ)
+ *            h = entries.get(succ, None)
  *            update_loop_entry(n, h)
  *        else:
  *            # Case B or C depending on `h1 in path` check in update_loop_entry().
@@ -1835,12 +1832,20 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
  * - handle cases B and C in is_state_visited();
  * - update topmost loop entry for intermediate states in get_loop_entry().
  */
-static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
+static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
+						 struct bpf_verifier_state *st)
 {
 	struct bpf_verifier_state *topmost = st->loop_entry, *old;
+	u32 steps = 0;
 
-	while (topmost && topmost->loop_entry && topmost != topmost->loop_entry)
+	while (topmost && topmost->loop_entry && topmost != topmost->loop_entry) {
+		if (steps++ > st->dfs_depth) {
+			WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n");
+			verbose(env, "verifier bug: infinite loop in get_loop_entry()\n");
+			return ERR_PTR(-EFAULT);
+		}
 		topmost = topmost->loop_entry;
+	}
 	/* Update loop entries for intermediate states to avoid this
 	 * traversal in future get_loop_entry() calls.
 	 */
@@ -1854,17 +1859,13 @@ static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st)
 
 static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
 {
-	struct bpf_verifier_state *cur1, *hdr1;
-
-	cur1 = get_loop_entry(cur) ?: cur;
-	hdr1 = get_loop_entry(hdr) ?: hdr;
-	/* The head1->branches check decides between cases B and C in
-	 * comment for get_loop_entry(). If hdr1->branches == 0 then
+	/* The hdr->branches check decides between cases B and C in
+	 * comment for get_loop_entry(). If hdr->branches == 0 then
 	 * head's topmost loop entry is not in current DFS path,
 	 * hence 'cur' and 'hdr' are not in the same loop and there is
 	 * no need to update cur->loop_entry.
 	 */
-	if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) {
+	if (hdr->branches && hdr->dfs_depth <= (cur->loop_entry ?: cur)->dfs_depth) {
 		cur->loop_entry = hdr;
 		hdr->used_as_loop_entry = true;
 	}
@@ -17870,8 +17871,8 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
 	while (sl) {
 		if (sl->state.branches)
 			goto next;
-		loop_entry = get_loop_entry(&sl->state);
-		if (loop_entry && loop_entry->branches)
+		loop_entry = get_loop_entry(env, &sl->state);
+		if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches)
 			goto next;
 		if (sl->state.insn_idx != insn ||
 		    !same_callsites(&sl->state, cur))
@@ -18749,7 +18750,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		 *
 		 * Additional details are in the comment before get_loop_entry().
 		 */
-		loop_entry = get_loop_entry(&sl->state);
+		loop_entry = get_loop_entry(env, &sl->state);
+		if (IS_ERR(loop_entry))
+			return PTR_ERR(loop_entry);
 		force_exact = loop_entry && loop_entry->branches > 0;
 		if (states_equal(env, &sl->state, cur, force_exact ?
 				 RANGE_WITHIN : NOT_EXACT)) {
 			if (force_exact)

From bb7abf3049025f7e4ad91cff2d9fe8381a9278af Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:03:57 -0800
Subject: [PATCH 06/10] bpf: make state->dfs_depth < state->loop_entry->dfs_depth
 an invariant

For a generic loop detection algorithm a graph node can be a loop
header for itself. However, state loop entries are computed for use
in is_state_visited(), where get_loop_entry(state)->branches is
checked. is_state_visited() also checks state->branches, thus the
case when state == state->loop_entry is not interesting for
is_state_visited().

This change does not affect correctness, but simplifies
get_loop_entry() a bit and also simplifies the change to
update_loop_entry() in patch 9.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-7-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/verifier.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ddf6ed9f3482..cb5ae7dc6488 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1799,7 +1799,7 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
  * # Find outermost loop entry known for n
  * def get_loop_entry(n):
  *     h = entries.get(n, None)
- *     while h in entries and entries[h] != h:
+ *     while h in entries:
  *         h = entries[h]
  *     return h
  *
@@ -1838,7 +1838,7 @@ static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
 	struct bpf_verifier_state *topmost = st->loop_entry, *old;
 	u32 steps = 0;
 
-	while (topmost && topmost->loop_entry && topmost != topmost->loop_entry) {
+	while (topmost && topmost->loop_entry) {
 		if (steps++ > st->dfs_depth) {
 			WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n");
 			verbose(env, "verifier bug: infinite loop in get_loop_entry()\n");
@@ -1865,7 +1865,7 @@ static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifie
 	 * hence 'cur' and 'hdr' are not in the same loop and there is
 	 * no need to update cur->loop_entry.
 	 */
-	if (hdr->branches && hdr->dfs_depth <= (cur->loop_entry ?: cur)->dfs_depth) {
+	if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) {
 		cur->loop_entry = hdr;
 		hdr->used_as_loop_entry = true;
 	}

From 590eee4268368cfa05db4d4c5524c86e8d94a0bd Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:03:58 -0800
Subject: [PATCH 07/10] bpf: do not update state->loop_entry in
 get_loop_entry()

Patch 9 is simpler if fewer places modify the loop_entry field.
The loop deleted by this patch does not affect correctness, but is a
performance optimization. However, measurements on selftests and
sched_ext programs show that this optimization is unnecessary:
- at most 2 steps are done in get_loop_entry();
- most of the time 0 or 1 steps are done in get_loop_entry().
Measured using "do-not-submit" patches from here:
https://github.com/eddyz87/bpf/tree/get-loop-entry-hungup

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-8-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 kernel/bpf/verifier.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cb5ae7dc6488..e6ed71e053b7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1829,13 +1829,12 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta
  *   and cur's loop entry has to be updated (case A), handle this in
  *   update_branch_counts();
  * - use st->branch > 0 as a signal that st is in the current DFS path;
- * - handle cases B and C in is_state_visited();
- * - update topmost loop entry for intermediate states in get_loop_entry().
+ * - handle cases B and C in is_state_visited().
  */
 static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
 						 struct bpf_verifier_state *st)
 {
-	struct bpf_verifier_state *topmost = st->loop_entry, *old;
+	struct bpf_verifier_state *topmost = st->loop_entry;
 	u32 steps = 0;
 
 	while (topmost && topmost->loop_entry) {
@@ -1846,14 +1845,6 @@ static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
 		}
 		topmost = topmost->loop_entry;
 	}
-	/* Update loop entries for intermediate states to avoid this
-	 * traversal in future get_loop_entry() calls.
-	 */
-	while (st && st->loop_entry != topmost) {
-		old = st->loop_entry;
-		st->loop_entry = topmost;
-		st = old;
-	}
 	return topmost;
 }

From 5564ee3abb2ebece37000662a52eb6607b9c9f7d Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:03:59 -0800
Subject: [PATCH 08/10] bpf: use list_head to track explored states and free
 list

The next patch in the set needs the ability to remove individual
states from env->free_list while only holding a pointer to the
state, which requires env->free_list to be a doubly linked list.
This patch converts env->free_list and struct bpf_verifier_state_list
to use struct list_head for this purpose. The change to
env->explored_states is collateral.

Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-9-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf_verifier.h |  9 +++--
 kernel/bpf/verifier.c        | 74 ++++++++++++++++++------------------
 2 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 32c23f2a3086..222f6af278ec 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -498,7 +498,7 @@ struct bpf_verifier_state {
 /* linked list of verifier states used to prune search */
 struct bpf_verifier_state_list {
 	struct bpf_verifier_state state;
-	struct bpf_verifier_state_list *next;
+	struct list_head node;
 	int miss_cnt, hit_cnt;
 };
 
@@ -710,8 +710,11 @@ struct bpf_verifier_env {
 	bool test_state_freq;		/* test verifier with different pruning frequency */
 	bool test_reg_invariants;	/* fail verification on register invariants violations */
 	struct bpf_verifier_state *cur_state; /* current verifier state */
-	struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
-	struct bpf_verifier_state_list *free_list;
+	/* Search pruning optimization, array of list_heads for
+	 * lists of struct bpf_verifier_state_list.
+	 */
+	struct list_head *explored_states;
+	struct list_head free_list;	/* list of struct bpf_verifier_state_list */
 	struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
 	struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */
 	u32 used_map_cnt;		/* number of used maps */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e6ed71e053b7..d04d6ca2d5a6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1691,7 +1691,7 @@ static u32 state_htab_size(struct bpf_verifier_env *env)
 	return env->prog->len;
 }
 
-static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
+static struct list_head *explored_state(struct bpf_verifier_env *env, int idx)
 {
 	struct bpf_verifier_state *cur = env->cur_state;
 	struct bpf_func_state *state = cur->frame[cur->curframe];
@@ -8443,10 +8443,12 @@ static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env,
 {
 	struct bpf_verifier_state_list *sl;
 	struct bpf_verifier_state *st;
+	struct list_head *pos, *head;
 
 	/* Explored states are pushed in stack order, most recent states come first */
-	sl = *explored_state(env, insn_idx);
-	for (; sl; sl = sl->next) {
+	head = explored_state(env, insn_idx);
+	list_for_each(pos, head) {
+		sl = container_of(pos, struct bpf_verifier_state_list, node);
 		/* If st->branches != 0 state is a part of current DFS verification path,
 		 * hence cur & st for a loop.
 		 */
@@ -17857,20 +17859,20 @@ static void clean_live_states(struct bpf_verifier_env *env, int insn,
 {
 	struct bpf_verifier_state *loop_entry;
 	struct bpf_verifier_state_list *sl;
+	struct list_head *pos, *head;
 
-	sl = *explored_state(env, insn);
-	while (sl) {
+	head = explored_state(env, insn);
+	list_for_each(pos, head) {
+		sl = container_of(pos, struct bpf_verifier_state_list, node);
 		if (sl->state.branches)
-			goto next;
+			continue;
 		loop_entry = get_loop_entry(env, &sl->state);
 		if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches)
-			goto next;
+			continue;
 		if (sl->state.insn_idx != insn ||
 		    !same_callsites(&sl->state, cur))
-			goto next;
+			continue;
 		clean_verifier_state(env, &sl->state);
-next:
-		sl = sl->next;
 	}
 }
 
@@ -18561,10 +18563,11 @@ static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf
 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 {
 	struct bpf_verifier_state_list *new_sl;
-	struct bpf_verifier_state_list *sl, **pprev;
+	struct bpf_verifier_state_list *sl;
 	struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry;
 	int i, j, n, err, states_cnt = 0;
 	bool force_new_state, add_new_state, force_exact;
+	struct list_head *pos, *tmp, *head;
 
 	force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) ||
 			  /* Avoid accumulating infinitely long jmp history */
@@ -18583,15 +18586,14 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	    env->insn_processed - env->prev_insn_processed >= 8)
 		add_new_state = true;
 
-	pprev = explored_state(env, insn_idx);
-	sl = *pprev;
-
 	clean_live_states(env, insn_idx, cur);
 
-	while (sl) {
+	head = explored_state(env, insn_idx);
+	list_for_each_safe(pos, tmp, head) {
+		sl = container_of(pos, struct bpf_verifier_state_list, node);
 		states_cnt++;
 		if (sl->state.insn_idx != insn_idx)
-			goto next;
+			continue;
 
 		if (sl->state.branches) {
 			struct bpf_func_state *frame = sl->state.frame[sl->state.curframe];
@@ -18796,7 +18798,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			/* the state is unlikely to be useful. Remove it to
 			 * speed up verification
 			 */
-			*pprev = sl->next;
+			list_del(&sl->node);
 			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
 			    !sl->state.used_as_loop_entry) {
 				u32 br = sl->state.branches;
@@ -18812,15 +18814,9 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 				 * walk it later. Add it for free_list instead to
 				 * be freed at the end of verification
 				 */
-				sl->next = env->free_list;
-				env->free_list = sl;
+				list_add(&sl->node, &env->free_list);
 			}
-			sl = *pprev;
-			continue;
 		}
-next:
-		pprev = &sl->next;
-		sl = *pprev;
 	}
 
 	if (env->max_states_per_insn < states_cnt)

 	cur->first_insn_idx = insn_idx;
 	cur->insn_hist_start = cur->insn_hist_end;
 	cur->dfs_depth = new->dfs_depth + 1;
-	new_sl->next = *explored_state(env, insn_idx);
-	*explored_state(env, insn_idx) = new_sl;
+	list_add(&new_sl->node, head);
+
 	/* connect new state to parentage chain. Current frame needs all
 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
 	 * to the stack implicitly by JITs) so in callers' frames connect just
@@ -22194,31 +22190,29 @@ static int remove_fastcall_spills_fills(struct bpf_verifier_env *env)
 
 static void free_states(struct bpf_verifier_env *env)
 {
-	struct bpf_verifier_state_list *sl, *sln;
+	struct bpf_verifier_state_list *sl;
+	struct list_head *head, *pos, *tmp;
 	int i;
 
-	sl = env->free_list;
-	while (sl) {
-		sln = sl->next;
+	list_for_each_safe(pos, tmp, &env->free_list) {
+		sl = container_of(pos, struct bpf_verifier_state_list, node);
 		free_verifier_state(&sl->state, false);
 		kfree(sl);
-		sl = sln;
 	}
-	env->free_list = NULL;
+	INIT_LIST_HEAD(&env->free_list);
 
 	if (!env->explored_states)
 		return;
 
 	for (i = 0; i < state_htab_size(env); i++) {
-		sl = env->explored_states[i];
+		head = &env->explored_states[i];
 
-		while (sl) {
-			sln = sl->next;
+		list_for_each_safe(pos, tmp, head) {
+			sl = container_of(pos, struct bpf_verifier_state_list, node);
 			free_verifier_state(&sl->state, false);
 			kfree(sl);
-			sl = sln;
 		}
-		env->explored_states[i] = NULL;
+		INIT_LIST_HEAD(&env->explored_states[i]);
 	}
 }
@@ -23186,12 +23180,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
 	env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS;
 
 	env->explored_states = kvcalloc(state_htab_size(env),
-					sizeof(struct bpf_verifier_state_list *),
+					sizeof(struct list_head),
 					GFP_USER);
 	ret = -ENOMEM;
 	if (!env->explored_states)
 		goto skip_full_check;
 
+	for (i = 0; i < state_htab_size(env); i++)
+		INIT_LIST_HEAD(&env->explored_states[i]);
+	INIT_LIST_HEAD(&env->free_list);
+
 	ret = check_btf_info_early(env, attr, uattr);
 	if (ret < 0)
 		goto skip_full_check;

From 408fcf946b2badb0a81c69ab837b6dfc0e76aa87 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:04:00 -0800
Subject: [PATCH 09/10] bpf: free verifier states when they are no longer
 referenced

With the fixes from patches 1 and 3 applied, Patrick Somaru reported
an increase in memory consumption for sched_ext iterator-based
programs hitting the 1M instruction limit. For example, 2Gb VMs ran
out of memory while verifying a program. Similar behaviour could be
reproduced on current bpf-next master.

Here is an example of such a program:

    /* verification completes if given 16G of RAM,
     * final env->free_list size is 369,960 entries.
     */
    SEC("raw_tp")
    __flag(BPF_F_TEST_STATE_FREQ)
    __success
    int free_list_bomb(const void *ctx)
    {
    	volatile char buf[48] = {};
    	unsigned i, j;

    	j = 0;
    	bpf_for(i, 0, 10) {
    		/* this forks verifier state:
    		 * - verification of current path continues and
    		 *   creates a checkpoint after 'if';
    		 * - verification of forked path hits the
    		 *   checkpoint and marks it as loop_entry.
    		 */
    		if (bpf_get_prandom_u32())
    			asm volatile ("");
    		/* this marks 'j' as precise, thus any checkpoint
    		 * created on current iteration would not be matched
    		 * on the next iteration.
    		 */
    		buf[j++] = 42;
    		j %= ARRAY_SIZE(buf);
    	}
    	asm volatile (""::"r"(buf));
    	return 0;
    }

Memory consumption increased due to more states being marked as loop
entries and eventually added to env->free_list.

This commit introduces logic to free states from env->free_list
during verification. A state in env->free_list can be freed if:
- it has no child states;
- it is not used as a loop_entry.

This commit:
- updates bpf_verifier_state->used_as_loop_entry to be a counter
  that tracks how many states use this one as a loop entry;
- adds a function maybe_free_verifier_state(), which:
  - frees a state if its ->branches and ->used_as_loop_entry
    counters are both zero;
  - if the state is freed, state->loop_entry->used_as_loop_entry is
    decremented, and an attempt is made to free state->loop_entry.

In the example above, this approach reduces the maximum number of
states in the free list from 369,960 to 16,223. However, this
approach has its limitations. If the buf size in the example above
is modified to 64, state caching overflows: the state for j=0 is
evicted from the cache before it can be used to stop traversal. As a
result, states in the free list accumulate because their branch
counters do not reach zero.

The effect of this patch on the selftests looks as follows:

File                              Program                               Max free list (A)  Max free list (B)  Max free list (DIFF)
--------------------------------  ------------------------------------  -----------------  -----------------  --------------------
arena_list.bpf.o                  arena_list_add                                       17                  3        -14 (-82.35%)
bpf_iter_task_stack.bpf.o         dump_task_stack                                      39                  9        -30 (-76.92%)
iters.bpf.o                       checkpoint_states_deletion                          265                 89       -176 (-66.42%)
iters.bpf.o                       clean_live_states                                    19                  0       -19 (-100.00%)
profiler2.bpf.o                   tracepoint__syscalls__sys_enter_kill                102                  1       -101 (-99.02%)
profiler3.bpf.o                   tracepoint__syscalls__sys_enter_kill                144                  0      -144 (-100.00%)
pyperf600_iter.bpf.o              on_event                                             15                  0       -15 (-100.00%)
pyperf600_nounroll.bpf.o          on_event                                           1170               1158         -12 (-1.03%)
setget_sockopt.bpf.o              skops_sockopt                                        18                  0       -18 (-100.00%)
strobemeta_nounroll1.bpf.o        on_event                                            147                 83        -64 (-43.54%)
strobemeta_nounroll2.bpf.o        on_event                                            312                209       -103 (-33.01%)
strobemeta_subprogs.bpf.o         on_event                                            124                 86        -38 (-30.65%)
test_cls_redirect_subprogs.bpf.o  cls_redirect                                         15                  0       -15 (-100.00%)
timer.bpf.o                       test1                                                30                 15        -15 (-50.00%)

Measured using "do-not-submit" patches from here:
https://github.com/eddyz87/bpf/tree/get-loop-entry-hungup

Reported-by: Patrick Somaru
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-10-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf_verifier.h | 14 +++---
 kernel/bpf/verifier.c        | 91 ++++++++++++++++++++++++++----------
 2 files changed, 75 insertions(+), 30 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 222f6af278ec..f920af30eb06 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -427,11 +427,6 @@ struct bpf_verifier_state {
 	bool active_rcu_lock;
 
 	bool speculative;
-	/* If this state was ever pointed-to by other state's loop_entry field
-	 * this flag would be set to true. Used to avoid freeing such states
-	 * while they are still in use.
-	 */
-	bool used_as_loop_entry;
 	bool in_sleepable;
 
 	/* first and last insn idx of this verifier state */
@@ -458,6 +453,11 @@ struct bpf_verifier_state {
 	u32 dfs_depth;
 	u32 callback_unroll_depth;
 	u32 may_goto_depth;
+	/* If this state was ever pointed-to by other state's loop_entry field
+	 * this flag would be set to true. Used to avoid freeing such states
+	 * while they are still in use.
+	 */
+	u32 used_as_loop_entry;
 };
 
 #define bpf_get_spilled_reg(slot, frame, mask) \
@@ -499,7 +499,9 @@ struct bpf_verifier_state {
 struct bpf_verifier_state_list {
 	struct bpf_verifier_state state;
 	struct list_head node;
-	int miss_cnt, hit_cnt;
+	u32 miss_cnt;
+	u32 hit_cnt:31;
+	u32 in_free_list:1;
 };
 
 struct bpf_loop_inline_state {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d04d6ca2d5a6..1d1f6a5902d8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1631,6 +1631,50 @@ static void free_verifier_state(struct bpf_verifier_state *state,
 	kfree(state);
 }
 
+/* struct bpf_verifier_state->{parent,loop_entry} refer to states
+ * that are in either of env->{explored_states,free_list}.
+ * In both cases the state is contained in struct bpf_verifier_state_list.
+ */
+static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
+{
+	if (st->parent)
+		return container_of(st->parent, struct bpf_verifier_state_list, state);
+	return NULL;
+}
+
+static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st)
+{
+	if (st->loop_entry)
+		return container_of(st->loop_entry, struct bpf_verifier_state_list, state);
+	return NULL;
+}
+
+/* A state can be freed if it is no longer referenced:
+ *  - is in the env->free_list;
+ *  - has no child states;
+ *  - is not used as loop_entry.
+ *
+ * Freeing a state can make its loop_entry free-able.
+ */
+static void maybe_free_verifier_state(struct bpf_verifier_env *env,
+				      struct bpf_verifier_state_list *sl)
+{
+	struct bpf_verifier_state_list *loop_entry_sl;
+
+	while (sl && sl->in_free_list &&
+	       sl->state.branches == 0 &&
+	       sl->state.used_as_loop_entry == 0) {
+		loop_entry_sl = state_loop_entry_as_list(&sl->state);
+		if (loop_entry_sl)
+			loop_entry_sl->state.used_as_loop_entry--;
+		list_del(&sl->node);
+		free_verifier_state(&sl->state, false);
+		kfree(sl);
+		env->peak_states--;
+		sl = loop_entry_sl;
+	}
+}
+
 /* copy verifier state from src to dst growing dst stack space
  * when necessary to accommodate larger src stack
  */
@@ -1848,7 +1892,8 @@ static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env,
 	return topmost;
 }
 
-static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
+static void update_loop_entry(struct bpf_verifier_env *env,
+			      struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr)
 {
 	/* The hdr->branches check decides between cases B and C in
 	 * comment for get_loop_entry(). If hdr->branches == 0 then
@@ -1857,13 +1902,20 @@ static void update_loop_entry(struct bpf_verifie
 	 * head's topmost loop entry is not in current DFS path,
 	 * hence 'cur' and 'hdr' are not in the same loop and there is
 	 * no need to update cur->loop_entry.
 	 */
 	if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) {
+		if (cur->loop_entry) {
+			cur->loop_entry->used_as_loop_entry--;
+			maybe_free_verifier_state(env, state_loop_entry_as_list(cur));
+		}
 		cur->loop_entry = hdr;
-		hdr->used_as_loop_entry = true;
+		hdr->used_as_loop_entry++;
 	}
 }
 
 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
 {
+	struct bpf_verifier_state_list *sl = NULL, *parent_sl;
+	struct bpf_verifier_state *parent;
+
 	while (st) {
 		u32 br = --st->branches;
 
@@ -1873,7 +1925,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
 		 * This is a part of 'case A' in get_loop_entry() comment.
 		 */
 		if (br == 0 && st->parent && st->loop_entry)
-			update_loop_entry(st->parent, st->loop_entry);
+			update_loop_entry(env, st->parent, st->loop_entry);
 
 		/* WARN_ON(br > 1) technically makes sense here,
 		 * but see comment in push_stack(), hence:
@@ -1883,7 +1935,12 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
 			  br);
 		if (br)
 			break;
-		st = st->parent;
+		parent = st->parent;
+		parent_sl = state_parent_as_list(st);
+		if (sl)
+			maybe_free_verifier_state(env, sl);
+		st = parent;
+		sl = parent_sl;
 	}
 }
 
@@ -18667,7 +18724,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 				spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
 				iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
 				if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
-					update_loop_entry(cur, &sl->state);
+					update_loop_entry(env, cur, &sl->state);
 					goto hit;
 				}
 			}
@@ -18676,7 +18733,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		if (is_may_goto_insn_at(env, insn_idx)) {
 			if (sl->state.may_goto_depth != cur->may_goto_depth &&
 			    states_equal(env, &sl->state, cur, RANGE_WITHIN)) {
-				update_loop_entry(cur, &sl->state);
+				update_loop_entry(env, cur, &sl->state);
 				goto hit;
 			}
 		}
@@ -18749,7 +18806,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 		force_exact = loop_entry && loop_entry->branches > 0;
 		if (states_equal(env, &sl->state, cur, force_exact ?
 				 RANGE_WITHIN : NOT_EXACT)) {
 			if (force_exact)
-				update_loop_entry(cur, loop_entry);
+				update_loop_entry(env, cur, loop_entry);
 hit:
 			sl->hit_cnt++;
 			/* reached equivalent register/stack state,
@@ -18798,24 +18855,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			/* the state is unlikely to be useful. Remove it to
 			 * speed up verification
 			 */
+			sl->in_free_list = true;
 			list_del(&sl->node);
-			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE &&
-			    !sl->state.used_as_loop_entry) {
-				u32 br = sl->state.branches;
-
-				WARN_ONCE(br,
-					  "BUG live_done but branches_to_explore %d\n",
-					  br);
-				free_verifier_state(&sl->state, false);
-				kfree(sl);
-				env->peak_states--;
-			} else {
-				/* cannot free this state, since parentage chain may
-				 * walk it later. Add it for free_list instead to
-				 * be freed at the end of verification
-				 */
-				list_add(&sl->node, &env->free_list);
-			}
+			list_add(&sl->node, &env->free_list);
+			maybe_free_verifier_state(env, sl);
 		}
 	}

From 574078b001cdf6dfa4cf8a2f7373a9babdcc26c7 Mon Sep 17 00:00:00 2001
From: Eduard Zingerman
Date: Sat, 15 Feb 2025 03:04:01 -0800
Subject: [PATCH 10/10] bpf: fix env->peak_states computation

Compute env->peak_states as the maximum value of the sum of the
env->explored_states and env->free_list sizes.
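In miniature, the bookkeeping looks like the following illustrative
userspace sketch (the kernel counterpart, update_peak_states(),
appears in the diff below): instead of incrementing and decrementing
a single peak counter as states move around, track both population
counts and record the high-water mark of their sum:

    #include <stdio.h>

    static unsigned int explored_states_size, free_list_size, peak_states;

    static void update_peak_states(void)
    {
    	unsigned int cur = explored_states_size + free_list_size;

    	if (cur > peak_states)
    		peak_states = cur;	/* high-water mark of the sum */
    }

    int main(void)
    {
    	explored_states_size = 3;
    	update_peak_states();
    	/* moving a state between the two lists keeps the sum,
    	 * and hence the recorded peak, unchanged
    	 */
    	explored_states_size--;
    	free_list_size++;
    	update_peak_states();
    	printf("peak_states=%u\n", peak_states);	/* prints 3 */
    	return 0;
    }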
Signed-off-by: Eduard Zingerman
Link: https://lore.kernel.org/r/20250215110411.3236773-11-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov
---
 include/linux/bpf_verifier.h |  2 ++
 kernel/bpf/verifier.c        | 15 +++++++++++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index f920af30eb06..bbd013c38ff9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -772,6 +772,8 @@ struct bpf_verifier_env {
 	u32 peak_states;
 	/* longest register parentage chain walked for liveness marking */
 	u32 longest_mark_read_walk;
+	u32 free_list_size;
+	u32 explored_states_size;
 	bpfptr_t fd_array;
 
 	/* bit mask to keep track of whether a register has been accessed
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1d1f6a5902d8..e57b7c949860 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1609,6 +1609,14 @@ static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *st
 	return NULL;
 }
 
+static void update_peak_states(struct bpf_verifier_env *env)
+{
+	u32 cur_states;
+
+	cur_states = env->explored_states_size + env->free_list_size;
+	env->peak_states = max(env->peak_states, cur_states);
+}
+
 static void free_func_state(struct bpf_func_state *state)
 {
 	if (!state)
@@ -1670,7 +1678,7 @@ static void maybe_free_verifier_state(struct bpf_verifier_env *env,
 		list_del(&sl->node);
 		free_verifier_state(&sl->state, false);
 		kfree(sl);
-		env->peak_states--;
+		env->free_list_size--;
 		sl = loop_entry_sl;
 	}
 }
@@ -18858,6 +18866,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			sl->in_free_list = true;
 			list_del(&sl->node);
 			list_add(&sl->node, &env->free_list);
+			env->free_list_size++;
+			env->explored_states_size--;
 			maybe_free_verifier_state(env, sl);
 		}
 	}
@@ -18884,7 +18894,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 	if (!new_sl)
 		return -ENOMEM;
 	env->total_states++;
-	env->peak_states++;
+	env->explored_states_size++;
+	update_peak_states(env);
 	env->prev_jmps_processed = env->jmps_processed;
 	env->prev_insn_processed = env->insn_processed;
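A closing aside on the data structure introduced in patch 8 and
relied upon by patches 9 and 10: the list_head/container_of embedding
allows unlinking a state given only a pointer to it. A compact
userspace model (mimicking the kernel's <linux/list.h>; illustration
only, not kernel code):

    #include <stddef.h>
    #include <stdio.h>

    struct list_head { struct list_head *prev, *next; };

    #define container_of(ptr, type, member) \
    	((type *)((char *)(ptr) - offsetof(type, member)))

    static void INIT_LIST_HEAD(struct list_head *head)
    {
    	head->prev = head->next = head;
    }

    static void list_add(struct list_head *node, struct list_head *head)
    {
    	node->next = head->next;
    	node->prev = head;
    	head->next->prev = node;
    	head->next = node;
    }

    /* O(1) unlink given only the node itself, no list head needed */
    static void list_del(struct list_head *node)
    {
    	node->prev->next = node->next;
    	node->next->prev = node->prev;
    }

    struct state_list {		/* stand-in for bpf_verifier_state_list */
    	int insn_idx;
    	struct list_head node;
    };

    int main(void)
    {
    	struct list_head free_list;
    	struct state_list a = { .insn_idx = 1 }, b = { .insn_idx = 2 };
    	struct list_head *pos;

    	INIT_LIST_HEAD(&free_list);
    	list_add(&a.node, &free_list);
    	list_add(&b.node, &free_list);
    	list_del(&a.node);	/* remove while holding only &a */

    	for (pos = free_list.next; pos != &free_list; pos = pos->next)
    		printf("insn_idx=%d\n",
    		       container_of(pos, struct state_list, node)->insn_idx);
    	return 0;		/* prints: insn_idx=2 */
    }

The cost versus the old singly linked 'next' pointers is one extra
pointer per state, in exchange for O(1) removal anywhere in the list,
which is exactly what maybe_free_verifier_state() needs.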