diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 32c23f2a3086..bbd013c38ff9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -427,11 +427,6 @@ struct bpf_verifier_state { bool active_rcu_lock; bool speculative; - /* If this state was ever pointed-to by other state's loop_entry field - * this flag would be set to true. Used to avoid freeing such states - * while they are still in use. - */ - bool used_as_loop_entry; bool in_sleepable; /* first and last insn idx of this verifier state */ @@ -458,6 +453,11 @@ struct bpf_verifier_state { u32 dfs_depth; u32 callback_unroll_depth; u32 may_goto_depth; + /* If this state was ever pointed-to by other state's loop_entry field + * this flag would be set to true. Used to avoid freeing such states + * while they are still in use. + */ + u32 used_as_loop_entry; }; #define bpf_get_spilled_reg(slot, frame, mask) \ @@ -498,8 +498,10 @@ struct bpf_verifier_state { /* linked list of verifier states used to prune search */ struct bpf_verifier_state_list { struct bpf_verifier_state state; - struct bpf_verifier_state_list *next; - int miss_cnt, hit_cnt; + struct list_head node; + u32 miss_cnt; + u32 hit_cnt:31; + u32 in_free_list:1; }; struct bpf_loop_inline_state { @@ -710,8 +712,11 @@ struct bpf_verifier_env { bool test_state_freq; /* test verifier with different pruning frequency */ bool test_reg_invariants; /* fail verification on register invariants violations */ struct bpf_verifier_state *cur_state; /* current verifier state */ - struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ - struct bpf_verifier_state_list *free_list; + /* Search pruning optimization, array of list_heads for + * lists of struct bpf_verifier_state_list. + */ + struct list_head *explored_states; + struct list_head free_list; /* list of struct bpf_verifier_state_list */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */ u32 used_map_cnt; /* number of used maps */ @@ -767,6 +772,8 @@ struct bpf_verifier_env { u32 peak_states; /* longest register parentage chain walked for liveness marking */ u32 longest_mark_read_walk; + u32 free_list_size; + u32 explored_states_size; bpfptr_t fd_array; /* bit mask to keep track of whether a register has been accessed diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e7bc74171c99..e57b7c949860 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1609,6 +1609,14 @@ static struct bpf_reference_state *find_lock_state(struct bpf_verifier_state *st return NULL; } +static void update_peak_states(struct bpf_verifier_env *env) +{ + u32 cur_states; + + cur_states = env->explored_states_size + env->free_list_size; + env->peak_states = max(env->peak_states, cur_states); +} + static void free_func_state(struct bpf_func_state *state) { if (!state) @@ -1631,6 +1639,50 @@ static void free_verifier_state(struct bpf_verifier_state *state, kfree(state); } +/* struct bpf_verifier_state->{parent,loop_entry} refer to states + * that are in either of env->{expored_states,free_list}. + * In both cases the state is contained in struct bpf_verifier_state_list. + */ +static struct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st) +{ + if (st->parent) + return container_of(st->parent, struct bpf_verifier_state_list, state); + return NULL; +} + +static struct bpf_verifier_state_list *state_loop_entry_as_list(struct bpf_verifier_state *st) +{ + if (st->loop_entry) + return container_of(st->loop_entry, struct bpf_verifier_state_list, state); + return NULL; +} + +/* A state can be freed if it is no longer referenced: + * - is in the env->free_list; + * - has no children states; + * - is not used as loop_entry. + * + * Freeing a state can make it's loop_entry free-able. + */ +static void maybe_free_verifier_state(struct bpf_verifier_env *env, + struct bpf_verifier_state_list *sl) +{ + struct bpf_verifier_state_list *loop_entry_sl; + + while (sl && sl->in_free_list && + sl->state.branches == 0 && + sl->state.used_as_loop_entry == 0) { + loop_entry_sl = state_loop_entry_as_list(&sl->state); + if (loop_entry_sl) + loop_entry_sl->state.used_as_loop_entry--; + list_del(&sl->node); + free_verifier_state(&sl->state, false); + kfree(sl); + env->free_list_size--; + sl = loop_entry_sl; + } +} + /* copy verifier state from src to dst growing dst stack space * when necessary to accommodate larger src stack */ @@ -1670,6 +1722,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->callback_unroll_depth = src->callback_unroll_depth; dst_state->used_as_loop_entry = src->used_as_loop_entry; dst_state->may_goto_depth = src->may_goto_depth; + dst_state->loop_entry = src->loop_entry; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -1690,7 +1743,7 @@ static u32 state_htab_size(struct bpf_verifier_env *env) return env->prog->len; } -static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx) +static struct list_head *explored_state(struct bpf_verifier_env *env, int idx) { struct bpf_verifier_state *cur = env->cur_state; struct bpf_func_state *state = cur->frame[cur->curframe]; @@ -1798,16 +1851,13 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta * # Find outermost loop entry known for n * def get_loop_entry(n): * h = entries.get(n, None) - * while h in entries and entries[h] != h: + * while h in entries: * h = entries[h] * return h * - * # Update n's loop entry if h's outermost entry comes - * # before n's outermost entry in current DFS path. + * # Update n's loop entry if h comes before n in current DFS path. * def update_loop_entry(n, h): - * n1 = get_loop_entry(n) or n - * h1 = get_loop_entry(h) or h - * if h1 in path and depths[h1] <= depths[n1]: + * if h in path and depths[entries.get(n, n)] < depths[n]: * entries[n] = h1 * * def dfs(n, depth): @@ -1819,7 +1869,7 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta * # Case A: explore succ and update cur's loop entry * # only if succ's entry is in current DFS path. * dfs(succ, depth + 1) - * h = get_loop_entry(succ) + * h = entries.get(succ, None) * update_loop_entry(n, h) * else: * # Case B or C depending on `h1 in path` check in update_loop_entry(). @@ -1831,46 +1881,49 @@ static bool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_sta * and cur's loop entry has to be updated (case A), handle this in * update_branch_counts(); * - use st->branch > 0 as a signal that st is in the current DFS path; - * - handle cases B and C in is_state_visited(); - * - update topmost loop entry for intermediate states in get_loop_entry(). + * - handle cases B and C in is_state_visited(). */ -static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_state *st) +static struct bpf_verifier_state *get_loop_entry(struct bpf_verifier_env *env, + struct bpf_verifier_state *st) { - struct bpf_verifier_state *topmost = st->loop_entry, *old; + struct bpf_verifier_state *topmost = st->loop_entry; + u32 steps = 0; - while (topmost && topmost->loop_entry && topmost != topmost->loop_entry) + while (topmost && topmost->loop_entry) { + if (steps++ > st->dfs_depth) { + WARN_ONCE(true, "verifier bug: infinite loop in get_loop_entry\n"); + verbose(env, "verifier bug: infinite loop in get_loop_entry()\n"); + return ERR_PTR(-EFAULT); + } topmost = topmost->loop_entry; - /* Update loop entries for intermediate states to avoid this - * traversal in future get_loop_entry() calls. - */ - while (st && st->loop_entry != topmost) { - old = st->loop_entry; - st->loop_entry = topmost; - st = old; } return topmost; } -static void update_loop_entry(struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr) +static void update_loop_entry(struct bpf_verifier_env *env, + struct bpf_verifier_state *cur, struct bpf_verifier_state *hdr) { - struct bpf_verifier_state *cur1, *hdr1; - - cur1 = get_loop_entry(cur) ?: cur; - hdr1 = get_loop_entry(hdr) ?: hdr; - /* The head1->branches check decides between cases B and C in - * comment for get_loop_entry(). If hdr1->branches == 0 then + /* The hdr->branches check decides between cases B and C in + * comment for get_loop_entry(). If hdr->branches == 0 then * head's topmost loop entry is not in current DFS path, * hence 'cur' and 'hdr' are not in the same loop and there is * no need to update cur->loop_entry. */ - if (hdr1->branches && hdr1->dfs_depth <= cur1->dfs_depth) { + if (hdr->branches && hdr->dfs_depth < (cur->loop_entry ?: cur)->dfs_depth) { + if (cur->loop_entry) { + cur->loop_entry->used_as_loop_entry--; + maybe_free_verifier_state(env, state_loop_entry_as_list(cur)); + } cur->loop_entry = hdr; - hdr->used_as_loop_entry = true; + hdr->used_as_loop_entry++; } } static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st) { + struct bpf_verifier_state_list *sl = NULL, *parent_sl; + struct bpf_verifier_state *parent; + while (st) { u32 br = --st->branches; @@ -1880,7 +1933,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi * This is a part of 'case A' in get_loop_entry() comment. */ if (br == 0 && st->parent && st->loop_entry) - update_loop_entry(st->parent, st->loop_entry); + update_loop_entry(env, st->parent, st->loop_entry); /* WARN_ON(br > 1) technically makes sense here, * but see comment in push_stack(), hence: @@ -1890,7 +1943,12 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi br); if (br) break; - st = st->parent; + parent = st->parent; + parent_sl = state_parent_as_list(st); + if (sl) + maybe_free_verifier_state(env, sl); + st = parent; + sl = parent_sl; } } @@ -8450,10 +8508,12 @@ static struct bpf_verifier_state *find_prev_entry(struct bpf_verifier_env *env, { struct bpf_verifier_state_list *sl; struct bpf_verifier_state *st; + struct list_head *pos, *head; /* Explored states are pushed in stack order, most recent states come first */ - sl = *explored_state(env, insn_idx); - for (; sl; sl = sl->next) { + head = explored_state(env, insn_idx); + list_for_each(pos, head) { + sl = container_of(pos, struct bpf_verifier_state_list, node); /* If st->branches != 0 state is a part of current DFS verification path, * hence cur & st for a loop. */ @@ -17862,18 +17922,22 @@ static void clean_verifier_state(struct bpf_verifier_env *env, static void clean_live_states(struct bpf_verifier_env *env, int insn, struct bpf_verifier_state *cur) { + struct bpf_verifier_state *loop_entry; struct bpf_verifier_state_list *sl; + struct list_head *pos, *head; - sl = *explored_state(env, insn); - while (sl) { + head = explored_state(env, insn); + list_for_each(pos, head) { + sl = container_of(pos, struct bpf_verifier_state_list, node); if (sl->state.branches) - goto next; + continue; + loop_entry = get_loop_entry(env, &sl->state); + if (!IS_ERR_OR_NULL(loop_entry) && loop_entry->branches) + continue; if (sl->state.insn_idx != insn || !same_callsites(&sl->state, cur)) - goto next; + continue; clean_verifier_state(env, &sl->state); -next: - sl = sl->next; } } @@ -18564,10 +18628,11 @@ static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; - struct bpf_verifier_state_list *sl, **pprev; + struct bpf_verifier_state_list *sl; struct bpf_verifier_state *cur = env->cur_state, *new, *loop_entry; int i, j, n, err, states_cnt = 0; bool force_new_state, add_new_state, force_exact; + struct list_head *pos, *tmp, *head; force_new_state = env->test_state_freq || is_force_checkpoint(env, insn_idx) || /* Avoid accumulating infinitely long jmp history */ @@ -18586,15 +18651,14 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) env->insn_processed - env->prev_insn_processed >= 8) add_new_state = true; - pprev = explored_state(env, insn_idx); - sl = *pprev; - clean_live_states(env, insn_idx, cur); - while (sl) { + head = explored_state(env, insn_idx); + list_for_each_safe(pos, tmp, head) { + sl = container_of(pos, struct bpf_verifier_state_list, node); states_cnt++; if (sl->state.insn_idx != insn_idx) - goto next; + continue; if (sl->state.branches) { struct bpf_func_state *frame = sl->state.frame[sl->state.curframe]; @@ -18668,7 +18732,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) spi = __get_spi(iter_reg->off + iter_reg->var_off.value); iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr; if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) { - update_loop_entry(cur, &sl->state); + update_loop_entry(env, cur, &sl->state); goto hit; } } @@ -18677,7 +18741,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) if (is_may_goto_insn_at(env, insn_idx)) { if (sl->state.may_goto_depth != cur->may_goto_depth && states_equal(env, &sl->state, cur, RANGE_WITHIN)) { - update_loop_entry(cur, &sl->state); + update_loop_entry(env, cur, &sl->state); goto hit; } } @@ -18744,11 +18808,13 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * * Additional details are in the comment before get_loop_entry(). */ - loop_entry = get_loop_entry(&sl->state); + loop_entry = get_loop_entry(env, &sl->state); + if (IS_ERR(loop_entry)) + return PTR_ERR(loop_entry); force_exact = loop_entry && loop_entry->branches > 0; if (states_equal(env, &sl->state, cur, force_exact ? RANGE_WITHIN : NOT_EXACT)) { if (force_exact) - update_loop_entry(cur, loop_entry); + update_loop_entry(env, cur, loop_entry); hit: sl->hit_cnt++; /* reached equivalent register/stack state, @@ -18797,31 +18863,13 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) /* the state is unlikely to be useful. Remove it to * speed up verification */ - *pprev = sl->next; - if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE && - !sl->state.used_as_loop_entry) { - u32 br = sl->state.branches; - - WARN_ONCE(br, - "BUG live_done but branches_to_explore %d\n", - br); - free_verifier_state(&sl->state, false); - kfree(sl); - env->peak_states--; - } else { - /* cannot free this state, since parentage chain may - * walk it later. Add it for free_list instead to - * be freed at the end of verification - */ - sl->next = env->free_list; - env->free_list = sl; - } - sl = *pprev; - continue; + sl->in_free_list = true; + list_del(&sl->node); + list_add(&sl->node, &env->free_list); + env->free_list_size++; + env->explored_states_size--; + maybe_free_verifier_state(env, sl); } -next: - pprev = &sl->next; - sl = *pprev; } if (env->max_states_per_insn < states_cnt) @@ -18846,7 +18894,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) if (!new_sl) return -ENOMEM; env->total_states++; - env->peak_states++; + env->explored_states_size++; + update_peak_states(env); env->prev_jmps_processed = env->jmps_processed; env->prev_insn_processed = env->insn_processed; @@ -18870,8 +18919,8 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) cur->first_insn_idx = insn_idx; cur->insn_hist_start = cur->insn_hist_end; cur->dfs_depth = new->dfs_depth + 1; - new_sl->next = *explored_state(env, insn_idx); - *explored_state(env, insn_idx) = new_sl; + list_add(&new_sl->node, head); + /* connect new state to parentage chain. Current frame needs all * registers connected. Only r6 - r9 of the callers are alive (pushed * to the stack implicitly by JITs) so in callers' frames connect just @@ -19292,6 +19341,8 @@ static int do_check(struct bpf_verifier_env *env) return err; break; } else { + if (WARN_ON_ONCE(env->cur_state->loop_entry)) + env->cur_state->loop_entry = NULL; do_print_state = true; continue; } @@ -22193,31 +22244,29 @@ static int remove_fastcall_spills_fills(struct bpf_verifier_env *env) static void free_states(struct bpf_verifier_env *env) { - struct bpf_verifier_state_list *sl, *sln; + struct bpf_verifier_state_list *sl; + struct list_head *head, *pos, *tmp; int i; - sl = env->free_list; - while (sl) { - sln = sl->next; + list_for_each_safe(pos, tmp, &env->free_list) { + sl = container_of(pos, struct bpf_verifier_state_list, node); free_verifier_state(&sl->state, false); kfree(sl); - sl = sln; } - env->free_list = NULL; + INIT_LIST_HEAD(&env->free_list); if (!env->explored_states) return; for (i = 0; i < state_htab_size(env); i++) { - sl = env->explored_states[i]; + head = &env->explored_states[i]; - while (sl) { - sln = sl->next; + list_for_each_safe(pos, tmp, head) { + sl = container_of(pos, struct bpf_verifier_state_list, node); free_verifier_state(&sl->state, false); kfree(sl); - sl = sln; } - env->explored_states[i] = NULL; + INIT_LIST_HEAD(&env->explored_states[i]); } } @@ -23185,12 +23234,16 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3 env->test_reg_invariants = attr->prog_flags & BPF_F_TEST_REG_INVARIANTS; env->explored_states = kvcalloc(state_htab_size(env), - sizeof(struct bpf_verifier_state_list *), + sizeof(struct list_head), GFP_USER); ret = -ENOMEM; if (!env->explored_states) goto skip_full_check; + for (i = 0; i < state_htab_size(env); i++) + INIT_LIST_HEAD(&env->explored_states[i]); + INIT_LIST_HEAD(&env->free_list); + ret = check_btf_info_early(env, attr, uattr); if (ret < 0) goto skip_full_check; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 190822b2f08b..427b72954b87 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -7,6 +7,8 @@ #include "bpf_misc.h" #include "bpf_compiler.h" +#define unlikely(x) __builtin_expect(!!(x), 0) + static volatile int zero = 0; int my_pid; @@ -1174,6 +1176,122 @@ __naked int loop_state_deps2(void) ); } +SEC("?raw_tp") +__failure +__msg("math between fp pointer and register with unbounded") +__flag(BPF_F_TEST_STATE_FREQ) +__naked int loop_state_deps3(void) +{ + /* This is equivalent to a C program below. + * + * if (random() != 24) { // assume false branch is placed first + * i = iter_new(); // fp[-8] + * while (iter_next(i)); + * iter_destroy(i); + * return; + * } + * + * for (i = 10; i > 0; i--); // increase dfs_depth for child states + * + * i = iter_new(); // fp[-8] + * b = -24; // r8 + * for (;;) { // checkpoint (L) + * if (iter_next(i)) // checkpoint (N) + * break; + * if (random() == 77) { // assume false branch is placed first + * *(u64 *)(r10 + b) = 7; // this is not safe when b == -25 + * iter_destroy(i); + * return; + * } + * if (random() == 42) { // assume false branch is placed first + * b = -25; + * } + * } + * iter_destroy(i); + * + * In case of a buggy verifier first loop might poison + * env->cur_state->loop_entry with a state having 0 branches + * and small dfs_depth. This would trigger NOT_EXACT states + * comparison for some states within second loop. + * Specifically, checkpoint (L) might be problematic if: + * - branch with '*(u64 *)(r10 + b) = 7' is not explored yet; + * - checkpoint (L) is first reached in state {b=-24}; + * - traversal is pruned at checkpoint (N) setting checkpoint's (L) + * branch count to 0, thus making it eligible for use in pruning; + * - checkpoint (L) is next reached in state {b=-25}, + * this would cause NOT_EXACT comparison with a state {b=-24} + * while 'b' is not marked precise yet. + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 == 24 goto 2f;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 5;" + "call %[bpf_iter_num_new];" + "1:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 != 0 goto 1b;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "2:" + /* loop to increase dfs_depth */ + "r0 = 10;" + "3:" + "r0 -= 1;" + "if r0 != 0 goto 3b;" + /* end of loop */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r8 = -24;" + "main_loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto main_loop_end_%=;" + /* first if */ + "call %[bpf_get_prandom_u32];" + "if r0 == 77 goto unsafe_write_%=;" + /* second if */ + "call %[bpf_get_prandom_u32];" + "if r0 == 42 goto poison_r8_%=;" + /* iterate */ + "goto main_loop_%=;" + "main_loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + + "unsafe_write_%=:" + "r0 = r10;" + "r0 += r8;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto main_loop_end_%=;" + + "poison_r8_%=:" + "r8 = -25;" + "goto main_loop_%=;" + : + : __imm(bpf_get_prandom_u32), + __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy) + : __clobber_all + ); +} + SEC("?raw_tp") __success __naked int triple_continue(void) @@ -1512,4 +1630,25 @@ int iter_destroy_bad_arg(const void *ctx) return 0; } +SEC("raw_tp") +__success +int clean_live_states(const void *ctx) +{ + char buf[1]; + int i, j, k, l, m, n, o; + + bpf_for(i, 0, 10) + bpf_for(j, 0, 10) + bpf_for(k, 0, 10) + bpf_for(l, 0, 10) + bpf_for(m, 0, 10) + bpf_for(n, 0, 10) + bpf_for(o, 0, 10) { + if (unlikely(bpf_get_prandom_u32())) + buf[0] = 42; + bpf_printk("%s", buf); + } + return 0; +} + char _license[] SEC("license") = "GPL";