From 1500a5d9f49cb66906d3ea1c9158df25cc41dd40 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:34 -0800 Subject: [PATCH 1/6] bpf: Add KF_RCU flag to bpf_refcount_acquire_impl Refcounted local kptrs are kptrs to user-defined types with a bpf_refcount field. Recent commits ([0], [1]) modified the lifetime of refcounted local kptrs such that the underlying memory is not reused until RCU grace period has elapsed. Separately, verification of bpf_refcount_acquire calls currently succeeds for MAYBE_NULL non-owning reference input, which is a problem as bpf_refcount_acquire_impl has no handling for this case. This patch takes advantage of aforementioned lifetime changes to tag bpf_refcount_acquire_impl kfunc KF_RCU, thereby preventing MAYBE_NULL input to the kfunc. The KF_RCU flag applies to all kfunc params; it's fine for it to apply to the void *meta__ign param as that's populated by the verifier and is tagged __ign regardless. [0]: commit 7e26cd12ad1c ("bpf: Use bpf_mem_free_rcu when bpf_obj_dropping refcounted nodes") is the actual change to allocation behavior [1]: commit 0816b8c6bf7f ("bpf: Consider non-owning refs to refcounted nodes RCU protected") modified verifier understanding of refcounted local kptrs to match [0]'s changes Signed-off-by: Dave Marchevsky Fixes: 7c50b1cb76ac ("bpf: Add bpf_refcount_acquire kfunc") Link: https://lore.kernel.org/r/20231107085639.3016113-2-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 174f02a9e7034..b84d8cb8d2395 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2520,7 +2520,7 @@ BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE) BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE) -BTF_ID_FLAGS(func, 
bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL | KF_RCU) BTF_ID_FLAGS(func, bpf_list_push_front_impl) BTF_ID_FLAGS(func, bpf_list_push_back_impl) BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL) From f460e7bdb027d1da93f0c5090b239889cd46a33d Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:35 -0800 Subject: [PATCH 2/6] selftests/bpf: Add test passing MAYBE_NULL reg to bpf_refcount_acquire The test added in this patch exercises the logic fixed in the previous patch in this series. Before the previous patch's changes, bpf_refcount_acquire accepts MAYBE_NULL local kptrs; after the change the verifier correctly rejects such a call. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-3-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../bpf/progs/refcounted_kptr_fail.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index 1ef07f6ee580d..1553b9c16aa7f 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -53,6 +53,25 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) return 0; } +SEC("?tc") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +long refcount_acquire_maybe_null(void *ctx) +{ + struct node_acquire *n, *m; + + n = bpf_obj_new(typeof(*n)); + /* Intentionally not testing !n + * it's MAYBE_NULL for refcount_acquire + */ + m = bpf_refcount_acquire(n); + if (m) + bpf_obj_drop(m); + if (n) + bpf_obj_drop(n); + + return 0; +} + SEC("?tc") __failure __msg("Unreleased reference id=3 alloc_insn=9") long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) From 649924b76ab151a96bdd22a97a993fb0421f134c Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 
2023 00:56:36 -0800 Subject: [PATCH 3/6] bpf: Use bpf_mem_free_rcu when bpf_obj_dropping non-refcounted nodes The use of bpf_mem_free_rcu to free refcounted local kptrs was added in commit 7e26cd12ad1c ("bpf: Use bpf_mem_free_rcu when bpf_obj_dropping refcounted nodes"). In the cover letter for the series containing that patch [0] I commented: Perhaps it makes sense to move to mem_free_rcu for _all_ non-owning refs in the future, not just refcounted. This might allow custom non-owning ref lifetime + invalidation logic to be entirely subsumed by MEM_RCU handling. IMO this needs a bit more thought and should be tackled outside of a fix series, so it's not attempted here. It's time to start moving in the "non-owning refs have MEM_RCU lifetime" direction. As mentioned in that comment, using bpf_mem_free_rcu for all local kptrs - not just refcounted - is necessarily the first step towards that goal. This patch does so. After this patch the memory pointed to by all local kptrs will not be reused until RCU grace period elapses. The verifier's understanding of non-owning ref validity and the clobbering logic it uses to enforce that understanding are not changed here, that'll happen gradually in future work, including further patches in the series. 
[0]: https://lore.kernel.org/all/20230821193311.3290257-1-davemarchevsky@fb.com/ Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-4-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/helpers.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index b84d8cb8d2395..03517db5cfb36 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -1937,10 +1937,7 @@ void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu) ma = &bpf_global_percpu_ma; else ma = &bpf_global_ma; - if (rec && rec->refcount_off >= 0) - bpf_mem_free_rcu(ma, p); - else - bpf_mem_free(ma, p); + bpf_mem_free_rcu(ma, p); } __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign) From 790ce3cfefb1b768dccd4eee324ddef0f0ce3db4 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:37 -0800 Subject: [PATCH 4/6] bpf: Move GRAPH_{ROOT,NODE}_MASK macros into btf_field_type enum This refactoring patch removes the unused BPF_GRAPH_NODE_OR_ROOT btf_field_type and moves BPF_GRAPH_{NODE,ROOT} macros into the btf_field_type enum. Further patches in the series will use BPF_GRAPH_NODE, so let's move this useful definition out of btf.c. 
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-5-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 4 ++-- kernel/bpf/btf.c | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index eb84caf133df9..4001d11be1516 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -186,8 +186,8 @@ enum btf_field_type { BPF_LIST_NODE = (1 << 6), BPF_RB_ROOT = (1 << 7), BPF_RB_NODE = (1 << 8), - BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD | - BPF_RB_NODE | BPF_RB_ROOT, + BPF_GRAPH_NODE = BPF_RB_NODE | BPF_LIST_NODE, + BPF_GRAPH_ROOT = BPF_RB_ROOT | BPF_LIST_HEAD, BPF_REFCOUNT = (1 << 9), }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 15d71d2986d3a..63cf4128fc059 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3840,9 +3840,6 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type return ERR_PTR(ret); } -#define GRAPH_ROOT_MASK (BPF_LIST_HEAD | BPF_RB_ROOT) -#define GRAPH_NODE_MASK (BPF_LIST_NODE | BPF_RB_NODE) - int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) { int i; @@ -3855,13 +3852,13 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * Hence we only need to ensure that bpf_{list_head,rb_root} ownership * does not form cycles. */ - if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & GRAPH_ROOT_MASK)) + if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & BPF_GRAPH_ROOT)) return 0; for (i = 0; i < rec->cnt; i++) { struct btf_struct_meta *meta; u32 btf_id; - if (!(rec->fields[i].type & GRAPH_ROOT_MASK)) + if (!(rec->fields[i].type & BPF_GRAPH_ROOT)) continue; btf_id = rec->fields[i].graph_root.value_btf_id; meta = btf_find_struct_meta(btf, btf_id); @@ -3873,7 +3870,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * to check ownership cycle for a type unless it's also a * node type. 
*/ - if (!(rec->field_mask & GRAPH_NODE_MASK)) + if (!(rec->field_mask & BPF_GRAPH_NODE)) continue; /* We need to ensure ownership acyclicity among all types. The @@ -3909,7 +3906,7 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec) * - A is both an root and node. * - B is only an node. */ - if (meta->record->field_mask & GRAPH_ROOT_MASK) + if (meta->record->field_mask & BPF_GRAPH_ROOT) return -ELOOP; } return 0; From 1b12171533a9bb23cf6fba7262b479028b65e1e8 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:38 -0800 Subject: [PATCH 5/6] bpf: Mark direct ld of stashed bpf_{rb,list}_node as non-owning ref This patch enables the following pattern: /* mapval contains a __kptr pointing to refcounted local kptr */ mapval = bpf_map_lookup_elem(&map, &idx); if (!mapval || !mapval->some_kptr) { /* omitted */ } p = bpf_refcount_acquire(&mapval->some_kptr); Currently this doesn't work because bpf_refcount_acquire expects an owning or non-owning ref. The verifier defines non-owning ref as a type: PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF while mapval->some_kptr is PTR_TO_BTF_ID | PTR_UNTRUSTED. It's possible to do the refcount_acquire by first bpf_kptr_xchg'ing mapval->some_kptr into a temp kptr, refcount_acquiring that, and xchg'ing back into mapval, but this is unwieldy and shouldn't be necessary. This patch modifies btf_ld_kptr_type such that user-allocated types are marked MEM_ALLOC and if those types have a bpf_{rb,list}_node they're marked NON_OWN_REF as well. Additionally, due to changes to bpf_obj_drop_impl earlier in this series, rcu_protected_object now returns true for all user-allocated types, resulting in mapval->some_kptr being marked MEM_RCU. After this patch's changes, mapval->some_kptr is now: PTR_TO_BTF_ID | MEM_ALLOC | NON_OWN_REF | MEM_RCU which results in it passing the non-owning ref test, and the motivating example passing verification. 
Future work will likely get rid of special non-owning ref lifetime logic in the verifier, at which point we'll be able to delete the NON_OWN_REF flag entirely. Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-6-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 993e4677bbe99..9ae6eae134716 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5557,10 +5557,23 @@ BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) { if (!btf_is_kernel(btf)) - return false; + return true; return btf_id_set_contains(&rcu_protected_types, btf_id); } +static struct btf_record *kptr_pointee_btf_record(struct btf_field *kptr_field) +{ + struct btf_struct_meta *meta; + + if (btf_is_kernel(kptr_field->kptr.btf)) + return NULL; + + meta = btf_find_struct_meta(kptr_field->kptr.btf, + kptr_field->kptr.btf_id); + + return meta ? 
meta->record : NULL; +} + static bool rcu_safe_kptr(const struct btf_field *field) { const struct btf_field_kptr *kptr = &field->kptr; @@ -5571,12 +5584,25 @@ static bool rcu_safe_kptr(const struct btf_field *field) static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field) { + struct btf_record *rec; + u32 ret; + + ret = PTR_MAYBE_NULL; if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) { - if (kptr_field->type != BPF_KPTR_PERCPU) - return PTR_MAYBE_NULL | MEM_RCU; - return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU; + ret |= MEM_RCU; + if (kptr_field->type == BPF_KPTR_PERCPU) + ret |= MEM_PERCPU; + else if (!btf_is_kernel(kptr_field->kptr.btf)) + ret |= MEM_ALLOC; + + rec = kptr_pointee_btf_record(kptr_field); + if (rec && btf_record_has_field(rec, BPF_GRAPH_NODE)) + ret |= NON_OWN_REF; + } else { + ret |= PTR_UNTRUSTED; } - return PTR_MAYBE_NULL | PTR_UNTRUSTED; + + return ret; } static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno, From e9ed8df7187cfdce1075d0ee591544ac15d072f1 Mon Sep 17 00:00:00 2001 From: Dave Marchevsky Date: Tue, 7 Nov 2023 00:56:39 -0800 Subject: [PATCH 6/6] selftests/bpf: Test bpf_refcount_acquire of node obtained via direct ld This patch demonstrates that verifier changes earlier in this series result in bpf_refcount_acquire(mapval->stashed_kptr) passing verification. The added test additionally validates that stashing a kptr in mapval and - in a separate BPF program - refcount_acquiring the kptr without unstashing works as expected at runtime. 
Signed-off-by: Dave Marchevsky Link: https://lore.kernel.org/r/20231107085639.3016113-7-davemarchevsky@fb.com Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/local_kptr_stash.c | 33 +++++++++ .../selftests/bpf/progs/local_kptr_stash.c | 71 +++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index b25b870f87ba9..e6e50a394472c 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -73,6 +73,37 @@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } +static void test_refcount_acquire_without_unstash(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash run"); + ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts); + ASSERT_OK(ret, "stash_refcounted_node run"); + ASSERT_OK(opts.retval, "stash_refcounted_node retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash), + &opts); + ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run"); + ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_fail(void) { RUN_TESTS(local_kptr_stash_fail); @@ -86,6 +117,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_plain(); if (test__start_subtest("local_kptr_stash_unstash")) 
test_local_kptr_stash_unstash(); + if (test__start_subtest("refcount_acquire_without_unstash")) + test_refcount_acquire_without_unstash(); if (test__start_subtest("local_kptr_stash_fail")) test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index b567a666d2b87..1769fdff6aeae 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -14,6 +14,24 @@ struct node_data { struct bpf_rb_node node; }; +struct refcounted_node { + long data; + struct bpf_rb_node rb_node; + struct bpf_refcount refcount; +}; + +struct stash { + struct bpf_spin_lock l; + struct refcounted_node __kptr *stashed; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct stash); + __uint(max_entries, 10); +} refcounted_node_stash SEC(".maps"); + struct plain_local { long key; long data; @@ -38,6 +56,7 @@ struct map_value { * Had to do the same w/ bpf_kfunc_call_test_release below */ struct node_data *just_here_because_btf_bug; +struct refcounted_node *just_here_because_btf_bug2; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -132,4 +151,56 @@ long stash_test_ref_kfunc(void *ctx) return 0; } +SEC("tc") +long refcount_acquire_without_unstash(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int ret = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &ret); + if (!s) + return 1; + + if (!s->stashed) + /* refcount_acquire failure is expected when no refcounted_node + * has been stashed before this program executes + */ + return 2; + + p = bpf_refcount_acquire(s->stashed); + if (!p) + return 3; + + ret = s->stashed ? 
s->stashed->data : -1; + bpf_obj_drop(p); + return ret; +} + +/* Helper for refcount_acquire_without_unstash test */ +SEC("tc") +long stash_refcounted_node(void *ctx) +{ + struct refcounted_node *p; + struct stash *s; + int key = 0; + + s = bpf_map_lookup_elem(&refcounted_node_stash, &key); + if (!s) + return 1; + + p = bpf_obj_new(typeof(*p)); + if (!p) + return 2; + p->data = 42; + + p = bpf_kptr_xchg(&s->stashed, p); + if (p) { + bpf_obj_drop(p); + return 3; + } + + return 0; +} + char _license[] SEC("license") = "GPL";