diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst index 75344cd230e55..41efd8874eebc 100644 --- a/Documentation/bpf/cpumasks.rst +++ b/Documentation/bpf/cpumasks.rst @@ -117,12 +117,7 @@ For example: As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in a map, the reference can be removed from the map with bpf_kptr_xchg(), or -opportunistically acquired with bpf_cpumask_kptr_get(): - -.. kernel-doc:: kernel/bpf/cpumask.c - :identifiers: bpf_cpumask_kptr_get - -Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: +opportunistically acquired using RCU: .. code-block:: c @@ -144,7 +139,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: /** * A simple example tracepoint program showing how a * struct bpf_cpumask * kptr that is stored in a map can - * be acquired using the bpf_cpumask_kptr_get() kfunc. + * be passed to kfuncs using RCU protection. */ SEC("tp_btf/cgroup_mkdir") int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path) @@ -158,26 +153,21 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map: if (!v) return -ENOENT; + bpf_rcu_read_lock(); /* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */ - kptr = bpf_cpumask_kptr_get(&v->cpumask); - if (!kptr) + kptr = v->cpumask; + if (!kptr) { /* If no bpf_cpumask was present in the map, it's because * we're racing with another CPU that removed it with * bpf_kptr_xchg() between the bpf_map_lookup_elem() - * above, and our call to bpf_cpumask_kptr_get(). - * bpf_cpumask_kptr_get() internally safely handles this - * race, and will return NULL if the cpumask is no longer - * present in the map by the time we invoke the kfunc. + * above, and our load of the pointer from the map. */ + bpf_rcu_read_unlock(); return -EBUSY; + } - /* Free the reference we just took above. Note that the - * original struct bpf_cpumask * kptr is still in the map. It will - * be freed either at a later time if another context deletes - * it from the map, or automatically by the BPF subsystem if - * it's still present when the map is destroyed. - */ - bpf_cpumask_release(kptr); + bpf_cpumask_setall(kptr); + bpf_rcu_read_unlock(); return 0; } diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index b6587ec40f1b8..db9da2194c1a7 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -9,6 +9,7 @@ /** * struct bpf_cpumask - refcounted BPF cpumask wrapper structure * @cpumask: The actual cpumask embedded in the struct. + * @rcu: The RCU head used to free the cpumask with RCU safety. * @usage: Object reference counter. When the refcount goes to 0, the * memory is released back to the BPF allocator, which provides * RCU safety. @@ -24,6 +25,7 @@ */ struct bpf_cpumask { cpumask_t cpumask; + struct rcu_head rcu; refcount_t usage; }; @@ -80,32 +82,14 @@ __bpf_kfunc struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) return cpumask; } -/** - * bpf_cpumask_kptr_get() - Attempt to acquire a reference to a BPF cpumask - * stored in a map. - * @cpumaskp: A pointer to a BPF cpumask map value. - * - * Attempts to acquire a reference to a BPF cpumask stored in a map value. The - * cpumask returned by this function must either be embedded in a map as a - * kptr, or freed with bpf_cpumask_release(). This function may return NULL if - * no BPF cpumask was found in the specified map value. - */ -__bpf_kfunc struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp) +static void cpumask_free_cb(struct rcu_head *head) { struct bpf_cpumask *cpumask; - /* The BPF memory allocator frees memory backing its caches in an RCU - * callback. Thus, we can safely use RCU to ensure that the cpumask is - * safe to read. - */ - rcu_read_lock(); - - cpumask = READ_ONCE(*cpumaskp); - if (cpumask && !refcount_inc_not_zero(&cpumask->usage)) - cpumask = NULL; - - rcu_read_unlock(); - return cpumask; + cpumask = container_of(head, struct bpf_cpumask, rcu); + migrate_disable(); + bpf_mem_cache_free(&bpf_cpumask_ma, cpumask); + migrate_enable(); } /** @@ -121,11 +105,8 @@ __bpf_kfunc void bpf_cpumask_release(struct bpf_cpumask *cpumask) if (!cpumask) return; - if (refcount_dec_and_test(&cpumask->usage)) { - migrate_disable(); - bpf_mem_cache_free(&bpf_cpumask_ma, cpumask); - migrate_enable(); - } + if (refcount_dec_and_test(&cpumask->usage)) + call_rcu(&cpumask->rcu, cpumask_free_cb); } /** @@ -426,7 +407,6 @@ BTF_SET8_START(cpumask_kfunc_btf_ids) BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_RCU) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 60793f793ca6e..15b5c5c729f99 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4599,6 +4599,7 @@ static bool in_rcu_cs(struct bpf_verifier_env *env) BTF_SET_START(rcu_protected_types) BTF_ID(struct, prog_test_ref_kfunc) BTF_ID(struct, cgroup) +BTF_ID(struct, bpf_cpumask) BTF_SET_END(rcu_protected_types) static bool rcu_protected_object(const struct btf *btf, u32 btf_id) diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 5fbe457c4ebe3..cdf4acc18e4c5 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -16,7 +16,7 @@ static const char * const cpumask_success_testcases[] = { "test_copy_any_anyand", "test_insert_leave", "test_insert_remove_release", - "test_insert_kptr_get_release", + "test_global_mask_rcu", }; static void verify_success(const char *prog_name) diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 65e5496ca1b22..0c5b785a93e45 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -9,6 +9,9 @@ int err; +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) +private(MASK) static struct bpf_cpumask __kptr * global_mask; + struct __cpumask_map_value { struct bpf_cpumask __kptr * cpumask; }; @@ -23,7 +26,6 @@ struct array_map { struct bpf_cpumask *bpf_cpumask_create(void) __ksym; void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym; struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym; -struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumask) __ksym; u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym; u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; @@ -51,6 +53,9 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym u32 bpf_cpumask_any(const struct cpumask *src) __ksym; u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym; +void bpf_rcu_read_lock(void) __ksym; +void bpf_rcu_read_unlock(void) __ksym; + static inline const struct cpumask *cast(struct bpf_cpumask *cpumask) { return (const struct cpumask *)cpumask; diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index cfe83f0ef9e25..db4f94e72b615 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -95,35 +95,73 @@ int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_ } SEC("tp_btf/task_newtask") -__failure __msg("Unreleased reference") -int BPF_PROG(test_kptr_get_no_release, struct task_struct *task, u64 clone_flags) +__failure __msg("NULL pointer passed to trusted arg0") +int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags) { - struct bpf_cpumask *cpumask; - struct __cpumask_map_value *v; + /* NULL passed to KF_TRUSTED_ARGS kfunc. */ + bpf_cpumask_empty(NULL); - cpumask = create_cpumask(); - if (!cpumask) + return 0; +} + +SEC("tp_btf/task_newtask") +__failure __msg("R2 must be a rcu pointer") +int BPF_PROG(test_global_mask_out_of_rcu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + local = create_cpumask(); + if (!local) return 0; - if (cpumask_map_insert(cpumask)) + prev = bpf_kptr_xchg(&global_mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; return 0; + } - v = cpumask_map_value_lookup(); - if (!v) + bpf_rcu_read_lock(); + local = global_mask; + if (!local) { + err = 4; + bpf_rcu_read_unlock(); return 0; + } - cpumask = bpf_cpumask_kptr_get(&v->cpumask); + bpf_rcu_read_unlock(); + + /* RCU region is exited before calling KF_RCU kfunc. */ + + bpf_cpumask_test_cpu(0, (const struct cpumask *)local); - /* cpumask is never released. */ return 0; } SEC("tp_btf/task_newtask") -__failure __msg("NULL pointer passed to trusted arg0") -int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags) +__failure __msg("NULL pointer passed to trusted arg1") +int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone_flags) { - /* NULL passed to KF_TRUSTED_ARGS kfunc. */ - bpf_cpumask_empty(NULL); + struct bpf_cpumask *local, *prev; + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + bpf_rcu_read_lock(); + local = global_mask; + + /* No NULL check is performed on global cpumask kptr. */ + bpf_cpumask_test_cpu(0, (const struct cpumask *)local); + + bpf_rcu_read_unlock(); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 97ed08c4ff03c..2fcdd7f68ac7a 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -395,31 +395,34 @@ int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_fla } SEC("tp_btf/task_newtask") -int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_flags) +int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) { - struct bpf_cpumask *cpumask; - struct __cpumask_map_value *v; + struct bpf_cpumask *local, *prev; - cpumask = create_cpumask(); - if (!cpumask) + if (!is_test_task()) return 0; - if (cpumask_map_insert(cpumask)) { + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask, local); + if (prev) { + bpf_cpumask_release(prev); err = 3; return 0; } - v = cpumask_map_value_lookup(); - if (!v) { + bpf_rcu_read_lock(); + local = global_mask; + if (!local) { err = 4; + bpf_rcu_read_unlock(); return 0; } - cpumask = bpf_cpumask_kptr_get(&v->cpumask); - if (cpumask) - bpf_cpumask_release(cpumask); - else - err = 5; + bpf_cpumask_test_cpu(0, (const struct cpumask *)local); + bpf_rcu_read_unlock(); return 0; }