Skip to content

Commit

Permalink
Merge branch 'introduce-bpf_preempt_-disable-enable'
Browse files Browse the repository at this point in the history
Kumar Kartikeya Dwivedi says:

====================
Introduce bpf_preempt_{disable,enable}

This set introduces two kfuncs, bpf_preempt_disable and
bpf_preempt_enable, which are wrappers around preempt_disable and
preempt_enable in the kernel. These functions allow a BPF program to
have code sections where preemption is disabled. There are multiple use
cases that are served by such a feature, a few are listed below:

1. Writing safe per-CPU algorithms/data structures that work correctly
   across different contexts.
2. Writing safe per-CPU allocators similar to bpf_memalloc on top of
   array/arena memory blobs.
3. Writing locking algorithms in BPF programs natively.

Note that local_irq_disable/enable equivalent is also needed for proper
IRQ context protection, but that is a more involved change and will be
sent later.

While bpf_preempt_{disable,enable} is not sufficient for all of these
usage scenarios on its own, it is still necessary.

The same effect as these kfuncs can in some sense be already achieved
using the bpf_spin_lock or rcu_read_lock APIs, therefore from the
standpoint of kernel functionality exposure in the verifier, this is
well understood territory.

Note that these helpers do allow calling kernel helpers and kfuncs from
within the non-preemptible region (unless sleepable). Otherwise, any
locks built using the preemption helpers will be as limited as
existing bpf_spin_lock.

Nesting is allowed by keeping a counter for tracking remaining enables
required to be performed. A similar approach can be applied to
rcu_read_locks in a follow-up.

Changelog
=========
v1: https://lore.kernel.org/bpf/20240423061922.2295517-1-memxor@gmail.com

 * Move kfunc BTF ID declarations above css task kfunc for
   !CONFIG_CGROUPS config (Alexei)
 * Add test case for global function call in non-preemptible region
   (Jiri)
====================

Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20240424031315.2757363-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Apr 24, 2024
2 parents dc92feb + 3134396 commit 55d30cc
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 2 deletions.
1 change: 1 addition & 0 deletions include/linux/bpf_verifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,7 @@ struct bpf_verifier_state {
struct bpf_active_lock active_lock;
bool speculative;
bool active_rcu_lock;
u32 active_preempt_lock;
/* If this state was ever pointed-to by other state's loop_entry field
* this flag would be set to true. Used to avoid freeing such states
* while they are still in use.
Expand Down
12 changes: 12 additions & 0 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -2734,6 +2734,16 @@ __bpf_kfunc int bpf_wq_set_callback_impl(struct bpf_wq *wq,
return __bpf_async_set_callback(async, callback_fn, aux, flags, BPF_ASYNC_TYPE_WQ);
}

/*
 * bpf_preempt_disable - kfunc that forwards to preempt_disable().
 *
 * Takes no arguments and returns nothing; the body is a single call into
 * the kernel's preempt_disable().
 */
__bpf_kfunc void bpf_preempt_disable(void)
{
	preempt_disable();
}

/*
 * bpf_preempt_enable - kfunc that forwards to preempt_enable().
 *
 * Takes no arguments and returns nothing; the body is a single call into
 * the kernel's preempt_enable().
 */
__bpf_kfunc void bpf_preempt_enable(void)
{
	preempt_enable();
}

__bpf_kfunc_end_defs();

BTF_KFUNCS_START(generic_btf_ids)
Expand Down Expand Up @@ -2814,6 +2824,8 @@ BTF_ID_FLAGS(func, bpf_modify_return_test_tp)
BTF_ID_FLAGS(func, bpf_wq_init)
BTF_ID_FLAGS(func, bpf_wq_set_callback_impl)
BTF_ID_FLAGS(func, bpf_wq_start)
BTF_ID_FLAGS(func, bpf_preempt_disable)
BTF_ID_FLAGS(func, bpf_preempt_enable)
BTF_KFUNCS_END(common_btf_ids)

static const struct btf_kfunc_id_set common_kfunc_set = {
Expand Down
71 changes: 69 additions & 2 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -1434,6 +1434,7 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state,
}
dst_state->speculative = src->speculative;
dst_state->active_rcu_lock = src->active_rcu_lock;
dst_state->active_preempt_lock = src->active_preempt_lock;
dst_state->in_sleepable = src->in_sleepable;
dst_state->curframe = src->curframe;
dst_state->active_lock.ptr = src->active_lock.ptr;
Expand Down Expand Up @@ -9599,6 +9600,13 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EINVAL;
}

/* Only global subprogs cannot be called with preemption disabled. */
if (env->cur_state->active_preempt_lock) {
verbose(env, "global function calls are not allowed with preemption disabled,\n"
"use static function instead\n");
return -EINVAL;
}

if (err) {
verbose(env, "Caller passes invalid args into func#%d ('%s')\n",
subprog, sub_name);
Expand Down Expand Up @@ -10285,6 +10293,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
}

if (env->cur_state->active_preempt_lock) {
if (fn->might_sleep) {
verbose(env, "sleepable helper %s#%d in non-preemptible region\n",
func_id_name(func_id), func_id);
return -EINVAL;
}

if (in_sleepable(env) && is_storage_get_function(func_id))
env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
}

meta.func_id = func_id;
/* check args */
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
Expand Down Expand Up @@ -11027,6 +11046,8 @@ enum special_kfunc_type {
KF_bpf_percpu_obj_drop_impl,
KF_bpf_throw,
KF_bpf_wq_set_callback_impl,
KF_bpf_preempt_disable,
KF_bpf_preempt_enable,
KF_bpf_iter_css_task_new,
};

Expand Down Expand Up @@ -11081,6 +11102,8 @@ BTF_ID(func, bpf_percpu_obj_new_impl)
BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_ID(func, bpf_throw)
BTF_ID(func, bpf_wq_set_callback_impl)
BTF_ID(func, bpf_preempt_disable)
BTF_ID(func, bpf_preempt_enable)
#ifdef CONFIG_CGROUPS
BTF_ID(func, bpf_iter_css_task_new)
#else
Expand All @@ -11107,6 +11130,16 @@ static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
}

/*
 * Return true when the kfunc call described by @meta has the func_id stored
 * in the KF_bpf_preempt_disable slot of special_kfunc_list.
 */
static bool is_kfunc_bpf_preempt_disable(struct bpf_kfunc_call_arg_meta *meta)
{
	return special_kfunc_list[KF_bpf_preempt_disable] == meta->func_id;
}

/*
 * Return true when the kfunc call described by @meta has the func_id stored
 * in the KF_bpf_preempt_enable slot of special_kfunc_list.
 */
static bool is_kfunc_bpf_preempt_enable(struct bpf_kfunc_call_arg_meta *meta)
{
	return special_kfunc_list[KF_bpf_preempt_enable] == meta->func_id;
}

static enum kfunc_ptr_arg_type
get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
struct bpf_kfunc_call_arg_meta *meta,
Expand Down Expand Up @@ -12195,11 +12228,11 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
int *insn_idx_p)
{
const struct btf_type *t, *ptr_type;
bool sleepable, rcu_lock, rcu_unlock, preempt_disable, preempt_enable;
u32 i, nargs, ptr_type_id, release_ref_obj_id;
struct bpf_reg_state *regs = cur_regs(env);
const char *func_name, *ptr_type_name;
bool sleepable, rcu_lock, rcu_unlock;
const struct btf_type *t, *ptr_type;
struct bpf_kfunc_call_arg_meta meta;
struct bpf_insn_aux_data *insn_aux;
int err, insn_idx = *insn_idx_p;
Expand Down Expand Up @@ -12260,6 +12293,9 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
rcu_lock = is_kfunc_bpf_rcu_read_lock(&meta);
rcu_unlock = is_kfunc_bpf_rcu_read_unlock(&meta);

preempt_disable = is_kfunc_bpf_preempt_disable(&meta);
preempt_enable = is_kfunc_bpf_preempt_enable(&meta);

if (env->cur_state->active_rcu_lock) {
struct bpf_func_state *state;
struct bpf_reg_state *reg;
Expand Down Expand Up @@ -12292,6 +12328,22 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
return -EINVAL;
}

if (env->cur_state->active_preempt_lock) {
if (preempt_disable) {
env->cur_state->active_preempt_lock++;
} else if (preempt_enable) {
env->cur_state->active_preempt_lock--;
} else if (sleepable) {
verbose(env, "kernel func %s is sleepable within non-preemptible region\n", func_name);
return -EACCES;
}
} else if (preempt_disable) {
env->cur_state->active_preempt_lock++;
} else if (preempt_enable) {
verbose(env, "unmatched attempt to enable preemption (kernel function %s)\n", func_name);
return -EINVAL;
}

/* In case of release function, we get register number of refcounted
* PTR_TO_BTF_ID in bpf_kfunc_arg_meta, do the release now.
*/
Expand Down Expand Up @@ -15439,6 +15491,11 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}

if (env->cur_state->active_preempt_lock) {
verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_preempt_disable-ed region\n");
return -EINVAL;
}

if (regs[ctx_reg].type != PTR_TO_CTX) {
verbose(env,
"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
Expand Down Expand Up @@ -17006,6 +17063,9 @@ static bool states_equal(struct bpf_verifier_env *env,
if (old->active_rcu_lock != cur->active_rcu_lock)
return false;

if (old->active_preempt_lock != cur->active_preempt_lock)
return false;

if (old->in_sleepable != cur->in_sleepable)
return false;

Expand Down Expand Up @@ -17957,6 +18017,13 @@ static int do_check(struct bpf_verifier_env *env)
return -EINVAL;
}

if (env->cur_state->active_preempt_lock && !env->cur_state->curframe) {
verbose(env, "%d bpf_preempt_enable%s missing\n",
env->cur_state->active_preempt_lock,
env->cur_state->active_preempt_lock == 1 ? " is" : "(s) are");
return -EINVAL;
}

/* We must do check_reference_leak here before
* prepare_func_exit to handle the case when
* state->curframe > 0, it may be a callback
Expand Down
9 changes: 9 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/preempt_lock.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
#include <preempt_lock.skel.h>

/* Test entry point: runs the preempt_lock programs through RUN_TESTS(). */
void test_preempt_lock(void)
{
	RUN_TESTS(preempt_lock);
}
135 changes: 135 additions & 0 deletions tools/testing/selftests/bpf/progs/preempt_lock.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"

/* Declarations of the two preemption kfuncs exercised by the tests below. */
void bpf_preempt_disable(void) __ksym;
void bpf_preempt_enable(void) __ksym;

/*
 * One bpf_preempt_disable() with no matching enable before returning.
 * Expected to fail with the singular form of the "missing" message.
 */
SEC("?tc")
__failure __msg("1 bpf_preempt_enable is missing")
int preempt_lock_missing_1(struct __sk_buff *ctx)
{
bpf_preempt_disable();
return 0;
}

/*
 * Two nested bpf_preempt_disable() calls and no enable before returning.
 * Expected to fail with the plural form of the "missing" message and a
 * count of 2.
 */
SEC("?tc")
__failure __msg("2 bpf_preempt_enable(s) are missing")
int preempt_lock_missing_2(struct __sk_buff *ctx)
{
bpf_preempt_disable();
bpf_preempt_disable();
return 0;
}

/*
 * Three nested bpf_preempt_disable() calls and no enable before returning.
 * Expected to fail reporting 3 missing enables.
 */
SEC("?tc")
__failure __msg("3 bpf_preempt_enable(s) are missing")
int preempt_lock_missing_3(struct __sk_buff *ctx)
{
bpf_preempt_disable();
bpf_preempt_disable();
bpf_preempt_disable();
return 0;
}

/*
 * Three disables followed by only two enables, leaving one outstanding.
 * Expected to fail reporting exactly 1 missing enable.
 */
SEC("?tc")
__failure __msg("1 bpf_preempt_enable is missing")
int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx)
{
bpf_preempt_disable();
bpf_preempt_disable();
bpf_preempt_disable();
bpf_preempt_enable();
bpf_preempt_enable();
return 0;
}

/* Static, non-inlined subprog whose only action is bpf_preempt_disable(). */
static __noinline void preempt_disable(void)
{
bpf_preempt_disable();
}

/* Static, non-inlined subprog whose only action is bpf_preempt_enable(). */
static __noinline void preempt_enable(void)
{
bpf_preempt_enable();
}

/*
 * Disables preemption once through the static preempt_disable() subprog and
 * returns without enabling. The expected message shows the disable done in
 * the subprog is still counted when the main program returns.
 */
SEC("?tc")
__failure __msg("1 bpf_preempt_enable is missing")
int preempt_lock_missing_1_subprog(struct __sk_buff *ctx)
{
preempt_disable();
return 0;
}

/*
 * Disables preemption twice through the static preempt_disable() subprog and
 * returns without enabling. Expected to fail reporting 2 missing enables.
 */
SEC("?tc")
__failure __msg("2 bpf_preempt_enable(s) are missing")
int preempt_lock_missing_2_subprog(struct __sk_buff *ctx)
{
preempt_disable();
preempt_disable();
return 0;
}

/*
 * Two disables and one enable, all through the static subprogs, leaving one
 * disable outstanding. Expected to fail reporting 1 missing enable.
 */
SEC("?tc")
__failure __msg("1 bpf_preempt_enable is missing")
int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx)
{
preempt_disable();
preempt_disable();
preempt_enable();
return 0;
}

/*
 * Static, non-inlined subprog that performs a balanced disable/enable pair
 * by calling the static preempt_disable() and preempt_enable() subprogs.
 */
static __noinline void preempt_balance_subprog(void)
{
preempt_disable();
preempt_enable();
}

/*
 * A single disable immediately followed by its matching enable, calling the
 * kfuncs directly. Annotated as expected to succeed.
 */
SEC("?tc")
__success int preempt_balance(struct __sk_buff *ctx)
{
bpf_preempt_disable();
bpf_preempt_enable();
return 0;
}

/*
 * Calls preempt_balance_subprog(), which does a balanced disable/enable
 * through nested static subprogs. Annotated as expected to succeed.
 */
SEC("?tc")
__success int preempt_balance_subprog_test(struct __sk_buff *ctx)
{
preempt_balance_subprog();
return 0;
}

/*
 * Calls the bpf_copy_from_user() helper between a disable/enable pair.
 * Expected to fail with a message starting "sleepable helper
 * bpf_copy_from_user#"; the helper's id follows the '#' in the verifier
 * output and is not part of the matched text.
 */
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
__failure __msg("sleepable helper bpf_copy_from_user#")
int preempt_sleepable_helper(void *ctx)
{
u32 data;

bpf_preempt_disable();
bpf_copy_from_user(&data, sizeof(data), NULL);
bpf_preempt_enable();
return 0;
}

/*
 * Non-static (global) subprog used by preempt_global_subprog_test. It calls
 * the balanced static subprog and returns 0.
 */
int __noinline preempt_global_subprog(void)
{
preempt_balance_subprog();
return 0;
}

/*
 * Calls the global subprog preempt_global_subprog() between a disable and an
 * enable. Expected to fail with the "global function calls are not allowed
 * with preemption disabled" message.
 */
SEC("?tc")
__failure __msg("global function calls are not allowed with preemption disabled")
int preempt_global_subprog_test(struct __sk_buff *ctx)
{
preempt_disable();
preempt_global_subprog();
preempt_enable();
return 0;
}

/* License string placed in the "license" section of the object. */
char _license[] SEC("license") = "GPL";

0 comments on commit 55d30cc

Please sign in to comment.