Merge branch 'BPF_and_RT'
Thomas Gleixner says:

====================
This is the third version of the BPF/RT patch set which makes both coexist
nicely. The long explanation can be found in the cover letter of the V1
submission:

  https://lore.kernel.org/r/20200214133917.304937432@linutronix.de

V2 is here:

  https://lore.kernel.org/r/20200220204517.863202864@linutronix.de

The following changes vs. V2 have been made:

  - Rebased to bpf-next, adjusted to the lock changes in the hashmap code.

  - Split the preallocation enforcement patch for instrumentation type BPF
    programs into two pieces:

    1) Emit a one-time warning on !RT kernels when any instrumentation type
       BPF program uses run-time allocation, and emit a corresponding
       warning in the verifier log, but allow the program to run for
       backward compatibility's sake. After a grace period this should be
       enforced.

    2) On RT, reject such programs because there the memory allocator cannot
       be called from truly atomic contexts (see the sketch after the
       sign-off below).

  - Fixed the fallout from V2 as reported by Alexei and 0-day

  - Removed the redundant preempt_disable() from trace_call_bpf()

  - Removed the unused export of trace_call_bpf()
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
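
To make points 1) and 2) above concrete, the enforcement could look roughly like the sketch below. This is an illustration, not the code from this series: is_instrumentation_prog() and bpf_map_is_prealloc() are hypothetical helpers and the messages are made up; only the warn-once-on-!RT versus reject-on-RT split mirrors the cover letter.

/* Illustrative sketch only -- not the verifier change from this series.
 * is_instrumentation_prog() and bpf_map_is_prealloc() are hypothetical
 * stand-ins for the real checks.
 */
static int check_instrumentation_map(struct bpf_verifier_env *env,
				     struct bpf_prog *prog,
				     struct bpf_map *map)
{
	if (!is_instrumentation_prog(prog) || bpf_map_is_prealloc(map))
		return 0;

	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
		/* 2) On RT the memory allocator cannot be called from
		 * truly atomic contexts, so reject the program.
		 */
		verbose(env, "instrumentation programs need preallocated maps on RT\n");
		return -EINVAL;
	}

	/* 1) On !RT warn once and leave a note in the verifier log, but
	 * let the program run for backward compatibility's sake.
	 */
	WARN_ON_ONCE(1);
	verbose(env, "instrumentation program uses a run-time allocated map, consider preallocation\n");
	return 0;
}
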
Alexei Starovoitov committed Feb 25, 2020
2 parents 8eece07 + 099bfaa commit 80a836c
Showing 18 changed files with 283 additions and 142 deletions.
38 changes: 34 additions & 4 deletions include/linux/bpf.h
@@ -885,7 +885,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *_prog; \
struct bpf_prog_array *_array; \
u32 _ret = 1; \
-		preempt_disable();		\
+		migrate_disable();		\
rcu_read_lock(); \
_array = rcu_dereference(array); \
if (unlikely(check_non_null && !_array))\
@@ -898,7 +898,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
} \
_out: \
rcu_read_unlock(); \
-		preempt_enable();		\
+		migrate_enable();		\
_ret; \
})

@@ -932,7 +932,7 @@ _out: \
u32 ret; \
u32 _ret = 1; \
u32 _cn = 0; \
-		preempt_disable();		\
+		migrate_disable();		\
rcu_read_lock(); \
_array = rcu_dereference(array); \
_item = &_array->items[0]; \
@@ -944,7 +944,7 @@ _out: \
_item++; \
} \
rcu_read_unlock(); \
-		preempt_enable();		\
+		migrate_enable();		\
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
@@ -961,6 +961,36 @@ _out: \
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);

/*
* Block execution of BPF programs attached to instrumentation (perf,
* kprobes, tracepoints) to prevent deadlocks on map operations as any of
* these events can happen inside a region which holds a map bucket lock
* and can deadlock on it.
*
* Use the preemption safe inc/dec variants on RT because migrate disable
* is preemptible on RT and preemption in the middle of the RMW operation
* might lead to inconsistent state. Use the raw variants for non RT
* kernels as migrate_disable() maps to preempt_disable() so the slightly
* more expensive save operation can be avoided.
*/
static inline void bpf_disable_instrumentation(void)
{
migrate_disable();
if (IS_ENABLED(CONFIG_PREEMPT_RT))
this_cpu_inc(bpf_prog_active);
else
__this_cpu_inc(bpf_prog_active);
}

static inline void bpf_enable_instrumentation(void)
{
if (IS_ENABLED(CONFIG_PREEMPT_RT))
this_cpu_dec(bpf_prog_active);
else
__this_cpu_dec(bpf_prog_active);
migrate_enable();
}

extern const struct file_operations bpf_map_fops;
extern const struct file_operations bpf_prog_fops;

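The comment added to bpf.h above spells out why instrumentation has to be blocked around map bucket locks. As a rough usage sketch (assumed code, not part of this commit), a syscall-side map update would bracket the operation with the new helpers so that a perf/kprobe/tracepoint program cannot run on the same CPU in between and deadlock on the bucket lock:

/* Hypothetical caller shown for illustration; only the bracketing
 * pattern is the point, the surrounding function is not from the patch.
 */
static int update_elem_guarded(struct bpf_map *map, void *key, void *value,
			       u64 flags)
{
	int err;

	bpf_disable_instrumentation();
	err = map->ops->map_update_elem(map, key, value, flags);
	bpf_enable_instrumentation();

	return err;
}
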
37 changes: 29 additions & 8 deletions include/linux/filter.h
@@ -561,7 +561,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);

#define __BPF_PROG_RUN(prog, ctx, dfunc) ({ \
u32 ret; \
-	cant_sleep();					\
+	cant_migrate();					\
if (static_branch_unlikely(&bpf_stats_enabled_key)) { \
struct bpf_prog_stats *stats; \
u64 start = sched_clock(); \
@@ -576,8 +576,30 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
} \
ret; })

-#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx, \
-	bpf_dispatcher_nopfunc)
+#define BPF_PROG_RUN(prog, ctx) \
+	__BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc)

/*
* Use in preemptible and therefore migratable context to make sure that
* the execution of the BPF program runs on one CPU.
*
* This uses migrate_disable/enable() explicitly to document that the
* invocation of a BPF program does not require reentrancy protection
* against a BPF program which is invoked from a preempting task.
*
* For non RT enabled kernels migrate_disable/enable() maps to
* preempt_disable/enable(), i.e. it disables also preemption.
*/
static inline u32 bpf_prog_run_pin_on_cpu(const struct bpf_prog *prog,
const void *ctx)
{
u32 ret;

migrate_disable();
ret = __BPF_PROG_RUN(prog, ctx, bpf_dispatcher_nopfunc);
migrate_enable();
return ret;
}

#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN

@@ -655,6 +677,7 @@ static inline u8 *bpf_skb_cb(struct sk_buff *skb)
return qdisc_skb_cb(skb)->data;
}

/* Must be invoked with migration disabled */
static inline u32 __bpf_prog_run_save_cb(const struct bpf_prog *prog,
struct sk_buff *skb)
{
@@ -680,9 +703,9 @@ static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
{
u32 res;

-	preempt_disable();
+	migrate_disable();
res = __bpf_prog_run_save_cb(prog, skb);
-	preempt_enable();
+	migrate_enable();
return res;
}

@@ -695,9 +718,7 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
if (unlikely(prog->cb_access))
memset(cb_data, 0, BPF_SKB_CB_LEN);

-	preempt_disable();
-	res = BPF_PROG_RUN(prog, skb);
-	preempt_enable();
+	res = bpf_prog_run_pin_on_cpu(prog, skb);
return res;
}

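The bpf_prog_run_pin_on_cpu() comment above describes its intended call sites: preemptible contexts that only need the program pinned to one CPU rather than protected against a preempting BPF program. A hedged sketch of such a caller (the function and context struct are assumptions, not from the patch):

/* Illustrative caller: invokes a program from fully preemptible context.
 * struct my_ctx and run_filter_prog() are hypothetical; only the use of
 * bpf_prog_run_pin_on_cpu() instead of an open-coded preempt_disable() /
 * BPF_PROG_RUN() pair reflects this commit.
 */
struct my_ctx {
	u32 data;
};

static bool run_filter_prog(const struct bpf_prog *prog,
			    const struct my_ctx *ctx)
{
	/* Migration (and on !RT also preemption) is disabled only for the
	 * duration of the program; the caller itself stays preemptible.
	 */
	return bpf_prog_run_pin_on_cpu(prog, ctx) != 0;
}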
