Skip to content

Commit

Permalink
Merge branch 'bpf-jit-cleanups'
Browse files Browse the repository at this point in the history
Daniel Borkmann says:

====================
This series follows up mostly with some minor cleanups on top
of 'Move ld_abs/ld_ind to native BPF' as well as implements better
32/64 bit immediate load into register and saves tail call init on
cBPF for the arm64 JIT. Last but not least we add a couple of test
cases. For details please see individual patches. Thanks!

v1 -> v2:
  - Minor fix in i64_i16_blocks() to remove 24 shift.
  - Added last two patches.
  - Added Acks from prior round.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed May 15, 2018
2 parents 53ea24c + a82d8cd commit fb40c9d
Show file tree
Hide file tree
Showing 7 changed files with 228 additions and 98 deletions.
13 changes: 3 additions & 10 deletions arch/arm/net/bpf_jit_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -234,18 +234,11 @@ static void jit_fill_hole(void *area, unsigned int size)
#define SCRATCH_SIZE 80

/* total stack size used in JITed code */
#define _STACK_SIZE \
(ctx->prog->aux->stack_depth + \
+ SCRATCH_SIZE + \
+ 4 /* extra for skb_copy_bits buffer */)

#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
#define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE)
#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)

/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (STACK_SIZE-off-4)

/* Offset of skb_copy_bits buffer */
#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
/* 'off' is parenthesized so an expression argument is subtracted as a unit. */
#define STACK_VAR(off) (STACK_SIZE - (off))

#if __LINUX_ARM_ARCH__ < 7

Expand Down
115 changes: 69 additions & 46 deletions arch/arm64/net/bpf_jit_comp.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/printk.h>
#include <linux/skbuff.h>
#include <linux/slab.h>

#include <asm/byteorder.h>
Expand Down Expand Up @@ -80,23 +79,66 @@ static inline void emit(const u32 insn, struct jit_ctx *ctx)
ctx->idx++;
}

/*
 * Emit the instruction(s) that load the signed 32 bit immediate @val into
 * @reg. The value is split into an upper and a lower 16 bit half; @is64 is
 * handed to the A64_MOV{Z,N,K} encoders unchanged.
 *
 * At most two instructions are emitted:
 *  - upper half has bit 15 clear: MOVZ of the lower half, followed by a
 *    MOVK of the upper half unless that half is zero;
 *  - upper half is 0xffff: a single MOVN of the inverted lower half;
 *  - otherwise: MOVN of the inverted upper half at shift 16, followed by a
 *    MOVK of the lower half unless that half is 0xffff (MOVN writes the
 *    inverse of the shifted immediate, so the low half is already all-ones).
 */
static inline void emit_a64_mov_i(const int is64, const int reg,
				  const s32 val, struct jit_ctx *ctx)
{
	u16 upper = val >> 16;
	u16 lower = val & 0xffff;

	/* Non-negative value: start from zero and patch in the halves. */
	if (!(upper & 0x8000)) {
		emit(A64_MOVZ(is64, reg, lower, 0), ctx);
		if (upper)
			emit(A64_MOVK(is64, reg, upper, 16), ctx);
		return;
	}

	/* Upper half all-ones: one inverted move of the lower half suffices. */
	if (upper == 0xffff) {
		emit(A64_MOVN(is64, reg, (u16)~lower, 0), ctx);
		return;
	}

	emit(A64_MOVN(is64, reg, (u16)~upper, 16), ctx);
	if (lower != 0xffff)
		emit(A64_MOVK(is64, reg, lower, 0), ctx);
}

/*
 * Count how many of the four 16 bit chunks of @val differ from the filler
 * pattern: 0x0000 normally, 0xffff when @inverse is true. The result is in
 * the range 0..4.
 */
static int i64_i16_blocks(const u64 val, bool inverse)
{
	const u64 filler = inverse ? 0xffff : 0x0000;
	int count = 0;
	int shift;

	for (shift = 0; shift < 64; shift += 16) {
		if (((val >> shift) & 0xffff) != filler)
			count++;
	}

	return count;
}

static inline void emit_a64_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
u64 tmp = val;
int shift = 0;

emit(A64_MOVZ(1, reg, tmp & 0xffff, shift), ctx);
tmp >>= 16;
shift += 16;
while (tmp) {
if (tmp & 0xffff)
emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
tmp >>= 16;
shift += 16;
u64 nrm_tmp = val, rev_tmp = ~val;
bool inverse;
int shift;

if (!(nrm_tmp >> 32))
return emit_a64_mov_i(0, reg, (u32)val, ctx);

inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
(fls64(nrm_tmp) - 1)), 16), 0);
if (inverse)
emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
else
emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
shift -= 16;
while (shift >= 0) {
if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
shift -= 16;
}
}

/*
* This is an unoptimized 64 bit immediate emission used for BPF to BPF call
* addresses. It will always do a full 64 bit decomposition as otherwise
* more complexity in the last extra pass is required since we previously
* reserved 4 instructions for the address.
*/
static inline void emit_addr_mov_i64(const int reg, const u64 val,
struct jit_ctx *ctx)
{
Expand All @@ -111,26 +153,6 @@ static inline void emit_addr_mov_i64(const int reg, const u64 val,
}
}

/*
 * Load the signed 32 bit immediate @val into @reg using at most two
 * move-wide instructions; @is64 is passed through to the A64_MOV{Z,N,K}
 * encoders.
 *
 * Negative values (bit 15 of the upper half set) start from MOVN, all
 * other values from MOVZ; a MOVK patches in the remaining half only when
 * that half is not already what the first instruction produced.
 */
static inline void emit_a64_mov_i(const int is64, const int reg,
				  const s32 val, struct jit_ctx *ctx)
{
	u16 hi = val >> 16;
	u16 lo = val & 0xffff;

	if (hi & 0x8000) {
		if (hi == 0xffff) {
			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
		} else {
			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
			/*
			 * MOVN at shift 16 leaves the low half all-ones, so
			 * a MOVK of 0xffff would be a wasted instruction.
			 */
			if (lo != 0xffff)
				emit(A64_MOVK(is64, reg, lo, 0), ctx);
		}
	} else {
		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
		if (hi)
			emit(A64_MOVK(is64, reg, hi, 16), ctx);
	}
}

static inline int bpf2a64_offset(int bpf_to, int bpf_from,
const struct jit_ctx *ctx)
{
Expand Down Expand Up @@ -163,7 +185,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
/* Tail call offset to jump into */
#define PROLOGUE_OFFSET 7

static int build_prologue(struct jit_ctx *ctx)
static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
{
const struct bpf_prog *prog = ctx->prog;
const u8 r6 = bpf2a64[BPF_REG_6];
Expand All @@ -188,7 +210,7 @@ static int build_prologue(struct jit_ctx *ctx)
* | ... | BPF prog stack
* | |
* +-----+ <= (BPF_FP - prog->aux->stack_depth)
* |RSVD | JIT scratchpad
* |RSVD | padding
* current A64_SP => +-----+ <= (BPF_FP - ctx->stack_size)
* | |
* | ... | Function call stack
Expand All @@ -210,19 +232,19 @@ static int build_prologue(struct jit_ctx *ctx)
/* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);

/* Initialize tail_call_cnt */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);
if (!ebpf_from_cbpf) {
/* Initialize tail_call_cnt */
emit(A64_MOVZ(1, tcc, 0, 0), ctx);

cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) {
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
cur_offset, PROLOGUE_OFFSET);
return -1;
cur_offset = ctx->idx - idx0;
if (cur_offset != PROLOGUE_OFFSET) {
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
cur_offset, PROLOGUE_OFFSET);
return -1;
}
}

/* 4 byte extra for skb_copy_bits buffer */
ctx->stack_size = prog->aux->stack_depth + 4;
ctx->stack_size = STACK_ALIGN(ctx->stack_size);
ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth);

/* Set up function call stack */
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
Expand Down Expand Up @@ -786,6 +808,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
struct bpf_prog *tmp, *orig_prog = prog;
struct bpf_binary_header *header;
struct arm64_jit_data *jit_data;
bool was_classic = bpf_prog_was_classic(prog);
bool tmp_blinded = false;
bool extra_pass = false;
struct jit_ctx ctx;
Expand Down Expand Up @@ -840,7 +863,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
goto out_off;
}

if (build_prologue(&ctx)) {
if (build_prologue(&ctx, was_classic)) {
prog = orig_prog;
goto out_off;
}
Expand All @@ -863,7 +886,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
skip_init_ctx:
ctx.idx = 0;

build_prologue(&ctx);
build_prologue(&ctx, was_classic);

if (build_body(&ctx)) {
bpf_jit_binary_free(header);
Expand Down
26 changes: 0 additions & 26 deletions arch/mips/net/ebpf_jit.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,6 @@ enum reg_val_type {
* struct jit_ctx - JIT context
* @skf: The sk_filter
* @stack_size: eBPF stack size
* @tmp_offset: eBPF $sp offset to 8-byte temporary memory
* @idx: Instruction index
* @flags: JIT flags
* @offsets: Instruction offsets
Expand All @@ -105,7 +104,6 @@ enum reg_val_type {
struct jit_ctx {
const struct bpf_prog *skf;
int stack_size;
int tmp_offset;
u32 idx;
u32 flags;
u32 *offsets;
Expand Down Expand Up @@ -293,7 +291,6 @@ static int gen_int_prologue(struct jit_ctx *ctx)
locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;

stack_adjust += locals_size;
ctx->tmp_offset = locals_size;

ctx->stack_size = stack_adjust;

Expand Down Expand Up @@ -399,7 +396,6 @@ static void gen_imm_to_reg(const struct bpf_insn *insn, int reg,
emit_instr(ctx, lui, reg, upper >> 16);
emit_instr(ctx, addiu, reg, reg, lower);
}

}

static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
Expand Down Expand Up @@ -547,28 +543,6 @@ static int gen_imm_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
return 0;
}

/*
 * Thin wrapper: forwards @skb, @offset, @len and @buffer unchanged to
 * skb_header_pointer() and returns whatever that call returns. The
 * function is annotated __must_check.
 * NOTE(review): the "ool_" prefix presumably means "out of line", i.e. a
 * real function with an address that generated code can call -- confirm
 * against the call sites.
 */
static void * __must_check
ool_skb_header_pointer(const struct sk_buff *skb, int offset,
int len, void *buffer)
{
return skb_header_pointer(skb, offset, len, buffer);
}

/*
 * Map the size field of @insn's opcode, BPF_SIZE(insn->code), to a length:
 * BPF_B -> 1, BPF_H -> 2, BPF_W -> 4, BPF_DW -> 8. Any other size value
 * yields 0.
 */
static int size_to_len(const struct bpf_insn *insn)
{
	int len = 0;

	switch (BPF_SIZE(insn->code)) {
	case BPF_B:
		len = 1;
		break;
	case BPF_H:
		len = 2;
		break;
	case BPF_W:
		len = 4;
		break;
	case BPF_DW:
		len = 8;
		break;
	default:
		break;
	}

	return len;
}

static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value)
{
if (value >= 0xffffffffffff8000ull || value < 0x8000ull) {
Expand Down
1 change: 0 additions & 1 deletion arch/sparc/net/bpf_jit_comp_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
const int i = insn - ctx->prog->insnsi;
const s16 off = insn->off;
const s32 imm = insn->imm;
u32 *func;

if (insn->src_reg == BPF_REG_FP)
ctx->saw_frame_pointer = true;
Expand Down
29 changes: 14 additions & 15 deletions arch/x86/include/asm/nospec-branch.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,9 @@ do { \
* jmp *%edx for x86_32
*/
#ifdef CONFIG_RETPOLINE
#ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 17
# define RETPOLINE_RAX_BPF_JIT() \
# ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 17
# define RETPOLINE_RAX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* callq do_rop */ \
/* spec_trap: */ \
Expand All @@ -314,8 +314,8 @@ do { \
EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \
EMIT1(0xC3); /* retq */ \
} while (0)
#else
# define RETPOLINE_EDX_BPF_JIT() \
# else /* !CONFIG_X86_64 */
# define RETPOLINE_EDX_BPF_JIT() \
do { \
EMIT1_off32(0xE8, 7); /* call do_rop */ \
/* spec_trap: */ \
Expand All @@ -326,17 +326,16 @@ do { \
EMIT3(0x89, 0x14, 0x24); /* mov %edx,(%esp) */ \
EMIT1(0xC3); /* ret */ \
} while (0)
#endif
# endif
#else /* !CONFIG_RETPOLINE */

#ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 2
# define RETPOLINE_RAX_BPF_JIT() \
EMIT2(0xFF, 0xE0); /* jmp *%rax */
#else
# define RETPOLINE_EDX_BPF_JIT() \
EMIT2(0xFF, 0xE2) /* jmp *%edx */
#endif
# ifdef CONFIG_X86_64
# define RETPOLINE_RAX_BPF_JIT_SIZE 2
/*
 * Plain indirect jump through %rax. No trailing semicolon here: the call
 * site supplies it, as it already must for the do { } while (0) retpoline
 * variant, so the expansion stays a single statement inside if/else.
 */
# define RETPOLINE_RAX_BPF_JIT() \
	EMIT2(0xFF, 0xE0) /* jmp *%rax */
# else /* !CONFIG_X86_64 */
# define RETPOLINE_EDX_BPF_JIT() \
EMIT2(0xFF, 0xE2) /* jmp *%edx */
# endif
#endif

#endif /* _ASM_X86_NOSPEC_BRANCH_H_ */
Loading

0 comments on commit fb40c9d

Please sign in to comment.