Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2018-01-26

The following pull-request contains BPF updates for your *net-next* tree.

The main changes are:

1) A number of extensions to tcp-bpf, from Lawrence; an illustrative
   sock_ops sketch follows the list.
    - direct R or R/W access to many tcp_sock fields via bpf_sock_ops
    - passing up to 3 arguments to bpf_sock_ops functions
    - tcp_sock field bpf_sock_ops_cb_flags for controlling callbacks
    - optionally calling bpf_sock_ops program when RTO fires
    - optionally calling bpf_sock_ops program when packet is retransmitted
    - optionally calling bpf_sock_ops program when TCP state changes
    - access to tclass and sk_txhash
    - new selftest
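
To make the new hooks concrete, here is a hedged sketch (not part of this
commit) of a sock_ops program that opts in to the new callbacks. The
BPF_SOCK_OPS_* constants and the bpf_sock_ops_cb_flags_set() helper are the
names this series introduces; the program logic and the bpf_helpers.h path
are illustrative assumptions.

#include <linux/bpf.h>
#include "bpf_helpers.h"	/* SEC(), helper stubs; path is an assumption */

SEC("sockops")
int tcp_event_watcher(struct bpf_sock_ops *skops)
{
	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		/* Opt in: only callbacks whose flag is set get invoked. */
		bpf_sock_ops_cb_flags_set(skops,
					  BPF_SOCK_OPS_RTO_CB_FLAG |
					  BPF_SOCK_OPS_RETRANS_CB_FLAG |
					  BPF_SOCK_OPS_STATE_CB_FLAG);
		break;
	case BPF_SOCK_OPS_RTO_CB:
	case BPF_SOCK_OPS_RETRANS_CB:
	case BPF_SOCK_OPS_STATE_CB:
		/* Up to three per-event arguments arrive in skops->args[];
		 * their meaning is per-op.
		 */
		break;
	}
	return 1;
}

char _license[] SEC("license") = "GPL";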

2) div/mod exception handling, from Daniel.
    One of the ugly leftovers from the early eBPF days is that div/mod
    operations based on registers have a hard-coded src_reg == 0 test
    in the interpreter as well as in JIT code generators that would
    return from the BPF program with exit code 0. This was basically
adopted from the cBPF interpreter for historical reasons.
There are multiple reasons why this is very suboptimal and prone
to bugs. To name one: the exit code mapping for such an abnormal
program exit of 0 does not always match a given program type's
semantics. For example, '0' in tc means action 'ok', where the
packet is passed further up the stack, which is undesirable in
such cases (e.g. when implementing policy) and also does not
match other program types.
After considering _four_ different ways to address the problem,
we adopt the same behavior as major archs like ARMv8: X div 0
results in 0, and X mod 0 results in X. The aarch64 and aarch32
ISAs do not generate any traps or otherwise abort program
execution on unsigned division by zero. Given the options, this
seems the most suitable of them all, also since major archs have
similar schemes in place. Given this is all in the realm of
undefined behavior, we still have the option to adapt later if
deemed necessary.
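
Expressed as plain C, the adopted runtime semantics for div/mod by a
register are the following (illustrative sketch only, not kernel code;
the helper names are made up):

#include <stdint.h>

static uint64_t bpf_div_x(uint64_t dst, uint64_t src)
{
	return src ? dst / src : 0;	/* X div 0 -> 0 */
}

static uint64_t bpf_mod_x(uint64_t dst, uint64_t src)
{
	return src ? dst % src : dst;	/* X mod 0 -> X */
}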

3) sockmap sample refactoring, from John.

4) lpm map get_next_key fixes, from Yonghong.

5) test cleanups, from Alexei and Prashant.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Jan 29, 2018
2 parents 6b2e282 + 8223967, commit 457740a
Showing 37 changed files with 1,969 additions and 378 deletions.
Documentation/networking/filter.txt (2 changes: 1 addition & 1 deletion)
@@ -1134,7 +1134,7 @@ The verifier's knowledge about the variable offset consists of:
mask and value; no bit should ever be 1 in both. For example, if a byte is read
into a register from memory, the register's top 56 bits are known zero, while
the low 8 are unknown - which is represented as the tnum (0x0; 0xff). If we
-then OR this with 0x40, we get (0x40; 0xcf), then if we add 1 we get (0x0;
+then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
0x1ff), because of potential carries.
Besides arithmetic, the register state can also be updated by conditional
branches. For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
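
For reference, the corrected value follows from the tnum algebra in
kernel/bpf/tnum.c; this small standalone sketch (mirroring
tnum_or()/tnum_add(), lightly adapted for userspace) reproduces the example:

#include <stdint.h>
#include <stdio.h>

struct tnum { uint64_t value; uint64_t mask; };

/* OR: known-1 bits come from either value; remaining set bits stay unknown. */
static struct tnum tnum_or(struct tnum a, struct tnum b)
{
	uint64_t v = a.value | b.value;
	uint64_t mu = a.mask | b.mask;

	return (struct tnum){ v, mu & ~v };
}

/* ADD: carries out of unknown bits can spread unknownness upward. */
static struct tnum tnum_add(struct tnum a, struct tnum b)
{
	uint64_t sm = a.mask + b.mask;
	uint64_t sv = a.value + b.value;
	uint64_t sigma = sm + sv;
	uint64_t chi = sigma ^ sv;
	uint64_t mu = chi | a.mask | b.mask;

	return (struct tnum){ sv & ~mu, mu };
}

int main(void)
{
	struct tnum t = { 0x0, 0xff };		/* unknown low byte */

	t = tnum_or(t, (struct tnum){ 0x40, 0x0 });
	printf("(0x%llx; 0x%llx)\n", (unsigned long long)t.value,
	       (unsigned long long)t.mask);	/* (0x40; 0xbf) */
	t = tnum_add(t, (struct tnum){ 0x1, 0x0 });
	printf("(0x%llx; 0x%llx)\n", (unsigned long long)t.value,
	       (unsigned long long)t.mask);	/* (0x0; 0x1ff) */
	return 0;
}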
arch/arm/net/bpf_jit_32.c (8 changes: 0 additions & 8 deletions)
@@ -363,15 +363,7 @@ static inline int epilogue_offset(const struct jit_ctx *ctx)
static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op)
{
const u8 *tmp = bpf2a32[TMP_REG_1];
-	s32 jmp_offset;

-	/* checks if divisor is zero or not. If it is, then
-	 * exit directly.
-	 */
-	emit(ARM_CMP_I(rn, 0), ctx);
-	_emit(ARM_COND_EQ, ARM_MOV_I(ARM_R0, 0), ctx);
-	jmp_offset = epilogue_offset(ctx);
-	_emit(ARM_COND_EQ, ARM_B(jmp_offset), ctx);
#if __LINUX_ARM_ARCH__ == 7
if (elf_hwcap & HWCAP_IDIVA) {
if (op == BPF_DIV)
arch/arm64/net/bpf_jit_comp.c (13 changes: 0 additions & 13 deletions)
@@ -390,18 +390,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
case BPF_ALU64 | BPF_DIV | BPF_X:
case BPF_ALU | BPF_MOD | BPF_X:
case BPF_ALU64 | BPF_MOD | BPF_X:
-	{
-		const u8 r0 = bpf2a64[BPF_REG_0];
-
-		/* if (src == 0) return 0 */
-		jmp_offset = 3; /* skip ahead to else path */
-		check_imm19(jmp_offset);
-		emit(A64_CBNZ(is64, src, jmp_offset), ctx);
-		emit(A64_MOVZ(1, r0, 0, 0), ctx);
-		jmp_offset = epilogue_offset(ctx);
-		check_imm26(jmp_offset);
-		emit(A64_B(jmp_offset), ctx);
-		/* else */
switch (BPF_OP(code)) {
case BPF_DIV:
emit(A64_UDIV(is64, dst, dst, src), ctx);
@@ -413,7 +401,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
break;
}
break;
-	}
case BPF_ALU | BPF_LSH | BPF_X:
case BPF_ALU64 | BPF_LSH | BPF_X:
emit(A64_LSLV(is64, dst, dst, src), ctx);
arch/mips/net/ebpf_jit.c (29 changes: 4 additions & 25 deletions)
@@ -741,16 +741,11 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_ALU | BPF_DIV | BPF_K: /* ALU_IMM */
case BPF_ALU | BPF_MOD | BPF_K: /* ALU_IMM */
+		if (insn->imm == 0)
+			return -EINVAL;
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
if (dst < 0)
return dst;
-		if (insn->imm == 0) { /* Div by zero */
-			b_off = b_imm(exit_idx, ctx);
-			if (is_bad_offset(b_off))
-				return -E2BIG;
-			emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
-			emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
-		}
td = get_reg_val_type(ctx, this_idx, insn->dst_reg);
if (td == REG_64BIT || td == REG_32BIT_ZERO_EX)
/* sign extend */
@@ -770,19 +765,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_ALU64 | BPF_DIV | BPF_K: /* ALU_IMM */
case BPF_ALU64 | BPF_MOD | BPF_K: /* ALU_IMM */
+		if (insn->imm == 0)
+			return -EINVAL;
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
if (dst < 0)
return dst;
-		if (insn->imm == 0) { /* Div by zero */
-			b_off = b_imm(exit_idx, ctx);
-			if (is_bad_offset(b_off))
-				return -E2BIG;
-			emit_instr(ctx, beq, MIPS_R_ZERO, MIPS_R_ZERO, b_off);
-			emit_instr(ctx, addu, MIPS_R_V0, MIPS_R_ZERO, MIPS_R_ZERO);
-		}
		if (get_reg_val_type(ctx, this_idx, insn->dst_reg) == REG_32BIT)
			emit_instr(ctx, dinsu, dst, MIPS_R_ZERO, 32, 32);
-
if (insn->imm == 1) {
/* div by 1 is a nop, mod by 1 is zero */
if (bpf_op == BPF_MOD)
@@ -860,11 +849,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_DIV:
case BPF_MOD:
-		b_off = b_imm(exit_idx, ctx);
-		if (is_bad_offset(b_off))
-			return -E2BIG;
-		emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
-		emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, ddivu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
@@ -943,11 +927,6 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_DIV:
case BPF_MOD:
-		b_off = b_imm(exit_idx, ctx);
-		if (is_bad_offset(b_off))
-			return -E2BIG;
-		emit_instr(ctx, beq, src, MIPS_R_ZERO, b_off);
-		emit_instr(ctx, movz, MIPS_R_V0, MIPS_R_ZERO, src);
emit_instr(ctx, divu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
arch/powerpc/net/bpf_jit_comp64.c (8 changes: 0 additions & 8 deletions)
@@ -381,10 +381,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
goto bpf_alu32_trunc;
case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
-		PPC_CMPWI(src_reg, 0);
-		PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
-		PPC_LI(b2p[BPF_REG_0], 0);
-		PPC_JMP(exit_addr);
if (BPF_OP(code) == BPF_MOD) {
PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
PPC_MULW(b2p[TMP_REG_1], src_reg,
@@ -395,10 +391,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
goto bpf_alu32_trunc;
case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
-		PPC_CMPDI(src_reg, 0);
-		PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12);
-		PPC_LI(b2p[BPF_REG_0], 0);
-		PPC_JMP(exit_addr);
if (BPF_OP(code) == BPF_MOD) {
PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg);
PPC_MULD(b2p[TMP_REG_1], src_reg,
arch/s390/net/bpf_jit_comp.c (10 changes: 0 additions & 10 deletions)
@@ -610,11 +610,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

-		jit->seen |= SEEN_RET0;
-		/* ltr %src,%src (if src == 0 goto fail) */
-		EMIT2(0x1200, src_reg, src_reg);
-		/* jz <ret0> */
-		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lhi %w0,0 */
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
@@ -630,11 +625,6 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
{
int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

-		jit->seen |= SEEN_RET0;
-		/* ltgr %src,%src (if src == 0 goto fail) */
-		EMIT4(0xb9020000, src_reg, src_reg);
-		/* jz <ret0> */
-		EMIT4_PCREL(0xa7840000, jit->ret0_ip - jit->prg);
/* lghi %w0,0 */
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
arch/sparc/net/bpf_jit_comp_64.c (18 changes: 0 additions & 18 deletions)
@@ -967,31 +967,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
emit_alu(MULX, src, dst, ctx);
break;
case BPF_ALU | BPF_DIV | BPF_X:
-		emit_cmp(src, G0, ctx);
-		emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
-		emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_write_y(G0, ctx);
emit_alu(DIV, src, dst, ctx);
break;

case BPF_ALU64 | BPF_DIV | BPF_X:
-		emit_cmp(src, G0, ctx);
-		emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
-		emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_alu(UDIVX, src, dst, ctx);
break;

case BPF_ALU | BPF_MOD | BPF_X: {
const u8 tmp = bpf2sparc[TMP_REG_1];

ctx->tmp_1_used = true;

-		emit_cmp(src, G0, ctx);
-		emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
-		emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_write_y(G0, ctx);
emit_alu3(DIV, dst, src, tmp, ctx);
emit_alu3(MULX, tmp, src, tmp, ctx);
@@ -1003,10 +989,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)

ctx->tmp_1_used = true;

-		emit_cmp(src, G0, ctx);
-		emit_branch(BE|ANNUL, ctx->idx, ctx->epilogue_offset, ctx);
-		emit_loadimm(0, bpf2sparc[BPF_REG_0], ctx);
-
emit_alu3(UDIVX, dst, src, tmp, ctx);
emit_alu3(MULX, tmp, src, tmp, ctx);
emit_alu3(SUB, dst, tmp, dst, ctx);
arch/x86/net/bpf_jit_comp.c (20 changes: 0 additions & 20 deletions)
@@ -568,26 +568,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
*/
EMIT2(0x31, 0xd2);

-		if (BPF_SRC(insn->code) == BPF_X) {
-			/* if (src_reg == 0) return 0 */
-
-			/* cmp r11, 0 */
-			EMIT4(0x49, 0x83, 0xFB, 0x00);
-
-			/* jne .+9 (skip over pop, pop, xor and jmp) */
-			EMIT2(X86_JNE, 1 + 1 + 2 + 5);
-			EMIT1(0x5A); /* pop rdx */
-			EMIT1(0x58); /* pop rax */
-			EMIT2(0x31, 0xc0); /* xor eax, eax */
-
-			/* jmp cleanup_addr
-			 * addrs[i] - 11, because there are 11 bytes
-			 * after this insn: div, mov, pop, pop, mov
-			 */
-			jmp_offset = ctx->cleanup_addr - (addrs[i] - 11);
-			EMIT1_off32(0xE9, jmp_offset);
-		}
-
if (BPF_CLASS(insn->code) == BPF_ALU64)
/* div r11 */
EMIT3(0x49, 0xF7, 0xF3);
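
Unlike ARMv8, x86 raises a hardware exception (#DE) on division by zero, so
the JIT can only drop its inline test because the zero check is now performed
generically before the operation: per the series, the verifier patches
div/mod-by-register into a small checked instruction sequence. A hedged
sketch of that shape, using the insn macros from include/linux/filter.h
(register choice and exact sequence here are illustrative, not verbatim
kernel code):

#include <linux/filter.h>

/* BPF_REG_3/BPF_REG_4 stand in for the insn's actual dst/src regs. */
static const struct bpf_insn div_x_checked[] = {
	/* if src != 0, skip the zeroing and jump to the division */
	BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2),
	/* X div 0 -> 0; a 32-bit xor also clears the upper half */
	BPF_ALU32_REG(BPF_XOR, BPF_REG_3, BPF_REG_3),
	BPF_JMP_IMM(BPF_JA, 0, 0, 1),
	BPF_ALU64_REG(BPF_DIV, BPF_REG_3, BPF_REG_4),	/* original insn */
};

static const struct bpf_insn mod_x_checked[] = {
	/* X mod 0 -> X: simply skip the operation when src == 0 */
	BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1),
	BPF_ALU64_REG(BPF_MOD, BPF_REG_3, BPF_REG_4),	/* original insn */
};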
include/linux/filter.h (12 changes: 12 additions & 0 deletions)
@@ -688,6 +688,8 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
void bpf_prog_free(struct bpf_prog *fp);

+bool bpf_opcode_in_insntable(u8 code);
+
struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
gfp_t gfp_extra_flags);
@@ -1003,10 +1005,20 @@ struct bpf_sock_ops_kern {
struct sock *sk;
u32 op;
union {
+		u32 args[4];
u32 reply;
u32 replylong[4];
};
u32 is_fullsock;
+	u64 temp;	/* temp and everything after is not
+			 * initialized to 0 before calling
+			 * the BPF program. New fields that
+			 * should be initialized to 0 should
+			 * be inserted before temp.
+			 * temp is scratch storage used by
+			 * sock_ops_convert_ctx_access
+			 * as temporary storage of a register.
+			 */
};

#endif /* __LINUX_FILTER_H__ */
include/linux/tcp.h (11 changes: 11 additions & 0 deletions)
@@ -335,6 +335,17 @@ struct tcp_sock {

int linger2;


+	/* Sock_ops bpf program related variables */
+#ifdef CONFIG_BPF
+	u8 bpf_sock_ops_cb_flags;	/* Control calling BPF programs
+					 * values defined in uapi/linux/tcp.h
+					 */
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) (TP->bpf_sock_ops_cb_flags & ARG)
+#else
+#define BPF_SOCK_OPS_TEST_FLAG(TP, ARG) 0
+#endif

/* Receiver side RTT estimation */
struct {
u32 rtt_us;
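
A hedged sketch of how this gate is meant to be used from the TCP stack:
BPF_SOCK_OPS_RTO_CB and its flag are names from this series, while the
function and its arguments here are illustrative assumptions.

/* e.g. called from the retransmission timer */
static void example_rto_hook(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	/* BPF_SOCK_OPS_TEST_FLAG() folds to constant 0 when CONFIG_BPF
	 * is off, so the callback costs nothing unless a program opted in.
	 */
	if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
		tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,
				  tp->snd_una, 0 /* hypothetical arg */,
				  0 /* hypothetical arg */);
}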
include/net/tcp.h (42 changes: 36 additions & 6 deletions)
@@ -2006,19 +2006,21 @@ void tcp_cleanup_ulp(struct sock *sk);
* program loaded).
*/
#ifdef CONFIG_BPF
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
struct bpf_sock_ops_kern sock_ops;
int ret;

-	memset(&sock_ops, 0, sizeof(sock_ops));
+	memset(&sock_ops, 0, offsetof(struct bpf_sock_ops_kern, temp));
if (sk_fullsock(sk)) {
sock_ops.is_fullsock = 1;
sock_owned_by_me(sk);
}

sock_ops.sk = sk;
sock_ops.op = op;
+	if (nargs > 0)
+		memcpy(sock_ops.args, args, nargs * sizeof(*args));

ret = BPF_CGROUP_RUN_PROG_SOCK_OPS(&sock_ops);
if (ret == 0)
@@ -2027,18 +2029,46 @@ static inline int tcp_call_bpf(struct sock *sk, int op)
ret = -1;
return ret;
}

+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+	u32 args[2] = {arg1, arg2};
+
+	return tcp_call_bpf(sk, op, 2, args);
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+				    u32 arg3)
+{
+	u32 args[3] = {arg1, arg2, arg3};
+
+	return tcp_call_bpf(sk, op, 3, args);
+}

#else
-static inline int tcp_call_bpf(struct sock *sk, int op)
+static inline int tcp_call_bpf(struct sock *sk, int op, u32 nargs, u32 *args)
{
return -EPERM;
}

+static inline int tcp_call_bpf_2arg(struct sock *sk, int op, u32 arg1, u32 arg2)
+{
+	return -EPERM;
+}
+
+static inline int tcp_call_bpf_3arg(struct sock *sk, int op, u32 arg1, u32 arg2,
+				    u32 arg3)
+{
+	return -EPERM;
+}

#endif

static inline u32 tcp_timeout_init(struct sock *sk)
{
int timeout;

-	timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT);
+	timeout = tcp_call_bpf(sk, BPF_SOCK_OPS_TIMEOUT_INIT, 0, NULL);

if (timeout <= 0)
timeout = TCP_TIMEOUT_INIT;
@@ -2049,7 +2079,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
{
int rwnd;

-	rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT);
+	rwnd = tcp_call_bpf(sk, BPF_SOCK_OPS_RWND_INIT, 0, NULL);

if (rwnd < 0)
rwnd = 0;
@@ -2058,7 +2088,7 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)

static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{
-	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
+	return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN, 0, NULL) == 1);
}

#if IS_ENABLED(CONFIG_SMC)
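
Putting the pieces together, a caller passes per-event arguments through the
new wrappers; the args land in sock_ops.args[] (zeroed only up to 'temp' by
the offsetof()-bounded memset above) and become readable by the program.
Hedged usage sketch; the op choice and argument meanings are hypothetical:

static bool example_notify_state_change(struct sock *sk, int old_state,
					int new_state)
{
	/* BPF_SOCK_OPS_STATE_CB is introduced by this series; calling it
	 * with these two arguments is an illustrative assumption.
	 */
	return tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB,
				 old_state, new_state) == 1;
}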
(Diff truncated: the remaining changed files in this merge are not shown.)
