Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf
Daniel Borkmann says:

====================
pull-request: bpf 2021-09-28

The following pull-request contains BPF updates for your *net* tree.

We've added 10 non-merge commits during the last 14 day(s) which contain
a total of 11 files changed, 139 insertions(+), 53 deletions(-).

The main changes are:

1) Fix MIPS JIT jump code emission for too large offsets, from Piotr Krysiuk.

2) Fix x86 JIT atomic/fetch emission when dst reg maps to rax, from Johan Almbladh.

3) Fix cgroup_sk_alloc corner case when called from interrupt, from Daniel Borkmann.

4) Fix segfault in libbpf's linker for objects without BTF, from Kumar Kartikeya Dwivedi.

5) Fix bpf_jit_charge_modmem for applications with CAP_BPF, from Lorenz Bauer.

6) Fix return value handling for struct_ops BPF programs, from Hou Tao.

7) Various fixes to BPF selftests, from Jiri Benc.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Sep 28, 2021
2 parents c894b51 + ced1858 commit 4ccb9f0
Showing 11 changed files with 139 additions and 53 deletions.
2 changes: 2 additions & 0 deletions MAINTAINERS
@@ -3384,9 +3384,11 @@ F: Documentation/networking/filter.rst
 F:	Documentation/userspace-api/ebpf/
 F:	arch/*/net/*
 F:	include/linux/bpf*
+F:	include/linux/btf*
 F:	include/linux/filter.h
 F:	include/trace/events/xdp.h
 F:	include/uapi/linux/bpf*
+F:	include/uapi/linux/btf*
 F:	include/uapi/linux/filter.h
 F:	kernel/bpf/
 F:	kernel/trace/bpf_trace.c
57 changes: 43 additions & 14 deletions arch/mips/net/bpf_jit.c
@@ -662,6 +662,11 @@ static void build_epilogue(struct jit_ctx *ctx)
 	((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
 	 func##_positive)
 
+static bool is_bad_offset(int b_off)
+{
+	return b_off > 0x1ffff || b_off < -0x20000;
+}
+
 static int build_body(struct jit_ctx *ctx)
 {
 	const struct bpf_prog *prog = ctx->skf;
@@ -728,7 +733,10 @@ static int build_body(struct jit_ctx *ctx)
 			/* Load return register on DS for failures */
 			emit_reg_move(r_ret, r_zero, ctx);
 			/* Return with error */
-			emit_b(b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_b(b_off, ctx);
 			emit_nop(ctx);
 			break;
 		case BPF_LD | BPF_W | BPF_IND:
@@ -775,8 +783,10 @@ static int build_body(struct jit_ctx *ctx)
 			emit_jalr(MIPS_R_RA, r_s0, ctx);
 			emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
 			/* Check the error value */
-			emit_bcond(MIPS_COND_NE, r_ret, 0,
-				   b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_bcond(MIPS_COND_NE, r_ret, 0, b_off, ctx);
 			emit_reg_move(r_ret, r_zero, ctx);
 			/* We are good */
 			/* X <- P[1:K] & 0xf */
@@ -855,17 +865,21 @@ static int build_body(struct jit_ctx *ctx)
 			/* A /= X */
 			ctx->flags |= SEEN_X | SEEN_A;
 			/* Check if r_X is zero */
-			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
-				   b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
 			emit_load_imm(r_ret, 0, ctx); /* delay slot */
 			emit_div(r_A, r_X, ctx);
 			break;
 		case BPF_ALU | BPF_MOD | BPF_X:
 			/* A %= X */
 			ctx->flags |= SEEN_X | SEEN_A;
 			/* Check if r_X is zero */
-			emit_bcond(MIPS_COND_EQ, r_X, r_zero,
-				   b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_bcond(MIPS_COND_EQ, r_X, r_zero, b_off, ctx);
 			emit_load_imm(r_ret, 0, ctx); /* delay slot */
 			emit_mod(r_A, r_X, ctx);
 			break;
@@ -926,7 +940,10 @@ static int build_body(struct jit_ctx *ctx)
 			break;
 		case BPF_JMP | BPF_JA:
 			/* pc += K */
-			emit_b(b_imm(i + k + 1, ctx), ctx);
+			b_off = b_imm(i + k + 1, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_b(b_off, ctx);
 			emit_nop(ctx);
 			break;
 		case BPF_JMP | BPF_JEQ | BPF_K:
@@ -1056,12 +1073,16 @@ static int build_body(struct jit_ctx *ctx)
 			break;
 		case BPF_RET | BPF_A:
 			ctx->flags |= SEEN_A;
-			if (i != prog->len - 1)
+			if (i != prog->len - 1) {
 				/*
 				 * If this is not the last instruction
 				 * then jump to the epilogue
 				 */
-				emit_b(b_imm(prog->len, ctx), ctx);
+				b_off = b_imm(prog->len, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				emit_b(b_off, ctx);
+			}
 			emit_reg_move(r_ret, r_A, ctx); /* delay slot */
 			break;
 		case BPF_RET | BPF_K:
@@ -1075,7 +1096,10 @@ static int build_body(struct jit_ctx *ctx)
 				 * If this is not the last instruction
 				 * then jump to the epilogue
 				 */
-				emit_b(b_imm(prog->len, ctx), ctx);
+				b_off = b_imm(prog->len, ctx);
+				if (is_bad_offset(b_off))
+					return -E2BIG;
+				emit_b(b_off, ctx);
 				emit_nop(ctx);
 			}
 			break;
@@ -1133,8 +1157,10 @@ static int build_body(struct jit_ctx *ctx)
 			/* Load *dev pointer */
 			emit_load_ptr(r_s0, r_skb, off, ctx);
 			/* error (0) in the delay slot */
-			emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
-				   b_imm(prog->len, ctx), ctx);
+			b_off = b_imm(prog->len, ctx);
+			if (is_bad_offset(b_off))
+				return -E2BIG;
+			emit_bcond(MIPS_COND_EQ, r_s0, r_zero, b_off, ctx);
 			emit_reg_move(r_ret, r_zero, ctx);
 			if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
 				BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
@@ -1244,7 +1270,10 @@ void bpf_jit_compile(struct bpf_prog *fp)
 
 	/* Generate the actual JIT code */
 	build_prologue(&ctx);
-	build_body(&ctx);
+	if (build_body(&ctx)) {
+		module_memfree(ctx.target);
+		goto out;
+	}
 	build_epilogue(&ctx);
 
 	/* Update the icache */
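
A note on the fix above: a classic MIPS branch encodes its target as a signed 16-bit word offset relative to the instruction after the branch, so only byte offsets in [-0x20000, 0x1ffff] are encodable. Before this change, a large enough cBPF program could make b_imm() produce an offset outside that window, which the emitter would silently truncate. Below is a minimal user-space sketch of the failure mode; is_bad_offset() is copied from the patch, while encode_branch_imm() is a hypothetical stand-in for the JIT's emitter, not kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Same bounds check the patch adds: an 18-bit signed byte offset. */
static bool is_bad_offset(int b_off)
{
	return b_off > 0x1ffff || b_off < -0x20000;
}

/* Hypothetical encoder: MIPS stores branch offsets in words, 16 bits. */
static uint32_t encode_branch_imm(int b_off)
{
	return ((uint32_t)b_off >> 2) & 0xffff;
}

int main(void)
{
	const int offsets[] = { 128, -0x20000, 0x20000 };

	for (unsigned int i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
		int b_off = offsets[i];

		if (is_bad_offset(b_off))
			printf("%d: rejected, JIT returns -E2BIG\n", b_off);
		else
			printf("%d: ok, imm16 = 0x%04x\n", b_off,
			       encode_branch_imm(b_off));
	}
	return 0;
}

The last case shows why the check matters: without it, an unencodable forward branch of +0x20000 bytes truncates to imm16 = 0x8000, i.e. a maximal backward branch, corrupting control flow instead of failing the JIT.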
66 changes: 48 additions & 18 deletions arch/x86/net/bpf_jit_comp.c
@@ -1341,9 +1341,10 @@ st: if (is_imm8(insn->off))
 			if (insn->imm == (BPF_AND | BPF_FETCH) ||
 			    insn->imm == (BPF_OR | BPF_FETCH) ||
 			    insn->imm == (BPF_XOR | BPF_FETCH)) {
-				u8 *branch_target;
 				bool is64 = BPF_SIZE(insn->code) == BPF_DW;
 				u32 real_src_reg = src_reg;
+				u32 real_dst_reg = dst_reg;
+				u8 *branch_target;
 
 				/*
 				 * Can't be implemented with a single x86 insn.
@@ -1354,11 +1355,13 @@ st: if (is_imm8(insn->off))
 				emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0);
 				if (src_reg == BPF_REG_0)
 					real_src_reg = BPF_REG_AX;
+				if (dst_reg == BPF_REG_0)
+					real_dst_reg = BPF_REG_AX;
 
 				branch_target = prog;
 				/* Load old value */
 				emit_ldx(&prog, BPF_SIZE(insn->code),
-					 BPF_REG_0, dst_reg, insn->off);
+					 BPF_REG_0, real_dst_reg, insn->off);
 				/*
 				 * Perform the (commutative) operation locally,
 				 * put the result in the AUX_REG.
@@ -1369,7 +1372,8 @@ st: if (is_imm8(insn->off))
 					  add_2reg(0xC0, AUX_REG, real_src_reg));
 				/* Attempt to swap in new value */
 				err = emit_atomic(&prog, BPF_CMPXCHG,
-						  dst_reg, AUX_REG, insn->off,
+						  real_dst_reg, AUX_REG,
+						  insn->off,
 						  BPF_SIZE(insn->code));
 				if (WARN_ON(err))
 					return err;
@@ -1383,11 +1387,10 @@ st: if (is_imm8(insn->off))
 				/* Restore R0 after clobbering RAX */
 				emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX);
 				break;
-
 			}
 
 			err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
-					  insn->off, BPF_SIZE(insn->code));
+					  insn->off, BPF_SIZE(insn->code));
 			if (err)
 				return err;
 			break;
@@ -1744,7 +1747,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
 }
 
 static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
-			   struct bpf_prog *p, int stack_size, bool mod_ret)
+			   struct bpf_prog *p, int stack_size, bool save_ret)
 {
 	u8 *prog = *pprog;
 	u8 *jmp_insn;
@@ -1777,11 +1780,15 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	if (emit_call(&prog, p->bpf_func, prog))
 		return -EINVAL;
 
-	/* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
+	/*
+	 * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return
 	 * of the previous call which is then passed on the stack to
 	 * the next BPF program.
+	 *
+	 * BPF_TRAMP_FENTRY trampoline may need to return the return
+	 * value of BPF_PROG_TYPE_STRUCT_OPS prog.
 	 */
-	if (mod_ret)
+	if (save_ret)
 		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
 
 	/* replace 2 nops with JE insn, since jmp target is known */
@@ -1828,13 +1835,15 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
 }
 
 static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
-		      struct bpf_tramp_progs *tp, int stack_size)
+		      struct bpf_tramp_progs *tp, int stack_size,
+		      bool save_ret)
 {
 	int i;
 	u8 *prog = *pprog;
 
 	for (i = 0; i < tp->nr_progs; i++) {
-		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false))
+		if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size,
+				    save_ret))
 			return -EINVAL;
 	}
 	*pprog = prog;
@@ -1877,6 +1886,23 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
 	return 0;
 }
 
+static bool is_valid_bpf_tramp_flags(unsigned int flags)
+{
+	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+		return false;
+
+	/*
+	 * BPF_TRAMP_F_RET_FENTRY_RET is only used by bpf_struct_ops,
+	 * and it must be used alone.
+	 */
+	if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
+	    (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
+		return false;
+
+	return true;
+}
+
 /* Example:
  * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
  * its 'struct btf_func_model' will be nr_args=2
@@ -1949,17 +1975,19 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN];
 	u8 **branches = NULL;
 	u8 *prog;
+	bool save_ret;
 
 	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
 	if (nr_args > 6)
 		return -ENOTSUPP;
 
-	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
-	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+	if (!is_valid_bpf_tramp_flags(flags))
 		return -EINVAL;
 
-	if (flags & BPF_TRAMP_F_CALL_ORIG)
-		stack_size += 8; /* room for return value of orig_call */
+	/* room for return value of orig_call or fentry prog */
+	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
+	if (save_ret)
+		stack_size += 8;
 
 	if (flags & BPF_TRAMP_F_IP_ARG)
 		stack_size += 8; /* room for IP address argument */
@@ -2005,7 +2033,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fentry->nr_progs)
-		if (invoke_bpf(m, &prog, fentry, stack_size))
+		if (invoke_bpf(m, &prog, fentry, stack_size,
+			       flags & BPF_TRAMP_F_RET_FENTRY_RET))
 			return -EINVAL;
 
 	if (fmod_ret->nr_progs) {
@@ -2052,7 +2081,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	}
 
 	if (fexit->nr_progs)
-		if (invoke_bpf(m, &prog, fexit, stack_size)) {
+		if (invoke_bpf(m, &prog, fexit, stack_size, false)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
@@ -2072,9 +2101,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			ret = -EINVAL;
 			goto cleanup;
 		}
-		/* restore original return value back into RAX */
-		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 	}
+	/* restore return value of orig_call or fentry prog back into RAX */
+	if (save_ret)
+		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
 
 	EMIT1(0x5B); /* pop rbx */
 	EMIT1(0xC9); /* leave */
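
Background for the first hunks: x86 has no fetch-and-{and,or,xor} instruction, so the JIT lowers BPF_{AND,OR,XOR} | BPF_FETCH to a load followed by a LOCK CMPXCHG retry loop. CMPXCHG implicitly uses rax for the expected old value, and rax is also where BPF_REG_0 lives. The code already redirected src_reg to the BPF_REG_AX scratch copy when it aliased R0, but not dst_reg, so the memory operand could be addressed through a clobbered register. The following user-space model shows the loop's semantics with C11 atomics standing in for the emitted instructions (a sketch, not the JIT's code):

#include <stdatomic.h>
#include <stdio.h>

static unsigned long fetch_or(_Atomic unsigned long *dst, unsigned long src)
{
	unsigned long old = atomic_load(dst);	/* like: mov rax, [dst] */

	/* like: lock cmpxchg [dst], new; retried until rax still matches */
	while (!atomic_compare_exchange_weak(dst, &old, old | src))
		;
	return old;	/* the fetched value, handed back in BPF_REG_0 */
}

int main(void)
{
	_Atomic unsigned long v = 0xf0;
	unsigned long old = fetch_or(&v, 0x0f);

	printf("old=%#lx new=%#lx\n", old, (unsigned long)v);
	return 0;
}

Because both the memory operand and the implicit rax operand appear in every iteration, the loop is only correct if neither register was clobbered by the R0 save/restore dance; remapping dst_reg to real_dst_reg closes that hole.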
3 changes: 2 additions & 1 deletion include/linux/bpf.h
@@ -578,11 +578,12 @@ struct btf_func_model {
  * programs only. Should not be used with normal calls and indirect calls.
  */
 #define BPF_TRAMP_F_SKIP_FRAME		BIT(2)
-
 /* Store IP address of the caller on the trampoline stack,
  * so it's available for trampoline's programs.
  */
 #define BPF_TRAMP_F_IP_ARG		BIT(3)
+/* Return the return value of fentry prog. Only used by bpf_struct_ops. */
+#define BPF_TRAMP_F_RET_FENTRY_RET	BIT(4)
 
 /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
  * bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
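
The new flag comes with the usage rule enforced by is_valid_bpf_tramp_flags() in the x86 hunk above: BPF_TRAMP_F_RET_FENTRY_RET must be used alone, and restoring registers is incompatible with skipping the frame. A standalone sketch of that check follows; the BIT values for RESTORE_REGS and CALL_ORIG (bits 0 and 1) come from the same header but are quoted from memory here:

#include <assert.h>
#include <stdbool.h>

#define BIT(n)				(1U << (n))
#define BPF_TRAMP_F_RESTORE_REGS	BIT(0)
#define BPF_TRAMP_F_CALL_ORIG		BIT(1)
#define BPF_TRAMP_F_SKIP_FRAME		BIT(2)
#define BPF_TRAMP_F_IP_ARG		BIT(3)
#define BPF_TRAMP_F_RET_FENTRY_RET	BIT(4)

static bool is_valid_bpf_tramp_flags(unsigned int flags)
{
	/* Restoring regs for the original call conflicts with skipping it. */
	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
	    (flags & BPF_TRAMP_F_SKIP_FRAME))
		return false;
	/* RET_FENTRY_RET is bpf_struct_ops-only and must stand alone. */
	if ((flags & BPF_TRAMP_F_RET_FENTRY_RET) &&
	    (flags & ~BPF_TRAMP_F_RET_FENTRY_RET))
		return false;
	return true;
}

int main(void)
{
	assert(is_valid_bpf_tramp_flags(BPF_TRAMP_F_RET_FENTRY_RET));
	assert(!is_valid_bpf_tramp_flags(BPF_TRAMP_F_RET_FENTRY_RET |
					 BPF_TRAMP_F_CALL_ORIG));
	assert(!is_valid_bpf_tramp_flags(BPF_TRAMP_F_RESTORE_REGS |
					 BPF_TRAMP_F_SKIP_FRAME));
	return 0;
}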
7 changes: 5 additions & 2 deletions kernel/bpf/bpf_struct_ops.c
@@ -368,6 +368,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 		const struct btf_type *mtype, *ptype;
 		struct bpf_prog *prog;
 		u32 moff;
+		u32 flags;
 
 		moff = btf_member_bit_offset(t, member) / 8;
 		ptype = btf_type_resolve_ptr(btf_vmlinux, member->type, NULL);
@@ -431,10 +432,12 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 
 		tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
 		tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
+		flags = st_ops->func_models[i].ret_size > 0 ?
+			BPF_TRAMP_F_RET_FENTRY_RET : 0;
 		err = arch_prepare_bpf_trampoline(NULL, image,
 						  st_map->image + PAGE_SIZE,
						  &st_ops->func_models[i],
						  flags, tprogs, NULL);
 		if (err < 0)
 			goto reset_unlock;
 
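
The ret_size test is what ties the new flag to struct_ops: a trampoline only needs to hand back the fentry program's return value when the member it implements actually returns something, e.g. a tcp_congestion_ops callback returning a __u32 versus a void .init member. A minimal illustration of that decision; struct func_model here is a stand-in for the kernel's struct btf_func_model:

#include <stdio.h>

#define BPF_TRAMP_F_RET_FENTRY_RET	(1U << 4)

/* Stand-in for the kernel's struct btf_func_model. */
struct func_model {
	unsigned char ret_size;	/* size of the return type; 0 for void */
};

/* Mirrors the flags selection added to bpf_struct_ops_map_update_elem(). */
static unsigned int struct_ops_tramp_flags(const struct func_model *m)
{
	return m->ret_size > 0 ? BPF_TRAMP_F_RET_FENTRY_RET : 0;
}

int main(void)
{
	struct func_model ssthresh = { .ret_size = 4 };	/* returns a __u32 */
	struct func_model init = { .ret_size = 0 };	/* void member */

	printf("ssthresh: flags=%#x\n", struct_ops_tramp_flags(&ssthresh));
	printf("init:     flags=%#x\n", struct_ops_tramp_flags(&init));
	return 0;
}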
2 changes: 1 addition & 1 deletion kernel/bpf/core.c
@@ -827,7 +827,7 @@ int bpf_jit_charge_modmem(u32 pages)
 {
 	if (atomic_long_add_return(pages, &bpf_jit_current) >
 	    (bpf_jit_limit >> PAGE_SHIFT)) {
-		if (!capable(CAP_SYS_ADMIN)) {
+		if (!bpf_capable()) {
 			atomic_long_sub(pages, &bpf_jit_current);
 			return -EPERM;
 		}
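
In the kernel, bpf_capable() is true for CAP_BPF as well as CAP_SYS_ADMIN, so after this one-liner a task granted only CAP_BPF is no longer refused when JIT allocations would cross bpf_jit_limit. A user-space model of the charging rule (the atomics and the capability check are stubbed, and the limit value is illustrative):

#include <stdbool.h>
#include <stdio.h>

static long jit_current;			/* pages charged so far */
static const long jit_limit = 1024;		/* stand-in for bpf_jit_limit */
static bool task_bpf_capable;			/* stub for bpf_capable() */

static int jit_charge_modmem(unsigned int pages)
{
	jit_current += pages;			/* atomic_long_add_return() */
	if (jit_current > jit_limit && !task_bpf_capable) {
		jit_current -= pages;		/* roll the charge back */
		return -1;			/* -EPERM in the kernel */
	}
	return 0;
}

int main(void)
{
	jit_current = 1020;

	task_bpf_capable = false;
	printf("no CAP_BPF, over limit: %d\n", jit_charge_modmem(8));
	task_bpf_capable = true;
	printf("CAP_BPF, over limit:    %d\n", jit_charge_modmem(8));
	return 0;
}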
17 changes: 12 additions & 5 deletions kernel/cgroup/cgroup.c
@@ -6574,22 +6574,29 @@ int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
 
 void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 {
-	/* Don't associate the sock with unrelated interrupted task's cgroup. */
-	if (in_interrupt())
-		return;
+	struct cgroup *cgroup;
 
 	rcu_read_lock();
+	/* Don't associate the sock with unrelated interrupted task's cgroup. */
+	if (in_interrupt()) {
+		cgroup = &cgrp_dfl_root.cgrp;
+		cgroup_get(cgroup);
+		goto out;
+	}
 
 	while (true) {
 		struct css_set *cset;
 
 		cset = task_css_set(current);
 		if (likely(cgroup_tryget(cset->dfl_cgrp))) {
-			skcd->cgroup = cset->dfl_cgrp;
-			cgroup_bpf_get(cset->dfl_cgrp);
+			cgroup = cset->dfl_cgrp;
 			break;
 		}
 		cpu_relax();
 	}
+out:
+	skcd->cgroup = cgroup;
+	cgroup_bpf_get(cgroup);
 	rcu_read_unlock();
 }
 
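
The rework matters because sockets are also allocated from softirq context (for instance when TCP creates a child socket for an incoming connection), where current is whatever task happened to be interrupted. The old code simply returned, which could leave skcd->cgroup without a valid association; the fix instead pins the default root cgroup so every socket carries a refcounted cgroup pointer. A user-space sketch of the resulting shape, with in_interrupt(), the cgroup objects, and the get/tryget calls all modeled rather than taken from kernel APIs:

#include <stdbool.h>
#include <stdio.h>

struct cgroup {
	const char *name;
	int refcnt;
};

static struct cgroup dfl_root = { "default root cgroup", 1 };
static struct cgroup task_cg = { "current task's cgroup", 1 };
static bool in_irq_ctx;		/* stand-in for in_interrupt() */

/* Shape of the fixed cgroup_sk_alloc(): never skip the association. */
static struct cgroup *sock_pick_cgroup(void)
{
	struct cgroup *cgroup;

	if (in_irq_ctx)
		cgroup = &dfl_root;	/* was: early return, no cgroup */
	else
		cgroup = &task_cg;	/* normal case: the caller's cgroup */
	cgroup->refcnt++;		/* models cgroup_get()/cgroup_tryget() */
	return cgroup;
}

int main(void)
{
	in_irq_ctx = true;
	printf("softirq alloc -> %s\n", sock_pick_cgroup()->name);
	in_irq_ctx = false;
	printf("process alloc -> %s\n", sock_pick_cgroup()->name);
	return 0;
}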
(Diffs for the remaining four changed files did not load in this capture.)