Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Alexei Starovoitov says:

====================
pull-request: bpf-next 2021-04-01

The following pull-request contains BPF updates for your *net-next* tree.

We've added 68 non-merge commits during the last 7 day(s) which contain
a total of 70 files changed, 2944 insertions(+), 1139 deletions(-).

The main changes are:

1) UDP support for sockmap, from Cong.

2) Verifier merge conflict resolution fix, from Daniel.

3) xsk selftests enhancements, from Maciej.

4) Unstable helpers aka kernel function calling, from Martin.

5) Batched ops for LPM map, from Pedro.

6) Fix race in bpf_get_local_storage, from Yonghong.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Apr 2, 2021
2 parents bd78980 + 89d69c5 commit c2bcb4c
Showing 70 changed files with 2,944 additions and 1,139 deletions.
15 changes: 15 additions & 0 deletions Documentation/bpf/bpf_design_QA.rst
@@ -258,3 +258,18 @@ Q: Can BPF functionality such as new program or map types, new
helpers, etc be added out of kernel module code?

A: NO.

Q: Is directly calling a kernel function an ABI?
------------------------------------------------
Q: Some kernel functions (e.g. tcp_slow_start) can be called
by BPF programs. Do these kernel functions become an ABI?

A: NO.

The kernel function prototypes will change, and BPF programs that call
them will then be rejected by the verifier. Also, for example, some of
the bpf-callable kernel functions are already used by other in-kernel
tcp cc (congestion-control) implementations. If any of these kernel
functions changes, both the in-tree and out-of-tree kernel tcp cc
implementations have to be changed; the same goes for BPF programs,
which have to be adjusted accordingly.
5 changes: 5 additions & 0 deletions arch/x86/net/bpf_jit_comp.c
@@ -2346,3 +2346,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                                           tmp : orig_prog);
        return prog;
}

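/* Advertise that this JIT can emit direct calls to kernel functions
 * (BPF_PSEUDO_KFUNC_CALL instructions).
 */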
bool bpf_jit_supports_kfunc_call(void)
{
        return true;
}
198 changes: 198 additions & 0 deletions arch/x86/net/bpf_jit_comp32.c
@@ -1390,6 +1390,19 @@ static inline void emit_push_r64(const u8 src[], u8 **pprog)
        *pprog = prog;
}

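/* Push one 32-bit argument: load the BPF register's low word from its
 * stack slot into ECX (used as scratch) and push it. The 64-bit
 * counterpart is emit_push_r64() above.
 */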
static void emit_push_r32(const u8 src[], u8 **pprog)
{
        u8 *prog = *pprog;
        int cnt = 0;

        /* mov ecx,dword ptr [ebp+off] */
        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
        /* push ecx */
        EMIT1(0x51);

        *pprog = prog;
}

static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
{
        u8 jmp_cond;
@@ -1459,6 +1472,174 @@ static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
        return jmp_cond;
}

/* i386 kernel compiles with "-mregparm=3". From gcc document:
*
* ==== snippet ====
* regparm (number)
* On x86-32 targets, the regparm attribute causes the compiler
* to pass arguments number one to (number) if they are of integral
* type in registers EAX, EDX, and ECX instead of on the stack.
* Functions that take a variable number of arguments continue
* to be passed all of their arguments on the stack.
* ==== snippet ====
*
* The first three args of a function will be considered for
* putting into the 32bit register EAX, EDX, and ECX.
*
* Two 32bit registers are used to pass a 64bit arg.
*
* For example,
* void foo(u32 a, u32 b, u32 c, u32 d):
* u32 a: EAX
* u32 b: EDX
* u32 c: ECX
* u32 d: stack
*
* void foo(u64 a, u32 b, u32 c):
* u64 a: EAX (lo32) EDX (hi32)
* u32 b: ECX
* u32 c: stack
*
* void foo(u32 a, u64 b, u32 c):
* u32 a: EAX
* u64 b: EDX (lo32) ECX (hi32)
* u32 c: stack
*
* void foo(u32 a, u32 b, u64 c):
* u32 a: EAX
* u32 b: EDX
* u64 c: stack
*
* The return value will be stored in the EAX (and EDX for 64bit value).
*
* For example,
* u32 foo(u32 a, u32 b, u32 c):
* return value: EAX
*
* u64 foo(u32 a, u32 b, u32 c):
* return value: EAX (lo32) EDX (hi32)
*
* Notes:
* The verifier only accepts function having integer and pointers
* as its args and return value, so it does not have
* struct-by-value.
*
* emit_kfunc_call() finds out the btf_func_model by calling
* bpf_jit_find_kfunc_model(). A btf_func_model
* has the details about the number of args, size of each arg,
* and the size of the return value.
*
* It first decides how many args can be passed by EAX, EDX, and ECX.
* That will decide what args should be pushed to the stack:
* [first_stack_regno, last_stack_regno] are the bpf regnos
* that should be pushed to the stack.
*
* It will first push all args to the stack because the push
* will need to use ECX. Then, it moves
* [BPF_REG_1, first_stack_regno) to EAX, EDX, and ECX.
*
* When emitting a call (0xE8), it needs to figure out
* the jmp_offset relative to the jit-insn address immediately
* following the call (0xE8) instruction. At this point, it knows
* the end of the jit-insn address after completely translated the
* current (BPF_JMP | BPF_CALL) bpf-insn. It is passed as "end_addr"
* to the emit_kfunc_call(). Thus, it can learn the "immediate-follow-call"
* address by figuring out how many jit-insn is generated between
* the call (0xE8) and the end_addr:
* - 0-1 jit-insn (3 bytes each) to restore the esp pointer if there
* is arg pushed to the stack.
* - 0-2 jit-insns (3 bytes each) to handle the return value.
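*
* For example, if one arg was pushed to the stack and the return
* value is 64-bit, end_addr is backed off by 3 + 3 + 3 = 9 bytes to
* recover the jit-insn address immediately following the call (0xE8)
* instruction.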
*/
static int emit_kfunc_call(const struct bpf_prog *bpf_prog, u8 *end_addr,
                           const struct bpf_insn *insn, u8 **pprog)
{
        const u8 arg_regs[] = { IA32_EAX, IA32_EDX, IA32_ECX };
        int i, cnt = 0, first_stack_regno, last_stack_regno;
        int free_arg_regs = ARRAY_SIZE(arg_regs);
        const struct btf_func_model *fm;
        int bytes_in_stack = 0;
        const u8 *cur_arg_reg;
        u8 *prog = *pprog;
        s64 jmp_offset;

        fm = bpf_jit_find_kfunc_model(bpf_prog, insn);
        if (!fm)
                return -EINVAL;

        first_stack_regno = BPF_REG_1;
        for (i = 0; i < fm->nr_args; i++) {
                int regs_needed = fm->arg_size[i] > sizeof(u32) ? 2 : 1;

                if (regs_needed > free_arg_regs)
                        break;

                free_arg_regs -= regs_needed;
                first_stack_regno++;
        }

        /* Push the args to the stack */
        last_stack_regno = BPF_REG_0 + fm->nr_args;
        for (i = last_stack_regno; i >= first_stack_regno; i--) {
                if (fm->arg_size[i - 1] > sizeof(u32)) {
                        emit_push_r64(bpf2ia32[i], &prog);
                        bytes_in_stack += 8;
                } else {
                        emit_push_r32(bpf2ia32[i], &prog);
                        bytes_in_stack += 4;
                }
        }

        cur_arg_reg = &arg_regs[0];
        for (i = BPF_REG_1; i < first_stack_regno; i++) {
                /* mov e[adc]x,dword ptr [ebp+off] */
                EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
                      STACK_VAR(bpf2ia32[i][0]));
                if (fm->arg_size[i - 1] > sizeof(u32))
                        /* mov e[adc]x,dword ptr [ebp+off] */
                        EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
                              STACK_VAR(bpf2ia32[i][1]));
        }

        if (bytes_in_stack)
                /* add esp,"bytes_in_stack" */
                end_addr -= 3;

        /* mov dword ptr [ebp+off],edx */
        if (fm->ret_size > sizeof(u32))
                end_addr -= 3;

        /* mov dword ptr [ebp+off],eax */
        if (fm->ret_size)
                end_addr -= 3;

        jmp_offset = (u8 *)__bpf_call_base + insn->imm - end_addr;
        if (!is_simm32(jmp_offset)) {
                pr_err("unsupported BPF kernel function jmp_offset:%lld\n",
                       jmp_offset);
                return -EINVAL;
        }

        EMIT1_off32(0xE8, jmp_offset);

        if (fm->ret_size)
                /* mov dword ptr [ebp+off],eax */
                EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
                      STACK_VAR(bpf2ia32[BPF_REG_0][0]));

        if (fm->ret_size > sizeof(u32))
                /* mov dword ptr [ebp+off],edx */
                EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
                      STACK_VAR(bpf2ia32[BPF_REG_0][1]));

        if (bytes_in_stack)
                /* add esp,"bytes_in_stack" */
                EMIT3(0x83, add_1reg(0xC0, IA32_ESP), bytes_in_stack);

        *pprog = prog;

        return 0;
}
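
The -mregparm=3 placement described in the comment above can be
reproduced in a plain 32-bit userspace build. A small sketch (the
function add3() is hypothetical; build with gcc -m32 and inspect the
argument setup with objdump -d):

#include <stdio.h>

/* With regparm(3), the first three integer-sized args travel in EAX,
 * EDX, ECX; a u64 consumes two of those registers; what is left
 * spills to the stack.
 */
__attribute__((regparm(3)))
static unsigned long long add3(unsigned int a, unsigned long long b,
                               unsigned int c)
{
        /* a: EAX, b: EDX (lo32) ECX (hi32), c: stack */
        return a + b + c;
}

int main(void)
{
        /* the 64-bit result comes back in EAX (lo32) and EDX (hi32) */
        printf("%llu\n", add3(1, 2ULL, 3));
        return 0;
}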

static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                  int oldproglen, struct jit_context *ctx)
{
@@ -1888,6 +2069,18 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
                        if (insn->src_reg == BPF_PSEUDO_CALL)
                                goto notyet;

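                        /* Direct call to a kernel function (kfunc); emit it
                         * with the i386 argument-passing scheme implemented
                         * by emit_kfunc_call().
                         */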
                        if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
                                int err;

                                err = emit_kfunc_call(bpf_prog,
                                                      image + addrs[i],
                                                      insn, &prog);

                                if (err)
                                        return err;
                                break;
                        }

                        func = (u8 *) __bpf_call_base + imm32;
                        jmp_offset = func - (image + addrs[i]);

@@ -2393,3 +2586,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                                           tmp : orig_prog);
        return prog;
}

bool bpf_jit_supports_kfunc_call(void)
{
        return true;
}
12 changes: 12 additions & 0 deletions drivers/net/veth.c
@@ -218,6 +218,17 @@ static void veth_get_ethtool_stats(struct net_device *dev,
        }
}

static void veth_get_channels(struct net_device *dev,
                              struct ethtool_channels *channels)
{
        channels->tx_count = dev->real_num_tx_queues;
        channels->rx_count = dev->real_num_rx_queues;
        channels->max_tx = dev->real_num_tx_queues;
        channels->max_rx = dev->real_num_rx_queues;
        channels->combined_count = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
        channels->max_combined = min(dev->real_num_rx_queues, dev->real_num_tx_queues);
}

static const struct ethtool_ops veth_ethtool_ops = {
        .get_drvinfo = veth_get_drvinfo,
        .get_link = ethtool_op_get_link,
@@ -226,6 +237,7 @@ static const struct ethtool_ops veth_ethtool_ops = {
        .get_ethtool_stats = veth_get_ethtool_stats,
        .get_link_ksettings = veth_get_link_ksettings,
        .get_ts_info = ethtool_op_get_ts_info,
        .get_channels = veth_get_channels,
};
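
With .get_channels wired up, running ethtool -l on a veth device now
reports the pair's real RX/TX queue counts (previously the ioctl
failed with EOPNOTSUPP), which tooling that binds per-queue AF_XDP
sockets, such as the xsk selftests, can use to discover how many
queues to drive.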

/* general routines */
57 changes: 49 additions & 8 deletions include/linux/bpf-cgroup.h
@@ -20,14 +20,25 @@ struct bpf_sock_ops_kern;
struct bpf_cgroup_storage;
struct ctl_table;
struct ctl_table_header;
struct task_struct;

#ifdef CONFIG_CGROUP_BPF

extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])

#define BPF_CGROUP_STORAGE_NEST_MAX 8

struct bpf_cgroup_storage_info {
        struct task_struct *task;
        struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};

/* For each cpu, permit maximum BPF_CGROUP_STORAGE_NEST_MAX number of tasks
 * to use bpf cgroup storage simultaneously.
 */
DECLARE_PER_CPU(struct bpf_cgroup_storage_info,
                bpf_cgroup_storage_info[BPF_CGROUP_STORAGE_NEST_MAX]);

#define for_each_cgroup_storage_type(stype) \
        for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
@@ -161,13 +172,42 @@ static inline enum bpf_cgroup_storage_type cgroup_storage_type(
        return BPF_CGROUP_STORAGE_SHARED;
}

static inline int bpf_cgroup_storage_set(struct bpf_cgroup_storage
                                         *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
        enum bpf_cgroup_storage_type stype;
        int i, err = 0;

        preempt_disable();
        for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
                if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != NULL))
                        continue;

                this_cpu_write(bpf_cgroup_storage_info[i].task, current);
                for_each_cgroup_storage_type(stype)
                        this_cpu_write(bpf_cgroup_storage_info[i].storage[stype],
                                       storage[stype]);
                goto out;
        }
        err = -EBUSY;
        WARN_ON_ONCE(1);

out:
        preempt_enable();
        return err;
}

static inline void bpf_cgroup_storage_unset(void)
{
        int i;

        for (i = 0; i < BPF_CGROUP_STORAGE_NEST_MAX; i++) {
                if (unlikely(this_cpu_read(bpf_cgroup_storage_info[i].task) != current))
                        continue;

                this_cpu_write(bpf_cgroup_storage_info[i].task, NULL);
                return;
        }
}
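
Taken together, the intended usage is a set/run/unset bracket around
each program invocation. A sketch under assumed names
(run_prog_with_cgroup_storage() is illustrative, not a function from
this commit):

static int run_prog_with_cgroup_storage(const struct bpf_prog *prog, void *ctx,
                                        struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
{
        int ret;

        /* Claim a per-cpu nesting slot for current; this fails only if
         * all BPF_CGROUP_STORAGE_NEST_MAX slots on this cpu are taken.
         */
        if (bpf_cgroup_storage_set(storage))
                return -EBUSY;

        /* bpf_get_local_storage() locates the slot by matching current */
        ret = BPF_PROG_RUN(prog, ctx);

        bpf_cgroup_storage_unset();
        return ret;
}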

struct bpf_cgroup_storage *
@@ -448,8 +488,9 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
        return -EINVAL;
}

static inline int bpf_cgroup_storage_set(
        struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) { return 0; }
static inline void bpf_cgroup_storage_unset(void) {}
static inline int bpf_cgroup_storage_assign(struct bpf_prog_aux *aux,
                                            struct bpf_map *map) { return 0; }
static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
