Skip to content

Commit

Permalink
Merge branch 'ebpf-next'
Browse files Browse the repository at this point in the history
Alexei Starovoitov says:

====================
allow eBPF programs to be attached to sockets

V1->V2:

fixed comments in sample code to state clearly that packet data is accessed
with LD_ABS instructions and not internal skb fields.
Also replaced constants in:
BPF_LD_ABS(BPF_B, 14 + 9 /* R0 = ip->proto */),
with:
BPF_LD_ABS(BPF_B, ETH_HLEN + offsetof(struct iphdr, protocol) /* R0 = ip->proto */),

V1 cover:

Introduce BPF_PROG_TYPE_SOCKET_FILTER type of eBPF programs that can be
attached to sockets with setsockopt().
Allow such programs to access maps via lookup/update/delete helpers.

This feature was previewed by bpf manpage in commit b4fc1a4("Merge branch 'bpf-next'")
Now it can actually run.

1st patch adds LD_ABS/LD_IND instruction verification and
2nd patch adds new setsockopt() flag.
Patches 3-6 are examples in assembler and in C.

Though native eBPF programs are way more powerful than classic filters
(attachable through similar setsockopt() call), they don't have skb field
accessors yet. Like skb->pkt_type, skb->dev->ifindex are not accessible.
There are sevaral ways to achieve that. That will be in the next set of patches.
So in this set native eBPF programs can only read data from packet and
access maps.

The most powerful example is sockex2_kern.c from patch 6 where ~200 lines of C
are compiled into ~300 of eBPF instructions.
It shows how quite complex packet parsing can be done.

LLVM used to build examples is at https://github.com/iovisor/llvm
which is fork of llvm trunk that I'm cleaning up for upstreaming.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Dec 6, 2014
2 parents f51a5e8 + fbe3310 commit 8d0c469
Show file tree
Hide file tree
Showing 31 changed files with 987 additions and 5 deletions.
3 changes: 3 additions & 0 deletions arch/alpha/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _UAPI_ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/avr32/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _UAPI__ASM_AVR32_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/cris/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _ASM_SOCKET_H */


3 changes: 3 additions & 0 deletions arch/frv/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,5 +82,8 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _ASM_SOCKET_H */

3 changes: 3 additions & 0 deletions arch/ia64/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _ASM_IA64_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/m32r/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _ASM_M32R_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/mips/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,4 +100,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _UAPI_ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/mn10300/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/parisc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,7 @@

#define SO_INCOMING_CPU 0x402A

#define SO_ATTACH_BPF 0x402B
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _UAPI_ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/powerpc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _ASM_POWERPC_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/s390/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,4 +88,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _ASM_SOCKET_H */
3 changes: 3 additions & 0 deletions arch/sparc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,9 @@

#define SO_INCOMING_CPU 0x0033

#define SO_ATTACH_BPF 0x0034
#define SO_DETACH_BPF SO_DETACH_FILTER

/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
Expand Down
3 changes: 3 additions & 0 deletions arch/xtensa/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,4 +93,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* _XTENSA_SOCKET_H */
4 changes: 4 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,11 @@ struct bpf_prog_aux {
struct work_struct work;
};

#ifdef CONFIG_BPF_SYSCALL
void bpf_prog_put(struct bpf_prog *prog);
#else
static inline void bpf_prog_put(struct bpf_prog *prog) {}
#endif
struct bpf_prog *bpf_prog_get(u32 ufd);
/* verify correctness of eBPF program */
int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
Expand Down
1 change: 1 addition & 0 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
void bpf_prog_destroy(struct bpf_prog *fp);

int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
int sk_attach_bpf(u32 ufd, struct sock *sk);
int sk_detach_filter(struct sock *sk);

int bpf_check_classic(const struct sock_filter *filter, unsigned int flen);
Expand Down
3 changes: 3 additions & 0 deletions include/uapi/asm-generic/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,4 +84,7 @@

#define SO_INCOMING_CPU 49

#define SO_ATTACH_BPF 50
#define SO_DETACH_BPF SO_DETACH_FILTER

#endif /* __ASM_GENERIC_SOCKET_H */
1 change: 1 addition & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ enum bpf_map_type {

enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
BPF_PROG_TYPE_SOCKET_FILTER,
};

/* flags for BPF_MAP_UPDATE_ELEM command */
Expand Down
70 changes: 68 additions & 2 deletions kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -1172,6 +1172,70 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
return 0;
}

/* verify safety of LD_ABS|LD_IND instructions:
* - they can only appear in the programs where ctx == skb
* - since they are wrappers of function calls, they scratch R1-R5 registers,
* preserve R6-R9, and store return value into R0
*
* Implicit input:
* ctx == skb == R6 == CTX
*
* Explicit input:
* SRC == any register
* IMM == 32-bit immediate
*
* Output:
* R0 - 8/16/32-bit skb data converted to cpu endianness
*/
static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
{
struct reg_state *regs = env->cur_state.regs;
u8 mode = BPF_MODE(insn->code);
struct reg_state *reg;
int i, err;

if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n");
return -EINVAL;
}

if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
verbose("BPF_LD_ABS uses reserved fields\n");
return -EINVAL;
}

/* check whether implicit source operand (register R6) is readable */
err = check_reg_arg(regs, BPF_REG_6, SRC_OP);
if (err)
return err;

if (regs[BPF_REG_6].type != PTR_TO_CTX) {
verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
return -EINVAL;
}

if (mode == BPF_IND) {
/* check explicit source operand */
err = check_reg_arg(regs, insn->src_reg, SRC_OP);
if (err)
return err;
}

/* reset caller saved regs to unreadable */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
reg = regs + caller_saved[i];
reg->type = NOT_INIT;
reg->imm = 0;
}

/* mark destination R0 register as readable, since it contains
* the value fetched from the packet
*/
regs[BPF_REG_0].type = UNKNOWN_VALUE;
return 0;
}

/* non-recursive DFS pseudo code
* 1 procedure DFS-iterative(G,v):
* 2 label v as discovered
Expand Down Expand Up @@ -1677,8 +1741,10 @@ static int do_check(struct verifier_env *env)
u8 mode = BPF_MODE(insn->code);

if (mode == BPF_ABS || mode == BPF_IND) {
verbose("LD_ABS is not supported yet\n");
return -EINVAL;
err = check_ld_abs(env, insn);
if (err)
return err;

} else if (mode == BPF_IMM) {
err = check_ld_imm(env, insn);
if (err)
Expand Down
97 changes: 95 additions & 2 deletions net/core/filter.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>

/**
* sk_filter - run a packet through a socket filter
Expand Down Expand Up @@ -813,8 +814,12 @@ static void bpf_release_orig_filter(struct bpf_prog *fp)

static void __bpf_prog_release(struct bpf_prog *prog)
{
bpf_release_orig_filter(prog);
bpf_prog_free(prog);
if (prog->aux->prog_type == BPF_PROG_TYPE_SOCKET_FILTER) {
bpf_prog_put(prog);
} else {
bpf_release_orig_filter(prog);
bpf_prog_free(prog);
}
}

static void __sk_filter_release(struct sk_filter *fp)
Expand Down Expand Up @@ -1088,6 +1093,94 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_attach_filter);

#ifdef CONFIG_BPF_SYSCALL
int sk_attach_bpf(u32 ufd, struct sock *sk)
{
struct sk_filter *fp, *old_fp;
struct bpf_prog *prog;

if (sock_flag(sk, SOCK_FILTER_LOCKED))
return -EPERM;

prog = bpf_prog_get(ufd);
if (!prog)
return -EINVAL;

if (prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
/* valid fd, but invalid program type */
bpf_prog_put(prog);
return -EINVAL;
}

fp = kmalloc(sizeof(*fp), GFP_KERNEL);
if (!fp) {
bpf_prog_put(prog);
return -ENOMEM;
}
fp->prog = prog;

atomic_set(&fp->refcnt, 0);

if (!sk_filter_charge(sk, fp)) {
__sk_filter_release(fp);
return -ENOMEM;
}

old_fp = rcu_dereference_protected(sk->sk_filter,
sock_owned_by_user(sk));
rcu_assign_pointer(sk->sk_filter, fp);

if (old_fp)
sk_filter_uncharge(sk, old_fp);

return 0;
}

/* allow socket filters to call
* bpf_map_lookup_elem(), bpf_map_update_elem(), bpf_map_delete_elem()
*/
static const struct bpf_func_proto *sock_filter_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
case BPF_FUNC_map_lookup_elem:
return &bpf_map_lookup_elem_proto;
case BPF_FUNC_map_update_elem:
return &bpf_map_update_elem_proto;
case BPF_FUNC_map_delete_elem:
return &bpf_map_delete_elem_proto;
default:
return NULL;
}
}

static bool sock_filter_is_valid_access(int off, int size, enum bpf_access_type type)
{
/* skb fields cannot be accessed yet */
return false;
}

static struct bpf_verifier_ops sock_filter_ops = {
.get_func_proto = sock_filter_func_proto,
.is_valid_access = sock_filter_is_valid_access,
};

static struct bpf_prog_type_list tl = {
.ops = &sock_filter_ops,
.type = BPF_PROG_TYPE_SOCKET_FILTER,
};

static int __init register_sock_filter_ops(void)
{
bpf_register_prog_type(&tl);
return 0;
}
late_initcall(register_sock_filter_ops);
#else
int sk_attach_bpf(u32 ufd, struct sock *sk)
{
return -EOPNOTSUPP;
}
#endif
int sk_detach_filter(struct sock *sk)
{
int ret = -ENOENT;
Expand Down
13 changes: 13 additions & 0 deletions net/core/sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -888,6 +888,19 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
}
break;

case SO_ATTACH_BPF:
ret = -EINVAL;
if (optlen == sizeof(u32)) {
u32 ufd;

ret = -EFAULT;
if (copy_from_user(&ufd, optval, sizeof(ufd)))
break;

ret = sk_attach_bpf(ufd, sk);
}
break;

case SO_DETACH_FILTER:
ret = sk_detach_filter(sk);
break;
Expand Down
20 changes: 20 additions & 0 deletions samples/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,31 @@ obj- := dummy.o

# List of programs to build
hostprogs-y := test_verifier test_maps
hostprogs-y += sock_example
hostprogs-y += sockex1
hostprogs-y += sockex2

test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
sock_example-objs := sock_example.o libbpf.o
sockex1-objs := bpf_load.o libbpf.o sockex1_user.o
sockex2-objs := bpf_load.o libbpf.o sockex2_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
always += sockex1_kern.o
always += sockex2_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include

HOSTCFLAGS_bpf_load.o += -I$(objtree)/usr/include -Wno-unused-variable
HOSTLOADLIBES_sockex1 += -lelf
HOSTLOADLIBES_sockex2 += -lelf

# point this to your LLVM backend with bpf support
LLC=$(srctree)/tools/bpf/llvm/bld/Debug+Asserts/bin/llc

%.o: %.c
clang $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) \
-D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \
-O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@
Loading

0 comments on commit 8d0c469

Please sign in to comment.