Skip to content

Commit

Permalink
bpf: expand BPF syscall with program load/unload
Browse files Browse the repository at this point in the history
eBPF programs are similar to kernel modules. They are loaded by the user
process and automatically unloaded when the process exits. Each eBPF program is
a safe run-to-completion set of instructions. eBPF verifier statically
determines that the program terminates and is safe to execute.

The following syscall wrapper can be used to load the program:
/* User-space wrapper around the BPF_PROG_LOAD command: packs the arguments
 * into a union bpf_attr and hands it to bpf(). The result of bpf() is
 * returned unchanged.
 */
int bpf_prog_load(enum bpf_prog_type prog_type,
                  const struct bpf_insn *insns, int insn_cnt,
                  const char *license)
{
    /* user pointers travel through the attribute block as 64-bit values */
    union bpf_attr attr = {
        .prog_type = prog_type,
        .insns = ptr_to_u64(insns),
        .insn_cnt = insn_cnt,
        .license = ptr_to_u64(license),
    };

    return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
}
where 'insns' is an array of eBPF instructions and 'license' is a string
that must be GPL compatible to call helper functions marked gpl_only

Upon successful load the syscall returns prog_fd.
Use close(prog_fd) to unload the program.

User space tests and examples follow in the later patches

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Alexei Starovoitov authored and David S. Miller committed Sep 26, 2014
1 parent db20fd2 commit 09756af
Show file tree
Hide file tree
Showing 5 changed files with 246 additions and 20 deletions.
38 changes: 38 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,42 @@ void bpf_register_map_type(struct bpf_map_type_list *tl);
void bpf_map_put(struct bpf_map *map);
struct bpf_map *bpf_map_get(struct fd f);

/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
 * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL
 * instructions after verifying
 */
struct bpf_func_proto {
/* helper entry point: takes five u64 arguments and returns a u64 */
u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
/* marks a helper that requires the calling program's license to be GPL
 * compatible
 */
bool gpl_only;
};

/* Callbacks a program type supplies for verification; a pointer to one of
 * these is stored per registered program type and copied into the program's
 * aux data when the type is looked up.
 */
struct bpf_verifier_ops {
/* return eBPF function prototype for verification */
const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id);
};

/* Registration record that ties a program type to its verifier ops. Records
 * are linked together through bpf_register_prog_type().
 */
struct bpf_prog_type_list {
/* linkage into the list of registered program types */
struct list_head list_node;
/* verifier callbacks for programs of this type */
struct bpf_verifier_ops *ops;
/* program type this record describes */
enum bpf_prog_type type;
};

/* add 'tl' to the list of program types that can be loaded */
void bpf_register_prog_type(struct bpf_prog_type_list *tl);

struct bpf_prog;

/* Auxiliary per-program data, allocated separately from struct bpf_prog and
 * reached through its 'aux' pointer.
 */
struct bpf_prog_aux {
/* reference count; the program is freed when it drops to zero */
atomic_t refcnt;
/* result of license_is_gpl_compatible() on the license given at load time */
bool is_gpl_compatible;
/* program type and the verifier ops registered for that type */
enum bpf_prog_type prog_type;
struct bpf_verifier_ops *ops;
/* array of maps the program holds references on, and its length */
struct bpf_map **used_maps;
u32 used_map_cnt;
/* back-pointer to the owning program and the work item used to free it
 * from a workqueue
 */
struct bpf_prog *prog;
struct work_struct work;
};

/* drop one reference on 'prog'; the last put releases the program */
void bpf_prog_put(struct bpf_prog *prog);
/* resolve file descriptor 'ufd' to its program and take a reference on it;
 * returns an ERR_PTR() value on failure. Pairs with bpf_prog_put().
 */
struct bpf_prog *bpf_prog_get(u32 ufd);

#endif /* _LINUX_BPF_H */
8 changes: 2 additions & 6 deletions include/linux/filter.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
struct sk_buff;
struct sock;
struct seccomp_data;
struct bpf_prog_aux;

/* ArgX, context and stack frame pointer register positions. Note,
* Arg1, Arg2, Arg3, etc are used as argument mappings of function
Expand Down Expand Up @@ -300,17 +301,12 @@ struct bpf_binary_header {
u8 image[];
};

/* Pairs a program with the work item used to free it from a workqueue.
 * NOTE(review): the rest of this diff uses struct bpf_prog_aux for the same
 * purpose and this hunk reports deletions, so this definition looks like the
 * removed side of the change -- confirm before relying on it.
 */
struct bpf_work_struct {
struct bpf_prog *prog;
struct work_struct work;
};

struct bpf_prog {
u16 pages; /* Number of allocated pages */
bool jited; /* Is our filter JIT'ed? */
u32 len; /* Number of filter blocks */
struct sock_fprog_kern *orig_prog; /* Original BPF program */
struct bpf_work_struct *work; /* Deferred free work struct */
struct bpf_prog_aux *aux; /* Auxiliary fields */
unsigned int (*bpf_func)(const struct sk_buff *skb,
const struct bpf_insn *filter);
/* Instructions for interpreter */
Expand Down
26 changes: 26 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,12 +99,23 @@ enum bpf_cmd {
* returns zero and stores next key or negative error
*/
BPF_MAP_GET_NEXT_KEY,

/* verify and load eBPF program
* prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
* Using attr->prog_type, attr->insns, attr->license
* returns fd or negative error
*/
BPF_PROG_LOAD,
};

/* map types; only the UNSPEC placeholder is defined in this view */
enum bpf_map_type {
BPF_MAP_TYPE_UNSPEC,
};

/* program types passed in attr->prog_type for BPF_PROG_LOAD; only the UNSPEC
 * placeholder is defined in this view
 */
enum bpf_prog_type {
BPF_PROG_TYPE_UNSPEC,
};

union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
Expand All @@ -121,6 +132,21 @@ union bpf_attr {
__aligned_u64 next_key;
};
};

struct { /* anonymous struct used by BPF_PROG_LOAD command */
__u32 prog_type; /* one of enum bpf_prog_type */
__u32 insn_cnt;
__aligned_u64 insns;
__aligned_u64 license;
};
} __attribute__((aligned(8)));

/* integer value in 'imm' field of BPF_CALL instruction selects which helper
 * function eBPF program intends to call
 */
enum bpf_func_id {
BPF_FUNC_unspec,
/* NOTE(review): presumably an upper bound on valid ids rather than a
 * callable helper -- confirm against the verifier
 */
__BPF_FUNC_MAX_ID,
};

#endif /* _UAPI__LINUX_BPF_H__ */
29 changes: 15 additions & 14 deletions kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <linux/random.h>
#include <linux/moduleloader.h>
#include <asm/unaligned.h>
#include <linux/bpf.h>

/* Registers */
#define BPF_R0 regs[BPF_REG_0]
Expand Down Expand Up @@ -71,22 +72,22 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
{
gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO |
gfp_extra_flags;
struct bpf_work_struct *ws;
struct bpf_prog_aux *aux;
struct bpf_prog *fp;

size = round_up(size, PAGE_SIZE);
fp = __vmalloc(size, gfp_flags, PAGE_KERNEL);
if (fp == NULL)
return NULL;

ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags);
if (ws == NULL) {
aux = kzalloc(sizeof(*aux), GFP_KERNEL | gfp_extra_flags);
if (aux == NULL) {
vfree(fp);
return NULL;
}

fp->pages = size / PAGE_SIZE;
fp->work = ws;
fp->aux = aux;

return fp;
}
Expand All @@ -110,10 +111,10 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE);
fp->pages = size / PAGE_SIZE;

/* We keep fp->work from fp_old around in the new
/* We keep fp->aux from fp_old around in the new
* reallocated structure.
*/
fp_old->work = NULL;
fp_old->aux = NULL;
__bpf_prog_free(fp_old);
}

Expand All @@ -123,7 +124,7 @@ EXPORT_SYMBOL_GPL(bpf_prog_realloc);

void __bpf_prog_free(struct bpf_prog *fp)
{
kfree(fp->work);
kfree(fp->aux);
vfree(fp);
}
EXPORT_SYMBOL_GPL(__bpf_prog_free);
Expand Down Expand Up @@ -638,19 +639,19 @@ EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);

static void bpf_prog_free_deferred(struct work_struct *work)
{
struct bpf_work_struct *ws;
struct bpf_prog_aux *aux;

ws = container_of(work, struct bpf_work_struct, work);
bpf_jit_free(ws->prog);
aux = container_of(work, struct bpf_prog_aux, work);
bpf_jit_free(aux->prog);
}

/* Free internal BPF program */
void bpf_prog_free(struct bpf_prog *fp)
{
struct bpf_work_struct *ws = fp->work;
struct bpf_prog_aux *aux = fp->aux;

INIT_WORK(&ws->work, bpf_prog_free_deferred);
ws->prog = fp;
schedule_work(&ws->work);
INIT_WORK(&aux->work, bpf_prog_free_deferred);
aux->prog = fp;
schedule_work(&aux->work);
}
EXPORT_SYMBOL_GPL(bpf_prog_free);
165 changes: 165 additions & 0 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#include <linux/slab.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
#include <linux/license.h>
#include <linux/filter.h>

static LIST_HEAD(bpf_map_types);

Expand Down Expand Up @@ -334,6 +336,166 @@ static int map_get_next_key(union bpf_attr *attr)
return err;
}

/* registered program types: extended by bpf_register_prog_type(), searched
 * by find_prog_type()
 */
static LIST_HEAD(bpf_prog_types);

/* Search the registered program types for 'type'. On a match, record the
 * type and its verifier ops in prog->aux.
 *
 * Returns 0 on success, -EINVAL when no such program type is registered.
 */
static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
{
	struct bpf_prog_type_list *entry;

	list_for_each_entry(entry, &bpf_prog_types, list_node) {
		if (entry->type != type)
			continue;

		prog->aux->ops = entry->ops;
		prog->aux->prog_type = type;
		return 0;
	}

	return -EINVAL;
}

/* Add 'tl' to the bpf_prog_types list so that find_prog_type() can match
 * its type.
 * NOTE(review): no locking is visible here -- confirm that callers serialise
 * registration against lookups.
 */
void bpf_register_prog_type(struct bpf_prog_type_list *tl)
{
list_add(&tl->list_node, &bpf_prog_types);
}

/* drop refcnt on maps used by eBPF program and free auxilary data */
static void free_used_maps(struct bpf_prog_aux *aux)
{
int i;

for (i = 0; i < aux->used_map_cnt; i++)
bpf_map_put(aux->used_maps[i]);

kfree(aux->used_maps);
}

void bpf_prog_put(struct bpf_prog *prog)
{
if (atomic_dec_and_test(&prog->aux->refcnt)) {
free_used_maps(prog->aux);
bpf_prog_free(prog);
}
}

/* ->release handler of a program file: drops the reference on the program
 * stored in filp->private_data. Always returns 0.
 */
static int bpf_prog_release(struct inode *inode, struct file *filp)
{
	bpf_prog_put(filp->private_data);

	return 0;
}

/* file operations of the anonymous inode behind a program fd; get_prog()
 * also compares against this table to recognise program files
 */
static const struct file_operations bpf_prog_fops = {
.release = bpf_prog_release,
};

/* Turn an already looked-up 'struct fd' into the program it refers to.
 *
 * Returns ERR_PTR(-EBADF) when 'f' holds no file, and ERR_PTR(-EINVAL),
 * after dropping 'f', when the file is not a program file. On success the
 * reference in 'f' is left untouched, so the caller still has to fdput() it.
 */
static struct bpf_prog *get_prog(struct fd f)
{
	if (!f.file)
		return ERR_PTR(-EBADF);

	if (f.file->f_op == &bpf_prog_fops)
		return f.file->private_data;

	fdput(f);
	return ERR_PTR(-EINVAL);
}

/* Resolve user file descriptor 'ufd' to its eBPF program and take a
 * reference on it. Meant for sockets/tracing/seccomp to call before
 * attaching the program to an event; every successful call must be balanced
 * by bpf_prog_put().
 *
 * Returns the program, or the ERR_PTR() value produced by get_prog().
 */
struct bpf_prog *bpf_prog_get(u32 ufd)
{
	struct fd f = fdget(ufd);
	struct bpf_prog *prog = get_prog(f);

	/* on error get_prog() has already dealt with 'f' where needed */
	if (!IS_ERR(prog)) {
		atomic_inc(&prog->aux->refcnt);
		fdput(f);
	}

	return prog;
}

/* last field in 'union bpf_attr' used by this command */
#define BPF_PROG_LOAD_LAST_FIELD license

/* Handle the BPF_PROG_LOAD command: copy the license string and the
 * instructions from user space, allocate the program, bind it to its
 * registered program type and return a new file descriptor that owns it.
 *
 * @attr: kernel copy of the attributes; prog_type, insn_cnt, insns and
 *        license are used.
 *
 * Returns the new fd (>= 0) on success, otherwise a negative errno:
 *   -EINVAL  CHECK_ATTR() rejected the attributes, insn_cnt is zero or
 *            larger than BPF_MAXINSNS, or prog_type is not registered
 *   -EFAULT  the license string or the instructions could not be copied
 *   -ENOMEM  the program could not be allocated
 *   or the error returned by anon_inode_getfd().
 */
static int bpf_prog_load(union bpf_attr *attr)
{
	enum bpf_prog_type type = attr->prog_type;
	struct bpf_prog *prog;
	int err;
	char license[128];
	bool is_gpl;

	if (CHECK_ATTR(BPF_PROG_LOAD))
		return -EINVAL;

	/* copy eBPF program license from user space */
	if (strncpy_from_user(license, u64_to_ptr(attr->license),
			      sizeof(license) - 1) < 0)
		return -EFAULT;
	/* guarantee termination even when the user string fills the buffer */
	license[sizeof(license) - 1] = 0;

	/* eBPF programs must be GPL compatible to use GPL-ed functions */
	is_gpl = license_is_gpl_compatible(license);

	/* An empty program has nothing to execute, and a program of exactly
	 * BPF_MAXINSNS instructions is still within the limit: reject only
	 * zero and anything strictly above the maximum.
	 */
	if (attr->insn_cnt == 0 || attr->insn_cnt > BPF_MAXINSNS)
		return -EINVAL;

	/* plain bpf_prog allocation */
	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
	if (!prog)
		return -ENOMEM;

	prog->len = attr->insn_cnt;

	err = -EFAULT;
	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
			   prog->len * sizeof(struct bpf_insn)) != 0)
		goto free_prog;

	prog->orig_prog = NULL;
	prog->jited = false;

	/* the reference taken here is the one owned by the returned fd */
	atomic_set(&prog->aux->refcnt, 1);
	prog->aux->is_gpl_compatible = is_gpl;

	/* find program type: socket_filter vs tracing_filter */
	err = find_prog_type(type, prog);
	if (err < 0)
		goto free_prog;

	/* run eBPF verifier */
	/* err = bpf_check(prog, tb); */

	if (err < 0)
		goto free_used_maps;

	/* eBPF program is ready to be JITed */
	bpf_prog_select_runtime(prog);

	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);

	if (err < 0)
		/* failed to allocate fd */
		goto free_used_maps;

	return err;

free_used_maps:
	free_used_maps(prog->aux);
free_prog:
	bpf_prog_free(prog);
	return err;
}

SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
Expand Down Expand Up @@ -395,6 +557,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_GET_NEXT_KEY:
err = map_get_next_key(&attr);
break;
case BPF_PROG_LOAD:
err = bpf_prog_load(&attr);
break;
default:
err = -EINVAL;
break;
Expand Down

0 comments on commit 09756af

Please sign in to comment.