Merge branch 'bpf-per-cpu-cgroup-storage'
Roman Gushchin says:

====================
This patchset implements per-cpu cgroup local storage and provides
an example of how per-cpu and shared cgroup local storage can be
used for efficient accounting of network traffic.

v4->v3:
  1) incorporated Alexei's feedback

v3->v2:
  1) incorporated Song's feedback
  2) rebased on top of current bpf-next

v2->v1:
  1) added a selftest implementing network counters
  2) added a missing free() in cgroup local storage selftest
====================

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Daniel Borkmann committed Oct 1, 2018
2 parents 5bf7a60 + 371e4fc commit cb86d0f
Showing 20 changed files with 786 additions and 99 deletions.
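Before the per-file diffs, here is a minimal sketch of how the two storage flavors are meant to be used together from a cgroup skb program, loosely modeled on the network-counters selftest this series adds (the selftest files are among the 20 changed files but are not rendered below). The struct layouts and the map and function names are illustrative, and "bpf_helpers.h" is the BPF selftests helper header that provides SEC() and the bpf_get_local_storage() stub:

/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/bpf.h>
#include "bpf_helpers.h"	/* selftests helper header: SEC(), helper stubs */

/* illustrative counter layouts */
struct percpu_net_cnt {
	__u64 packets;
	__u64 bytes;
};

struct net_cnt {
	__u64 packets;
	__u64 bytes;
};

/* per-cpu flavor: every CPU gets a private copy, so the fast path
 * below needs no atomics or locks
 */
struct bpf_map_def SEC("maps") percpu_netcnt = {
	.type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
	.key_size = sizeof(struct bpf_cgroup_storage_key),
	.value_size = sizeof(struct percpu_net_cnt),
};

/* shared flavor: one copy per (cgroup, attach type), visible to all CPUs */
struct bpf_map_def SEC("maps") netcnt = {
	.type = BPF_MAP_TYPE_CGROUP_STORAGE,
	.key_size = sizeof(struct bpf_cgroup_storage_key),
	.value_size = sizeof(struct net_cnt),
};

SEC("cgroup/skb")
int count_pkts(struct __sk_buff *skb)
{
	struct percpu_net_cnt *percpu_cnt;

	/* for a per-cpu map the helper returns the current CPU's slot;
	 * see the bpf_get_local_storage() change in kernel/bpf/helpers.c
	 */
	percpu_cnt = bpf_get_local_storage(&percpu_netcnt, 0);
	percpu_cnt->packets++;
	percpu_cnt->bytes += skb->len;

	return 1;	/* accept the packet */
}

char _license[] SEC("license") = "GPL";

The selftest variant additionally folds the per-cpu counters into the shared copy once a threshold is crossed; only the per-CPU hot path is sketched here.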
55 changes: 43 additions & 12 deletions include/linux/bpf-cgroup.h
@@ -2,6 +2,7 @@
 #ifndef _BPF_CGROUP_H
 #define _BPF_CGROUP_H
 
+#include <linux/bpf.h>
 #include <linux/errno.h>
 #include <linux/jump_label.h>
 #include <linux/percpu.h>
@@ -22,7 +23,11 @@ struct bpf_cgroup_storage;
 extern struct static_key_false cgroup_bpf_enabled_key;
 #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
 
-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+DECLARE_PER_CPU(struct bpf_cgroup_storage*,
+		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
+
+#define for_each_cgroup_storage_type(stype) \
+	for (stype = 0; stype < MAX_BPF_CGROUP_STORAGE_TYPE; stype++)
 
 struct bpf_cgroup_storage_map;

@@ -32,7 +37,10 @@ struct bpf_storage_buffer {
 };
 
 struct bpf_cgroup_storage {
-	struct bpf_storage_buffer *buf;
+	union {
+		struct bpf_storage_buffer *buf;
+		void __percpu *percpu_buf;
+	};
 	struct bpf_cgroup_storage_map *map;
 	struct bpf_cgroup_storage_key key;
 	struct list_head list;
@@ -43,7 +51,7 @@ struct bpf_cgroup_storage {
 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
-	struct bpf_cgroup_storage *storage;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
 struct bpf_prog_array;
@@ -101,18 +109,26 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
 int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 				      short access, enum bpf_attach_type type);
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage)
+static inline enum bpf_cgroup_storage_type cgroup_storage_type(
+	struct bpf_map *map)
 {
-	struct bpf_storage_buffer *buf;
+	if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+		return BPF_CGROUP_STORAGE_PERCPU;
 
-	if (!storage)
-		return;
+	return BPF_CGROUP_STORAGE_SHARED;
+}
+
+static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage
+					  *storage[MAX_BPF_CGROUP_STORAGE_TYPE])
+{
+	enum bpf_cgroup_storage_type stype;
 
-	buf = READ_ONCE(storage->buf);
-	this_cpu_write(bpf_cgroup_storage, &buf->data[0]);
+	for_each_cgroup_storage_type(stype)
+		this_cpu_write(bpf_cgroup_storage[stype], storage[stype]);
 }
 
-struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog);
+struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog,
+					enum bpf_cgroup_storage_type stype);
 void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage);
 void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
 			     struct cgroup *cgroup,
@@ -121,6 +137,10 @@ void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage);
 int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *map);
 void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *map);
 
+int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key, void *value);
+int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
+				     void *value, u64 flags);
+
 /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb)			      \
 ({									      \
@@ -265,15 +285,24 @@ static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
-static inline void bpf_cgroup_storage_set(struct bpf_cgroup_storage *storage) {}
+static inline void bpf_cgroup_storage_set(
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE]) {}
 static inline int bpf_cgroup_storage_assign(struct bpf_prog *prog,
 					    struct bpf_map *map) { return 0; }
 static inline void bpf_cgroup_storage_release(struct bpf_prog *prog,
 					      struct bpf_map *map) {}
 static inline struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(
-	struct bpf_prog *prog) { return 0; }
+	struct bpf_prog *prog, enum bpf_cgroup_storage_type stype) { return 0; }
 static inline void bpf_cgroup_storage_free(
 	struct bpf_cgroup_storage *storage) {}
+static inline int bpf_percpu_cgroup_storage_copy(struct bpf_map *map, void *key,
+						 void *value) {
+	return 0;
+}
+static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
+						   void *key, void *value, u64 flags) {
+	return 0;
+}
 
 #define cgroup_bpf_enabled (0)
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
@@ -293,6 +322,8 @@ static inline void bpf_cgroup_storage_free(
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
 
+#define for_each_cgroup_storage_type(stype) for (; false; )
+
 #endif /* CONFIG_CGROUP_BPF */
 
 #endif /* _BPF_CGROUP_H */
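bpf_percpu_cgroup_storage_copy() and bpf_percpu_cgroup_storage_update(), declared above, back the lookup and update syscalls for the per-cpu map type: one lookup copies every possible CPU's value into a single flat buffer, with each per-CPU slot rounded up to 8 bytes. A minimal sketch of the userspace reading side, assuming an already-created map fd and key; the counter layout is illustrative, and the possible-CPU count has to be obtained separately (e.g. parsed from /sys/devices/system/cpu/possible):

#include <stdlib.h>
#include <linux/bpf.h>		/* struct bpf_cgroup_storage_key */
#include <linux/types.h>
#include <bpf/bpf.h>		/* bpf_map_lookup_elem() */

struct percpu_net_cnt {
	__u64 packets;
	__u64 bytes;
};

/* Sum one field across all possible CPUs. 'ncpus' must be the number
 * of possible CPUs, not merely the online ones.
 */
static __u64 total_packets(int map_fd, struct bpf_cgroup_storage_key *key,
			   int ncpus)
{
	struct percpu_net_cnt *values;
	__u64 total = 0;
	int cpu;

	values = calloc(ncpus, sizeof(*values));
	if (!values)
		return 0;

	/* a single lookup fills in every CPU's copy back to back */
	if (bpf_map_lookup_elem(map_fd, key, values) == 0)
		for (cpu = 0; cpu < ncpus; cpu++)
			total += values[cpu].packets;

	free(values);
	return total;
}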
12 changes: 10 additions & 2 deletions include/linux/bpf.h
@@ -272,6 +272,14 @@ struct bpf_prog_offload {
 	u32 jited_len;
 };
 
+enum bpf_cgroup_storage_type {
+	BPF_CGROUP_STORAGE_SHARED,
+	BPF_CGROUP_STORAGE_PERCPU,
+	__BPF_CGROUP_STORAGE_MAX
+};
+
+#define MAX_BPF_CGROUP_STORAGE_TYPE __BPF_CGROUP_STORAGE_MAX
+
 struct bpf_prog_aux {
 	atomic_t refcnt;
 	u32 used_map_cnt;
@@ -289,7 +297,7 @@ struct bpf_prog_aux {
 	struct bpf_prog *prog;
 	struct user_struct *user;
 	u64 load_time; /* ns since boottime */
-	struct bpf_map *cgroup_storage;
+	struct bpf_map *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 	char name[BPF_OBJ_NAME_LEN];
 #ifdef CONFIG_SECURITY
 	void *security;
@@ -358,7 +366,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
  */
 struct bpf_prog_array_item {
 	struct bpf_prog *prog;
-	struct bpf_cgroup_storage *cgroup_storage;
+	struct bpf_cgroup_storage *cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE];
 };
 
 struct bpf_prog_array {
1 change: 1 addition & 0 deletions include/linux/bpf_types.h
@@ -43,6 +43,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops)
 #endif
 #ifdef CONFIG_CGROUP_BPF
 BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops)
1 change: 1 addition & 0 deletions include/uapi/linux/bpf.h
@@ -127,6 +127,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_SOCKHASH,
 	BPF_MAP_TYPE_CGROUP_STORAGE,
 	BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+	BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
 };
 
 enum bpf_prog_type {
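With the new value in the UAPI map-type enum, userspace can create such a map through the regular BPF_MAP_CREATE path. A minimal sketch using libbpf's bpf_create_map() wrapper; the two-u64 value layout is illustrative. Cgroup storage maps take struct bpf_cgroup_storage_key as their key and require max_entries == 0, because entries are created and destroyed by attaching and detaching programs rather than by explicit inserts:

#include <linux/bpf.h>
#include <linux/types.h>
#include <bpf/bpf.h>		/* bpf_create_map() */

int create_percpu_cgroup_storage(void)
{
	/* key identifies (cgroup inode id, attach type); the value layout
	 * is whatever the program and userspace agree on
	 */
	return bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
			      sizeof(struct bpf_cgroup_storage_key),
			      2 * sizeof(__u64),
			      0 /* max_entries must be 0 */, 0);
}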
74 changes: 52 additions & 22 deletions kernel/bpf/cgroup.c
@@ -25,6 +25,7 @@ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
  */
 void cgroup_bpf_put(struct cgroup *cgrp)
 {
+	enum bpf_cgroup_storage_type stype;
 	unsigned int type;
 
 	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
@@ -34,8 +35,10 @@ void cgroup_bpf_put(struct cgroup *cgrp)
 		list_for_each_entry_safe(pl, tmp, progs, node) {
 			list_del(&pl->node);
 			bpf_prog_put(pl->prog);
-			bpf_cgroup_storage_unlink(pl->storage);
-			bpf_cgroup_storage_free(pl->storage);
+			for_each_cgroup_storage_type(stype) {
+				bpf_cgroup_storage_unlink(pl->storage[stype]);
+				bpf_cgroup_storage_free(pl->storage[stype]);
+			}
 			kfree(pl);
 			static_branch_dec(&cgroup_bpf_enabled_key);
 		}
@@ -97,6 +100,7 @@ static int compute_effective_progs(struct cgroup *cgrp,
 				   enum bpf_attach_type type,
 				   struct bpf_prog_array __rcu **array)
 {
+	enum bpf_cgroup_storage_type stype;
 	struct bpf_prog_array *progs;
 	struct bpf_prog_list *pl;
 	struct cgroup *p = cgrp;
@@ -125,7 +129,9 @@
 				continue;
 
 			progs->items[cnt].prog = pl->prog;
-			progs->items[cnt].cgroup_storage = pl->storage;
+			for_each_cgroup_storage_type(stype)
+				progs->items[cnt].cgroup_storage[stype] =
+					pl->storage[stype];
 			cnt++;
 		}
 	} while ((p = cgroup_parent(p)));
@@ -232,7 +238,9 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 {
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	struct bpf_prog *old_prog = NULL;
-	struct bpf_cgroup_storage *storage, *old_storage = NULL;
+	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
+		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+	enum bpf_cgroup_storage_type stype;
 	struct bpf_prog_list *pl;
 	bool pl_was_allocated;
 	int err;
@@ -254,47 +262,60 @@
 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
 		return -E2BIG;
 
-	storage = bpf_cgroup_storage_alloc(prog);
-	if (IS_ERR(storage))
-		return -ENOMEM;
+	for_each_cgroup_storage_type(stype) {
+		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
+		if (IS_ERR(storage[stype])) {
+			storage[stype] = NULL;
+			for_each_cgroup_storage_type(stype)
+				bpf_cgroup_storage_free(storage[stype]);
+			return -ENOMEM;
+		}
+	}
 
 	if (flags & BPF_F_ALLOW_MULTI) {
 		list_for_each_entry(pl, progs, node) {
 			if (pl->prog == prog) {
 				/* disallow attaching the same prog twice */
-				bpf_cgroup_storage_free(storage);
+				for_each_cgroup_storage_type(stype)
+					bpf_cgroup_storage_free(storage[stype]);
 				return -EINVAL;
 			}
 		}
 
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 		if (!pl) {
-			bpf_cgroup_storage_free(storage);
+			for_each_cgroup_storage_type(stype)
+				bpf_cgroup_storage_free(storage[stype]);
 			return -ENOMEM;
 		}
 
 		pl_was_allocated = true;
 		pl->prog = prog;
-		pl->storage = storage;
+		for_each_cgroup_storage_type(stype)
+			pl->storage[stype] = storage[stype];
 		list_add_tail(&pl->node, progs);
 	} else {
 		if (list_empty(progs)) {
 			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 			if (!pl) {
-				bpf_cgroup_storage_free(storage);
+				for_each_cgroup_storage_type(stype)
+					bpf_cgroup_storage_free(storage[stype]);
 				return -ENOMEM;
 			}
 			pl_was_allocated = true;
 			list_add_tail(&pl->node, progs);
 		} else {
 			pl = list_first_entry(progs, typeof(*pl), node);
 			old_prog = pl->prog;
-			old_storage = pl->storage;
-			bpf_cgroup_storage_unlink(old_storage);
+			for_each_cgroup_storage_type(stype) {
+				old_storage[stype] = pl->storage[stype];
+				bpf_cgroup_storage_unlink(old_storage[stype]);
+			}
 			pl_was_allocated = false;
 		}
 		pl->prog = prog;
-		pl->storage = storage;
+		for_each_cgroup_storage_type(stype)
+			pl->storage[stype] = storage[stype];
 	}
 
 	cgrp->bpf.flags[type] = flags;
@@ -304,21 +325,27 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 		goto cleanup;
 
 	static_branch_inc(&cgroup_bpf_enabled_key);
-	if (old_storage)
-		bpf_cgroup_storage_free(old_storage);
+	for_each_cgroup_storage_type(stype) {
+		if (!old_storage[stype])
+			continue;
+		bpf_cgroup_storage_free(old_storage[stype]);
+	}
 	if (old_prog) {
 		bpf_prog_put(old_prog);
 		static_branch_dec(&cgroup_bpf_enabled_key);
 	}
-	bpf_cgroup_storage_link(storage, cgrp, type);
+	for_each_cgroup_storage_type(stype)
+		bpf_cgroup_storage_link(storage[stype], cgrp, type);
 	return 0;
 
 cleanup:
 	/* and cleanup the prog list */
 	pl->prog = old_prog;
-	bpf_cgroup_storage_free(pl->storage);
-	pl->storage = old_storage;
-	bpf_cgroup_storage_link(old_storage, cgrp, type);
+	for_each_cgroup_storage_type(stype) {
+		bpf_cgroup_storage_free(pl->storage[stype]);
+		pl->storage[stype] = old_storage[stype];
+		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
+	}
 	if (pl_was_allocated) {
 		list_del(&pl->node);
 		kfree(pl);
@@ -339,6 +366,7 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 			 enum bpf_attach_type type, u32 unused_flags)
 {
 	struct list_head *progs = &cgrp->bpf.progs[type];
+	enum bpf_cgroup_storage_type stype;
 	u32 flags = cgrp->bpf.flags[type];
 	struct bpf_prog *old_prog = NULL;
 	struct bpf_prog_list *pl;
@@ -385,8 +413,10 @@ int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 
 	/* now can actually delete it from this cgroup list */
 	list_del(&pl->node);
-	bpf_cgroup_storage_unlink(pl->storage);
-	bpf_cgroup_storage_free(pl->storage);
+	for_each_cgroup_storage_type(stype) {
+		bpf_cgroup_storage_unlink(pl->storage[stype]);
+		bpf_cgroup_storage_free(pl->storage[stype]);
+	}
 	kfree(pl);
 	if (list_empty(progs))
 		/* last program was detached, reset flags to zero */
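Userspace reaches the attach path above through the BPF_PROG_ATTACH command; __cgroup_bpf_attach() then allocates one storage area per (cgroup, attach type) for each storage type the program uses, which is what the bpf_cgroup_storage_alloc()/_link() calls in these hunks implement. A minimal sketch using libbpf's bpf_prog_attach() wrapper, with an illustrative attach type and cgroup path:

#include <fcntl.h>
#include <unistd.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>		/* bpf_prog_attach() */

int attach_to_cgroup(int prog_fd, const char *cgroup_path)
{
	int cgroup_fd, err;

	cgroup_fd = open(cgroup_path, O_DIRECTORY | O_RDONLY);
	if (cgroup_fd < 0)
		return -1;

	/* storage for this (cgroup, attach type) pair is allocated
	 * inside __cgroup_bpf_attach(), shown above
	 */
	err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
	close(cgroup_fd);
	return err;
}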
25 changes: 19 additions & 6 deletions kernel/bpf/helpers.c
@@ -194,16 +194,28 @@ const struct bpf_func_proto bpf_get_current_cgroup_id_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
-DECLARE_PER_CPU(void*, bpf_cgroup_storage);
+#ifdef CONFIG_CGROUP_BPF
+DECLARE_PER_CPU(struct bpf_cgroup_storage*,
+		bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
 
 BPF_CALL_2(bpf_get_local_storage, struct bpf_map *, map, u64, flags)
 {
-	/* map and flags arguments are not used now,
-	 * but provide an ability to extend the API
-	 * for other types of local storages.
-	 * verifier checks that their values are correct.
+	/* flags argument is not used now,
+	 * but provides an ability to extend the API.
+	 * verifier checks that its value is correct.
 	 */
-	return (unsigned long) this_cpu_read(bpf_cgroup_storage);
+	enum bpf_cgroup_storage_type stype = cgroup_storage_type(map);
+	struct bpf_cgroup_storage *storage;
+	void *ptr;
+
+	storage = this_cpu_read(bpf_cgroup_storage[stype]);
+
+	if (stype == BPF_CGROUP_STORAGE_SHARED)
+		ptr = &READ_ONCE(storage->buf)->data[0];
+	else
+		ptr = this_cpu_ptr(storage->percpu_buf);
+
+	return (unsigned long)ptr;
 }
 
 const struct bpf_func_proto bpf_get_local_storage_proto = {
@@ -214,3 +226,4 @@ const struct bpf_func_proto bpf_get_local_storage_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 #endif
+#endif