Skip to content

Commit

Permalink
Merge branch 'bpf-map-prealloc'
Browse files Browse the repository at this point in the history
Alexei Starovoitov says:

====================
bpf: map pre-alloc

v1->v2:
. fixed a few issues spotted by Daniel
. converted stackmap into pre-allocation as well
. added a workaround for lockdep false positive
. added pcpu_freelist_populate to be used by hashmap and stackmap

This patch set switches the bpf hash map to use pre-allocation by default
and introduces the BPF_F_NO_PREALLOC flag to keep the old behavior for cases
where full map pre-allocation is too memory expensive.

Some time back Daniel Wagner reported crashes when bpf hash map is
used to compute time intervals between preempt_disable->preempt_enable
and recently Tom Zanussi reported a deadlock in the iovisor/bcc/funccount
tool if it's used to count the number of invocations of kernel
'*spin*' functions. Both problems are due to the recursive use of
slub and can only be solved by pre-allocating all map elements.

A lot of different solutions were considered. Many were implemented,
but in the end pre-allocation seems to be the only feasible answer.
Pre-allocation itself was also implemented in 4 different ways:
- simple free-list with single lock
- percpu_ida with optimizations
- blk-mq-tag variant customized for bpf use case
- percpu_freelist
For the bpf style of alloc/free patterns percpu_freelist is the best
and is the one implemented in this patch set.
Detailed performance numbers in patch 3.
Patch 2 introduces percpu_freelist
Patch 1 fixes simple deadlocks due to missing recursion checks
Patch 5: converts stackmap to pre-allocation
Patches 6-9: prepare test infra
Patch 10: stress test for hash map infra. It attaches to spin_lock
functions and bpf_map_update/delete are called from different contexts
Patch 11: stress test for bpf_get_stackid
Patch 12: map performance test

Reported-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Reported-by: Tom Zanussi <tom.zanussi@linux.intel.com>
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Mar 8, 2016
2 parents 8aba8b8 + c3f85cf commit f14b488
Show file tree
Hide file tree
Showing 20 changed files with 514 additions and 179 deletions.
6 changes: 6 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <uapi/linux/bpf.h>
#include <linux/workqueue.h>
#include <linux/file.h>
#include <linux/percpu.h>

struct bpf_map;

Expand All @@ -36,6 +37,7 @@ struct bpf_map {
u32 key_size;
u32 value_size;
u32 max_entries;
u32 map_flags;
u32 pages;
struct user_struct *user;
const struct bpf_map_ops *ops;
Expand Down Expand Up @@ -163,6 +165,8 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);

#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);

void bpf_register_prog_type(struct bpf_prog_type_list *tl);
void bpf_register_map_type(struct bpf_map_type_list *tl);

Expand All @@ -175,6 +179,7 @@ struct bpf_map *__bpf_map_get(struct fd f);
void bpf_map_inc(struct bpf_map *map, bool uref);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_precharge_memlock(u32 pages);

extern int sysctl_unprivileged_bpf_disabled;

Expand All @@ -190,6 +195,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);

/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
Expand Down
3 changes: 3 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,15 @@ enum bpf_prog_type {
#define BPF_NOEXIST 1 /* create new element if it didn't exist */
#define BPF_EXIST 2 /* update existing element */

#define BPF_F_NO_PREALLOC (1U << 0)

union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
__u32 key_size; /* size of key in bytes */
__u32 value_size; /* size of value in bytes */
__u32 max_entries; /* max number of entries in a map */
__u32 map_flags; /* prealloc or not */
};

struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
obj-y := core.o

obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
endif
2 changes: 1 addition & 1 deletion kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)

/* check sanity of attributes */
if (attr->max_entries == 0 || attr->key_size != 4 ||
attr->value_size == 0)
attr->value_size == 0 || attr->map_flags)
return ERR_PTR(-EINVAL);

if (attr->value_size >= 1 << (KMALLOC_SHIFT_MAX - 1))
Expand Down
Loading

0 comments on commit f14b488

Please sign in to comment.