bpf: restore behavior of bpf_map_update_elem
The introduction of pre-allocated hash elements inadvertently broke
the behavior of bpf hash maps: users expected to be able to call
bpf_map_update_elem() without considering that the map can be full.
Some programs do:

old_value = bpf_map_lookup_elem(map, key);
if (old_value) {
  ... prepare new_value on stack ...
  bpf_map_update_elem(map, key, new_value);
}

Before pre-alloc, update() of an existing element would work even in
the 'map full' condition. Restore this behavior.

The above program could have updated old_value in place instead of
calling update(), which would be faster, and most programs use that
approach; but sometimes the values are large and programs use the
update() helper to do an atomic replacement of the element.
Note that we cannot simply update the element's value in place the
way the percpu hash map does; we have to allocate num_possible_cpus()
extra elements and use this reserve when the map is full.
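
For context, a fuller sketch of the kind of program affected follows.
This is an illustration, not part of the commit: the map, value struct,
and kprobe attach point are hypothetical, and it assumes the
samples/bpf-style headers of this era (bpf_helpers.h with
struct bpf_map_def).

/* illustration only: hypothetical map and probe, samples/bpf style */
#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include "bpf_helpers.h"

struct sock_stats {
	__u64 rx_bytes;
	__u64 pkts;
	__u64 last_seen_ns;
};

struct bpf_map_def SEC("maps") sock_stats_map = {
	.type = BPF_MAP_TYPE_HASH,	/* plain hash map, pre-allocated by default */
	.key_size = sizeof(__u32),
	.value_size = sizeof(struct sock_stats),
	.max_entries = 1024,
};

SEC("kprobe/tcp_rcv_established")
int trace_rcv(struct pt_regs *ctx)
{
	__u32 key = bpf_get_smp_processor_id();	/* stand-in for a real key */
	struct sock_stats new_value = {};
	struct sock_stats *old_value;

	old_value = bpf_map_lookup_elem(&sock_stats_map, &key);
	if (old_value) {
		/* prepare new_value on the stack from old_value */
		new_value.pkts = old_value->pkts + 1;
		new_value.rx_bytes = old_value->rx_bytes;
		new_value.last_seen_ns = bpf_ktime_get_ns();
		/* atomically replace the whole element; the value is too
		 * large to patch field-by-field without readers seeing a
		 * torn update
		 */
		bpf_map_update_elem(&sock_stats_map, &key, &new_value, BPF_ANY);
	}
	return 0;
}

char _license[] SEC("license") = "GPL";

With pre-allocated maps, it is the bpf_map_update_elem() call above
that used to fail with -E2BIG once the map was full, even though the
key already existed.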

Fixes: 6c90598 ("bpf: pre-allocate hash map elements")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Alexei Starovoitov authored and David S. Miller committed Aug 7, 2016
1 parent 5e3b724 commit a6ed3ea
Showing 1 changed file with 73 additions and 11 deletions.
kernel/bpf/hashtab.c (73 additions, 11 deletions)
@@ -26,19 +26,29 @@ struct bpf_htab {
 	struct bucket *buckets;
 	void *elems;
 	struct pcpu_freelist freelist;
+	void __percpu *extra_elems;
 	atomic_t count;	/* number of elements in this hashtable */
 	u32 n_buckets;	/* number of hash buckets */
 	u32 elem_size;	/* size of each element in bytes */
 };
 
+enum extra_elem_state {
+	HTAB_NOT_AN_EXTRA_ELEM = 0,
+	HTAB_EXTRA_ELEM_FREE,
+	HTAB_EXTRA_ELEM_USED
+};
+
 /* each htab element is struct htab_elem + key + value */
 struct htab_elem {
 	union {
 		struct hlist_node hash_node;
 		struct bpf_htab *htab;
 		struct pcpu_freelist_node fnode;
 	};
-	struct rcu_head rcu;
+	union {
+		struct rcu_head rcu;
+		enum extra_elem_state state;
+	};
 	u32 hash;
 	char key[0] __aligned(8);
 };
@@ -113,6 +123,23 @@ static int prealloc_elems_and_freelist(struct bpf_htab *htab)
 	return err;
 }
 
+static int alloc_extra_elems(struct bpf_htab *htab)
+{
+	void __percpu *pptr;
+	int cpu;
+
+	pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN);
+	if (!pptr)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state =
+			HTAB_EXTRA_ELEM_FREE;
+	}
+	htab->extra_elems = pptr;
+	return 0;
+}
+
 /* Called from syscall */
 static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 {
@@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 	if (percpu)
 		cost += (u64) round_up(htab->map.value_size, 8) *
 			num_possible_cpus() * htab->map.max_entries;
+	else
+		cost += (u64) htab->elem_size * num_possible_cpus();
 
 	if (cost >= U32_MAX - PAGE_SIZE)
 		/* make sure page count doesn't overflow */
@@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
 		raw_spin_lock_init(&htab->buckets[i].lock);
 	}
 
+	if (!percpu) {
+		err = alloc_extra_elems(htab);
+		if (err)
+			goto free_buckets;
+	}
+
 	if (!(attr->map_flags & BPF_F_NO_PREALLOC)) {
 		err = prealloc_elems_and_freelist(htab);
 		if (err)
-			goto free_buckets;
+			goto free_extra_elems;
 	}
 
 	return &htab->map;
 
+free_extra_elems:
+	free_percpu(htab->extra_elems);
 free_buckets:
 	kvfree(htab->buckets);
 free_htab:
@@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
 	if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
 		free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
 	kfree(l);
-
 }
 
 static void htab_elem_free_rcu(struct rcu_head *head)
@@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head)
 
 static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 {
+	if (l->state == HTAB_EXTRA_ELEM_USED) {
+		l->state = HTAB_EXTRA_ELEM_FREE;
+		return;
+	}
+
 	if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) {
 		pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
@@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
-					 bool percpu, bool onallcpus)
+					 bool percpu, bool onallcpus,
+					 bool old_elem_exists)
 {
 	u32 size = htab->map.value_size;
 	bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC);
 	struct htab_elem *l_new;
 	void __percpu *pptr;
+	int err = 0;
 
 	if (prealloc) {
 		l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist);
 		if (!l_new)
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
 	} else {
 		if (atomic_inc_return(&htab->count) > htab->map.max_entries) {
 			atomic_dec(&htab->count);
-			return ERR_PTR(-E2BIG);
+			err = -E2BIG;
+		} else {
+			l_new = kmalloc(htab->elem_size,
+					GFP_ATOMIC | __GFP_NOWARN);
+			if (!l_new)
+				return ERR_PTR(-ENOMEM);
 		}
-		l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN);
-		if (!l_new)
-			return ERR_PTR(-ENOMEM);
+	}
+
+	if (err) {
+		if (!old_elem_exists)
+			return ERR_PTR(err);
+
+		/* if we're updating the existing element and the hash table
+		 * is full, use per-cpu extra elems
+		 */
+		l_new = this_cpu_ptr(htab->extra_elems);
+		if (l_new->state != HTAB_EXTRA_ELEM_FREE)
+			return ERR_PTR(-E2BIG);
+		l_new->state = HTAB_EXTRA_ELEM_USED;
+	} else {
+		l_new->state = HTAB_NOT_AN_EXTRA_ELEM;
 	}
 
 	memcpy(l_new->key, key, key_size);
@@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	if (ret)
 		goto err;
 
-	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false);
+	l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false,
+				!!l_old);
 	if (IS_ERR(l_new)) {
 		/* all pre-allocated elements are in use or memory exhausted */
 		ret = PTR_ERR(l_new);
@@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 		}
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus);
+					hash, true, onallcpus, false);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map)
 		htab_free_elems(htab);
 		pcpu_freelist_destroy(&htab->freelist);
 	}
+	free_percpu(htab->extra_elems);
 	kvfree(htab->buckets);
 	kfree(htab);
 }
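As an editorial aside, the restored semantics can be checked from
userspace. A minimal sketch, assuming only the raw bpf(2) syscall
(sys_bpf() and map_update() below are local helpers, not kernel or
library APIs) and a privileged process on a kernel with this patch:

/* illustration only: exercise update-on-full-map behavior */
#include <linux/bpf.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

static long sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr)
{
	return syscall(__NR_bpf, cmd, attr, sizeof(*attr));
}

static int map_update(int fd, __u64 key, __u64 value)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.map_fd = fd;
	attr.key = (__u64)(unsigned long)&key;
	attr.value = (__u64)(unsigned long)&value;
	attr.flags = BPF_ANY;
	return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr);
}

int main(void)
{
	union bpf_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.map_type = BPF_MAP_TYPE_HASH;	/* pre-allocated by default */
	attr.key_size = sizeof(__u64);
	attr.value_size = sizeof(__u64);
	attr.max_entries = 2;
	fd = sys_bpf(BPF_MAP_CREATE, &attr);
	if (fd < 0)
		return 1;

	map_update(fd, 1, 100);	/* fill the map ... */
	map_update(fd, 2, 200);	/* ... to max_entries */

	/* a new key on a full map still fails with E2BIG */
	if (map_update(fd, 3, 300) < 0)
		perror("insert new key on full map");

	/* an existing key on a full map succeeds again after this patch */
	if (map_update(fd, 1, 101) == 0)
		printf("update of existing key succeeded\n");

	close(fd);
	return 0;
}

The third map_update() fails because key 3 would grow the map past
max_entries; the last one succeeds because key 1 already exists, which
is exactly the behavior this commit restores via the per-cpu extra
elements.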
