Skip to content

Commit

Permalink
Merge branch 'map-charge-cleanup'
Browse files Browse the repository at this point in the history
Roman Gushchin says:

====================
During my work on memcg-based memory accounting for bpf maps
I've done some cleanups and refactorings of the existing
memlock rlimit-based code. These cleanups make the code more robust, unify
the size-to-pages conversion and size checks, and the corresponding error
codes. Also it adds coverage for cgroup local storage and
socket local storage maps.

It looks like some preliminary work on the mm side might be
required to start working on the memcg-based accounting,
so I'm sending these patches as a separate patchset.
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed May 31, 2019
2 parents 576240c + c85d691 commit d168286
Show file tree
Hide file tree
Showing 14 changed files with 132 additions and 135 deletions.
15 changes: 11 additions & 4 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ struct bpf_map_ops {
u64 imm, u32 *off);
};

/* Accounting record for memlock pages charged on behalf of a BPF map.
 * Replaces the former bpf_map::pages and bpf_map::user fields (see the
 * removed lines in this diff), so a charge can be taken before the map
 * is allocated and later moved into the map or released on failure.
 * NOTE(review): presumably 'user' is the user_struct whose RLIMIT_MEMLOCK
 * the 'pages' were charged against -- confirm against the
 * bpf_map_charge_init()/bpf_map_charge_finish()/bpf_map_charge_move()
 * implementations in kernel/bpf/syscall.c (not visible in this chunk).
 */
struct bpf_map_memory {
u32 pages; /* number of pages charged */
struct user_struct *user; /* user the charge was taken from */
};

struct bpf_map {
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
Expand All @@ -86,16 +91,15 @@ struct bpf_map {
u32 btf_key_type_id;
u32 btf_value_type_id;
struct btf *btf;
u32 pages;
struct bpf_map_memory memory;
bool unpriv_array;
bool frozen; /* write-once */
/* 48 bytes hole */

/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
struct user_struct *user ____cacheline_aligned;
atomic_t refcnt;
atomic_t refcnt ____cacheline_aligned;
atomic_t usercnt;
struct work_struct work;
char name[BPF_OBJ_NAME_LEN];
Expand Down Expand Up @@ -646,9 +650,12 @@ struct bpf_map *__bpf_map_get(struct fd f);
struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
void bpf_map_put_with_uref(struct bpf_map *map);
void bpf_map_put(struct bpf_map *map);
int bpf_map_precharge_memlock(u32 pages);
int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
void bpf_map_uncharge_memlock(struct bpf_map *map, u32 pages);
int bpf_map_charge_init(struct bpf_map_memory *mem, size_t size);
void bpf_map_charge_finish(struct bpf_map_memory *mem);
void bpf_map_charge_move(struct bpf_map_memory *dst,
struct bpf_map_memory *src);
void *bpf_map_area_alloc(size_t size, int numa_node);
void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
Expand Down
18 changes: 8 additions & 10 deletions kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
u32 elem_size, index_mask, max_entries;
bool unpriv = !capable(CAP_SYS_ADMIN);
u64 cost, array_size, mask64;
struct bpf_map_memory mem;
struct bpf_array *array;

elem_size = round_up(attr->value_size, 8);
Expand Down Expand Up @@ -116,32 +117,29 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)

/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
if (percpu) {
if (percpu)
cost += (u64)attr->max_entries * elem_size * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
}
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

ret = bpf_map_precharge_memlock(cost);
ret = bpf_map_charge_init(&mem, cost);
if (ret < 0)
return ERR_PTR(ret);

/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
if (!array) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}
array->index_mask = index_mask;
array->map.unpriv_array = unpriv;

/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
array->map.pages = cost;
bpf_map_charge_move(&array->map.memory, &mem);
array->elem_size = elem_size;

if (percpu && bpf_array_alloc_percpu(array)) {
bpf_map_charge_finish(&array->map.memory);
bpf_map_area_free(array);
return ERR_PTR(-ENOMEM);
}
Expand Down
9 changes: 4 additions & 5 deletions kernel/bpf/cpumap.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,12 +106,9 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *);
cost += cpu_map_bitmap_size(attr) * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
goto free_cmap;
cmap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

/* Notice: returns -EPERM if map size is larger than memlock limit */
ret = bpf_map_precharge_memlock(cmap->map.pages);
ret = bpf_map_charge_init(&cmap->map.memory, cost);
if (ret) {
err = ret;
goto free_cmap;
Expand All @@ -121,7 +118,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
cmap->flush_needed = __alloc_percpu(cpu_map_bitmap_size(attr),
__alignof__(unsigned long));
if (!cmap->flush_needed)
goto free_cmap;
goto free_charge;

/* Alloc array for possible remote "destination" CPUs */
cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries *
Expand All @@ -133,6 +130,8 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
return &cmap->map;
free_percpu:
free_percpu(cmap->flush_needed);
free_charge:
bpf_map_charge_finish(&cmap->map.memory);
free_cmap:
kfree(cmap);
return ERR_PTR(err);
Expand Down
14 changes: 6 additions & 8 deletions kernel/bpf/devmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,9 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
/* make sure page count doesn't overflow */
cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *);
cost += dev_map_bitmap_size(attr) * num_possible_cpus();
if (cost >= U32_MAX - PAGE_SIZE)
goto free_dtab;

dtab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

/* if map size is larger than memlock limit, reject it early */
err = bpf_map_precharge_memlock(dtab->map.pages);
/* if map size is larger than memlock limit, reject it */
err = bpf_map_charge_init(&dtab->map.memory, cost);
if (err)
goto free_dtab;

Expand All @@ -125,19 +121,21 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
__alignof__(unsigned long),
GFP_KERNEL | __GFP_NOWARN);
if (!dtab->flush_needed)
goto free_dtab;
goto free_charge;

dtab->netdev_map = bpf_map_area_alloc(dtab->map.max_entries *
sizeof(struct bpf_dtab_netdev *),
dtab->map.numa_node);
if (!dtab->netdev_map)
goto free_dtab;
goto free_charge;

spin_lock(&dev_map_lock);
list_add_tail_rcu(&dtab->list, &dev_map_list);
spin_unlock(&dev_map_lock);

return &dtab->map;
free_charge:
bpf_map_charge_finish(&dtab->map.memory);
free_dtab:
free_percpu(dtab->flush_needed);
kfree(dtab);
Expand Down
14 changes: 5 additions & 9 deletions kernel/bpf/hashtab.c
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
else
cost += (u64) htab->elem_size * num_possible_cpus();

if (cost >= U32_MAX - PAGE_SIZE)
/* make sure page count doesn't overflow */
goto free_htab;

htab->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

/* if map size is larger than memlock limit, reject it early */
err = bpf_map_precharge_memlock(htab->map.pages);
/* if map size is larger than memlock limit, reject it */
err = bpf_map_charge_init(&htab->map.memory, cost);
if (err)
goto free_htab;

Expand All @@ -376,7 +370,7 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
sizeof(struct bucket),
htab->map.numa_node);
if (!htab->buckets)
goto free_htab;
goto free_charge;

if (htab->map.map_flags & BPF_F_ZERO_SEED)
htab->hashrnd = 0;
Expand Down Expand Up @@ -409,6 +403,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
prealloc_destroy(htab);
free_buckets:
bpf_map_area_free(htab->buckets);
free_charge:
bpf_map_charge_finish(&htab->map.memory);
free_htab:
kfree(htab);
return ERR_PTR(err);
Expand Down
13 changes: 10 additions & 3 deletions kernel/bpf/local_storage.c
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,8 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
int numa_node = bpf_map_attr_numa_node(attr);
struct bpf_cgroup_storage_map *map;
struct bpf_map_memory mem;
int ret;

if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
return ERR_PTR(-EINVAL);
Expand All @@ -290,13 +292,18 @@ static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
/* max_entries is not used and enforced to be 0 */
return ERR_PTR(-EINVAL);

ret = bpf_map_charge_init(&mem, sizeof(struct bpf_cgroup_storage_map));
if (ret < 0)
return ERR_PTR(ret);

map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
__GFP_ZERO | GFP_USER, numa_node);
if (!map)
if (!map) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}

map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
PAGE_SIZE) >> PAGE_SHIFT;
bpf_map_charge_move(&map->map.memory, &mem);

/* copy mandatory map attributes */
bpf_map_init_from_attr(&map->map, attr);
Expand Down
8 changes: 1 addition & 7 deletions kernel/bpf/lpm_trie.c
Original file line number Diff line number Diff line change
Expand Up @@ -573,14 +573,8 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
cost_per_node = sizeof(struct lpm_trie_node) +
attr->value_size + trie->data_size;
cost += (u64) attr->max_entries * cost_per_node;
if (cost >= U32_MAX - PAGE_SIZE) {
ret = -E2BIG;
goto out_err;
}

trie->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

ret = bpf_map_precharge_memlock(trie->map.pages);
ret = bpf_map_charge_init(&trie->map.memory, cost);
if (ret)
goto out_err;

Expand Down
13 changes: 6 additions & 7 deletions kernel/bpf/queue_stack_maps.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,29 +67,28 @@ static int queue_stack_map_alloc_check(union bpf_attr *attr)
static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
{
int ret, numa_node = bpf_map_attr_numa_node(attr);
struct bpf_map_memory mem = {0};
struct bpf_queue_stack *qs;
u64 size, queue_size, cost;

size = (u64) attr->max_entries + 1;
cost = queue_size = sizeof(*qs) + size * attr->value_size;
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-E2BIG);

cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

ret = bpf_map_precharge_memlock(cost);
ret = bpf_map_charge_init(&mem, cost);
if (ret < 0)
return ERR_PTR(ret);

qs = bpf_map_area_alloc(queue_size, numa_node);
if (!qs)
if (!qs) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}

memset(qs, 0, sizeof(*qs));

bpf_map_init_from_attr(&qs->map, attr);

qs->map.pages = cost;
bpf_map_charge_move(&qs->map.memory, &mem);
qs->size = size;

raw_spin_lock_init(&qs->lock);
Expand Down
17 changes: 7 additions & 10 deletions kernel/bpf/reuseport_array.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,32 +151,29 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
{
int err, numa_node = bpf_map_attr_numa_node(attr);
struct reuseport_array *array;
u64 cost, array_size;
struct bpf_map_memory mem;
u64 array_size;

if (!capable(CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);

array_size = sizeof(*array);
array_size += (u64)attr->max_entries * sizeof(struct sock *);

/* make sure there is no u32 overflow later in round_up() */
cost = array_size;
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-ENOMEM);
cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

err = bpf_map_precharge_memlock(cost);
err = bpf_map_charge_init(&mem, array_size);
if (err)
return ERR_PTR(err);

/* allocate all map elements and zero-initialize them */
array = bpf_map_area_alloc(array_size, numa_node);
if (!array)
if (!array) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);
}

/* copy mandatory map attributes */
bpf_map_init_from_attr(&array->map, attr);
array->map.pages = cost;
bpf_map_charge_move(&array->map.memory, &mem);

return &array->map;
}
Expand Down
28 changes: 13 additions & 15 deletions kernel/bpf/stackmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
u32 value_size = attr->value_size;
struct bpf_stack_map *smap;
struct bpf_map_memory mem;
u64 cost, n_buckets;
int err;

Expand Down Expand Up @@ -116,40 +117,37 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
n_buckets = roundup_pow_of_two(attr->max_entries);

cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
if (cost >= U32_MAX - PAGE_SIZE)
return ERR_PTR(-E2BIG);
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
err = bpf_map_charge_init(&mem, cost);
if (err)
return ERR_PTR(err);

smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
if (!smap)
if (!smap) {
bpf_map_charge_finish(&mem);
return ERR_PTR(-ENOMEM);

err = -E2BIG;
cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
if (cost >= U32_MAX - PAGE_SIZE)
goto free_smap;
}

bpf_map_init_from_attr(&smap->map, attr);
smap->map.value_size = value_size;
smap->n_buckets = n_buckets;
smap->map.pages = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT;

err = bpf_map_precharge_memlock(smap->map.pages);
if (err)
goto free_smap;

err = get_callchain_buffers(sysctl_perf_event_max_stack);
if (err)
goto free_smap;
goto free_charge;

err = prealloc_elems_and_freelist(smap);
if (err)
goto put_buffers;

bpf_map_charge_move(&smap->map.memory, &mem);

return &smap->map;

put_buffers:
put_callchain_buffers();
free_smap:
free_charge:
bpf_map_charge_finish(&mem);
bpf_map_area_free(smap);
return ERR_PTR(err);
}
Expand Down
Loading

0 comments on commit d168286

Please sign in to comment.