bpf: Wire up freeing of referenced kptr
A destructor kfunc can be defined as void func(type *), where type may
be void or any other type, as convenient.
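
As an illustration (not part of this commit), a module exporting such
an object type could define and register its destructor kfunc roughly
as below. 'struct foo', foo_release() and foo_dtor_ids are hypothetical
names; the sketch assumes the BTF_ID_LIST()/BTF_ID() macros from
include/linux/btf_ids.h and the register_btf_id_dtor_kfuncs() API added
further down in this patch.

  /* Hypothetical sketch: registering a destructor kfunc for 'struct foo'.
   * The prototype must be void func(type *), as enforced by
   * btf_check_dtor_kfuncs() below.
   */
  void foo_release(struct foo *p)
  {
          kfree(p);
  }

  BTF_ID_LIST(foo_dtor_ids)
  BTF_ID(struct, foo)             /* btf_id: the embedded object type */
  BTF_ID(func, foo_release)       /* kfunc_btf_id: its destructor kfunc */

  static const struct btf_id_dtor_kfunc foo_dtors[] = {
          {
                  .btf_id       = foo_dtor_ids[0],
                  .kfunc_btf_id = foo_dtor_ids[1],
          },
  };

  static int __init foo_init(void)
  {
          /* Must run from an initcall/module init, per the comment on
           * register_btf_id_dtor_kfuncs() in this commit.
           */
          return register_btf_id_dtor_kfuncs(foo_dtors, ARRAY_SIZE(foo_dtors),
                                             THIS_MODULE);
  }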

In this patch, we ensure that the type is sane and capture the function
pointer into the off_desc of the kptr_off_tab for the specific pointer
offset, with the invariant that the dtor pointer is always set when the
'kptr_ref' tag is applied to the pointer's pointee type, which the
off_desc records as type BPF_KPTR_REF.

Note that only BTF IDs whose destructor kfunc is registered become
allowed BTF IDs for embedding as a referenced kptr. Hence the dtor
kfunc lookup serves a dual purpose: it finds the dtor kfunc BTF ID, and
it acts as a check against the whitelist of BTF IDs allowed for this
use.

Finally, wire up the actual freeing of the referenced pointer, if any,
at all available offsets, so that no references are leaked after the
BPF map goes away when a BPF program had previously moved ownership of
a referenced pointer into it.
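
The per-value walk is done by bpf_map_free_kptrs(), declared in
include/linux/bpf.h below and implemented in kernel/bpf/syscall.c (not
expanded in this view). A minimal sketch of the idea, assuming the
kptr_off_tab layout from this series; this is illustrative, not the
verbatim implementation:

  /* Hedged sketch of the per-value free walk; assumes the kptr_off_tab
   * and bpf_map_value_off_desc layout from this series, and may differ
   * from the actual bpf_map_free_kptrs().
   */
  static void sketch_free_kptrs(struct bpf_map *map, void *map_value)
  {
          struct bpf_map_value_off *tab = map->kptr_off_tab;
          int i;

          for (i = 0; i < tab->nr_off; i++) {
                  struct bpf_map_value_off_desc *off_desc = &tab->off[i];
                  unsigned long old_ptr;

                  if (off_desc->type == BPF_KPTR_UNREF) {
                          /* Unreferenced kptr: no reference held, just reset. */
                          WRITE_ONCE(*(u64 *)(map_value + off_desc->offset), 0);
                          continue;
                  }
                  /* Referenced kptr: steal the stored pointer so nobody else
                   * can observe it, then invoke the stashed destructor kfunc.
                   */
                  old_ptr = xchg((unsigned long *)(map_value + off_desc->offset), 0);
                  if (old_ptr)
                          off_desc->kptr.dtor((void *)old_ptr);
          }
  }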

The behavior is similar to BPF timers, where bpf_map_{update,delete}_elem
will free any existing referenced kptr. The same goes for the LRU map's
bpf_lru_push_free/htab_lru_push_free functions, which are extended to
reset unreferenced kptrs and free referenced ones.
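
For context, a BPF program moves ownership of a referenced pointer into
a map value with the bpf_kptr_xchg() helper introduced earlier in this
series. A hypothetical BPF-C sketch, where struct foo, foo_acquire()
and foo_release() are placeholder kfuncs and the usual
vmlinux.h/bpf_helpers.h includes are assumed:

  /* __kptr_ref is assumed to expand to
   * __attribute__((btf_type_tag("kptr_ref"))), as in the selftests.
   */
  struct map_value {
          struct foo __kptr_ref *ptr;     /* referenced kptr field */
  };

  struct {
          __uint(type, BPF_MAP_TYPE_ARRAY);
          __uint(max_entries, 1);
          __type(key, int);
          __type(value, struct map_value);
  } array_map SEC(".maps");

  SEC("tc")
  int stash_foo(struct __sk_buff *ctx)
  {
          struct map_value *v;
          struct foo *p, *old;
          int key = 0;

          v = bpf_map_lookup_elem(&array_map, &key);
          if (!v)
                  return 0;
          p = foo_acquire();                      /* acquire a reference */
          if (!p)
                  return 0;
          old = bpf_kptr_xchg(&v->ptr, p);        /* move ownership into the map */
          if (old)
                  foo_release(old);               /* release the displaced kptr */
          return 0;
  }

With this patch, any reference still stored at v->ptr is released
through the registered destructor when the element is deleted or
overwritten, or when the map itself is freed.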

Note that unlike BPF timers, a kptr is not reset or freed when the map
uref drops to zero.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-8-memxor@gmail.com
Kumar Kartikeya Dwivedi authored and Alexei Starovoitov committed Apr 26, 2022
1 parent 5ce937d commit 14a324f
Showing 6 changed files with 210 additions and 25 deletions.
4 changes: 4 additions & 0 deletions include/linux/bpf.h
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/percpu-refcount.h>
#include <linux/bpfptr.h>
#include <linux/btf.h>

struct bpf_verifier_env;
struct bpf_verifier_log;
@@ -173,6 +174,8 @@ struct bpf_map_value_off_desc {
enum bpf_kptr_type type;
struct {
struct btf *btf;
struct module *module;
btf_dtor_kfunc_t dtor;
u32 btf_id;
} kptr;
};
@@ -1447,6 +1450,7 @@ struct bpf_map_value_off_desc *bpf_map_kptr_off_contains(struct bpf_map *map, u3
void bpf_map_free_kptr_off_tab(struct bpf_map *map);
struct bpf_map_value_off *bpf_map_copy_kptr_off_tab(const struct bpf_map *map);
bool bpf_map_equal_kptr_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b);
void bpf_map_free_kptrs(struct bpf_map *map, void *map_value);

struct bpf_map *bpf_map_get(u32 ufd);
struct bpf_map *bpf_map_get_with_uref(u32 ufd);
2 changes: 2 additions & 0 deletions include/linux/btf.h
@@ -45,6 +45,8 @@ struct btf_id_dtor_kfunc {
u32 kfunc_btf_id;
};

typedef void (*btf_dtor_kfunc_t)(void *);

extern const struct file_operations btf_fops;

void btf_get(struct btf *btf);
18 changes: 14 additions & 4 deletions kernel/bpf/arraymap.c
@@ -287,10 +287,12 @@ static int array_map_get_next_key(struct bpf_map *map, void *key, void *next_key
return 0;
}

static void check_and_free_timer_in_array(struct bpf_array *arr, void *val)
static void check_and_free_fields(struct bpf_array *arr, void *val)
{
if (unlikely(map_value_has_timer(&arr->map)))
if (map_value_has_timer(&arr->map))
bpf_timer_cancel_and_free(val + arr->map.timer_off);
if (map_value_has_kptrs(&arr->map))
bpf_map_free_kptrs(&arr->map, val);
}

/* Called from syscall or from eBPF program */
@@ -327,7 +329,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
copy_map_value_locked(map, val, value, false);
else
copy_map_value(map, val, value);
check_and_free_timer_in_array(array, val);
check_and_free_fields(array, val);
}
return 0;
}
@@ -386,7 +388,8 @@ static void array_map_free_timers(struct bpf_map *map)
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;

if (likely(!map_value_has_timer(map)))
/* We don't reset or free kptr on uref dropping to zero. */
if (!map_value_has_timer(map))
return;

for (i = 0; i < array->map.max_entries; i++)
@@ -398,6 +401,13 @@ static void array_map_free_timers(struct bpf_map *map)
static void array_map_free(struct bpf_map *map)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
int i;

if (map_value_has_kptrs(map)) {
for (i = 0; i < array->map.max_entries; i++)
bpf_map_free_kptrs(map, array->value + array->elem_size * i);
bpf_map_free_kptr_off_tab(map);
}

if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
bpf_array_free_percpu(array);
98 changes: 97 additions & 1 deletion kernel/bpf/btf.c
@@ -3416,6 +3416,7 @@ struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
struct btf_field_info info_arr[BPF_MAP_VALUE_OFF_MAX];
struct bpf_map_value_off *tab;
struct btf *kernel_btf = NULL;
struct module *mod = NULL;
int ret, i, nr_off;

ret = btf_find_field(btf, t, BTF_FIELD_KPTR, info_arr, ARRAY_SIZE(info_arr));
@@ -3444,16 +3445,69 @@ struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
goto end;
}

/* Find and stash the function pointer for the destruction function that
* needs to be eventually invoked from the map free path.
*/
if (info_arr[i].type == BPF_KPTR_REF) {
const struct btf_type *dtor_func;
const char *dtor_func_name;
unsigned long addr;
s32 dtor_btf_id;

/* This call also serves as a whitelist of allowed objects that
* can be used as a referenced pointer and be stored in a map at
* the same time.
*/
dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id);
if (dtor_btf_id < 0) {
ret = dtor_btf_id;
goto end_btf;
}

dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id);
if (!dtor_func) {
ret = -ENOENT;
goto end_btf;
}

if (btf_is_module(kernel_btf)) {
mod = btf_try_get_module(kernel_btf);
if (!mod) {
ret = -ENXIO;
goto end_btf;
}
}

/* We already verified dtor_func to be btf_type_is_func
* in register_btf_id_dtor_kfuncs.
*/
dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off);
addr = kallsyms_lookup_name(dtor_func_name);
if (!addr) {
ret = -EINVAL;
goto end_mod;
}
tab->off[i].kptr.dtor = (void *)addr;
}

tab->off[i].offset = info_arr[i].off;
tab->off[i].type = info_arr[i].type;
tab->off[i].kptr.btf_id = id;
tab->off[i].kptr.btf = kernel_btf;
tab->off[i].kptr.module = mod;
}
tab->nr_off = nr_off;
return tab;
end_mod:
module_put(mod);
end_btf:
btf_put(kernel_btf);
end:
while (i--)
while (i--) {
btf_put(tab->off[i].kptr.btf);
if (tab->off[i].kptr.module)
module_put(tab->off[i].kptr.module);
}
kfree(tab);
return ERR_PTR(ret);
}
@@ -7111,6 +7165,43 @@ s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id)
return dtor->kfunc_btf_id;
}

static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc *dtors, u32 cnt)
{
const struct btf_type *dtor_func, *dtor_func_proto, *t;
const struct btf_param *args;
s32 dtor_btf_id;
u32 nr_args, i;

for (i = 0; i < cnt; i++) {
dtor_btf_id = dtors[i].kfunc_btf_id;

dtor_func = btf_type_by_id(btf, dtor_btf_id);
if (!dtor_func || !btf_type_is_func(dtor_func))
return -EINVAL;

dtor_func_proto = btf_type_by_id(btf, dtor_func->type);
if (!dtor_func_proto || !btf_type_is_func_proto(dtor_func_proto))
return -EINVAL;

/* Make sure the prototype of the destructor kfunc is 'void func(type *)' */
t = btf_type_by_id(btf, dtor_func_proto->type);
if (!t || !btf_type_is_void(t))
return -EINVAL;

nr_args = btf_type_vlen(dtor_func_proto);
if (nr_args != 1)
return -EINVAL;
args = btf_params(dtor_func_proto);
t = btf_type_by_id(btf, args[0].type);
/* Allow any pointer type, as width on targets Linux supports
* will be same for all pointer types (i.e. sizeof(void *))
*/
if (!t || !btf_type_is_ptr(t))
return -EINVAL;
}
return 0;
}

/* This function must be invoked only from initcalls/module init functions */
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
struct module *owner)
@@ -7141,6 +7232,11 @@ int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_c
goto end;
}

/* Ensure that the prototype of dtor kfuncs being registered is sane */
ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt);
if (ret < 0)
goto end;

tab = btf->dtor_kfunc_tab;
/* Only one call allowed for modules */
if (WARN_ON_ONCE(tab && btf_is_module(btf))) {
64 changes: 48 additions & 16 deletions kernel/bpf/hashtab.c
@@ -238,7 +238,7 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)
u32 num_entries = htab->map.max_entries;
int i;

if (likely(!map_value_has_timer(&htab->map)))
if (!map_value_has_timer(&htab->map))
return;
if (htab_has_extra_elems(htab))
num_entries += num_possible_cpus();
@@ -254,6 +254,25 @@ static void htab_free_prealloced_timers(struct bpf_htab *htab)
}
}

static void htab_free_prealloced_kptrs(struct bpf_htab *htab)
{
u32 num_entries = htab->map.max_entries;
int i;

if (!map_value_has_kptrs(&htab->map))
return;
if (htab_has_extra_elems(htab))
num_entries += num_possible_cpus();

for (i = 0; i < num_entries; i++) {
struct htab_elem *elem;

elem = get_htab_elem(htab, i);
bpf_map_free_kptrs(&htab->map, elem->key + round_up(htab->map.key_size, 8));
cond_resched();
}
}

static void htab_free_elems(struct bpf_htab *htab)
{
int i;
@@ -725,12 +744,15 @@ static int htab_lru_map_gen_lookup(struct bpf_map *map,
return insn - insn_buf;
}

static void check_and_free_timer(struct bpf_htab *htab, struct htab_elem *elem)
static void check_and_free_fields(struct bpf_htab *htab,
struct htab_elem *elem)
{
if (unlikely(map_value_has_timer(&htab->map)))
bpf_timer_cancel_and_free(elem->key +
round_up(htab->map.key_size, 8) +
htab->map.timer_off);
void *map_value = elem->key + round_up(htab->map.key_size, 8);

if (map_value_has_timer(&htab->map))
bpf_timer_cancel_and_free(map_value + htab->map.timer_off);
if (map_value_has_kptrs(&htab->map))
bpf_map_free_kptrs(&htab->map, map_value);
}

/* It is called from the bpf_lru_list when the LRU needs to delete
@@ -757,7 +779,7 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l == tgt_l) {
hlist_nulls_del_rcu(&l->hash_node);
check_and_free_timer(htab, l);
check_and_free_fields(htab, l);
break;
}

@@ -829,7 +851,7 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l)
{
if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH)
free_percpu(htab_elem_get_ptr(l, htab->map.key_size));
check_and_free_timer(htab, l);
check_and_free_fields(htab, l);
kfree(l);
}

@@ -857,7 +879,7 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
htab_put_fd_value(htab, l);

if (htab_is_prealloc(htab)) {
check_and_free_timer(htab, l);
check_and_free_fields(htab, l);
__pcpu_freelist_push(&htab->freelist, &l->fnode);
} else {
atomic_dec(&htab->count);
@@ -1104,7 +1126,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!htab_is_prealloc(htab))
free_htab_elem(htab, l_old);
else
check_and_free_timer(htab, l_old);
check_and_free_fields(htab, l_old);
}
ret = 0;
err:
@@ -1114,7 +1136,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,

static void htab_lru_push_free(struct bpf_htab *htab, struct htab_elem *elem)
{
check_and_free_timer(htab, elem);
check_and_free_fields(htab, elem);
bpf_lru_push_free(&htab->lru, &elem->lru_node);
}

@@ -1419,8 +1441,14 @@ static void htab_free_malloced_timers(struct bpf_htab *htab)
struct hlist_nulls_node *n;
struct htab_elem *l;

hlist_nulls_for_each_entry(l, n, head, hash_node)
check_and_free_timer(htab, l);
hlist_nulls_for_each_entry(l, n, head, hash_node) {
/* We don't reset or free kptr on uref dropping to zero,
* hence just free timer.
*/
bpf_timer_cancel_and_free(l->key +
round_up(htab->map.key_size, 8) +
htab->map.timer_off);
}
cond_resched_rcu();
}
rcu_read_unlock();
@@ -1430,7 +1458,8 @@ static void htab_map_free_timers(struct bpf_map *map)
{
struct bpf_htab *htab = container_of(map, struct bpf_htab, map);

if (likely(!map_value_has_timer(&htab->map)))
/* We don't reset or free kptr on uref dropping to zero. */
if (!map_value_has_timer(&htab->map))
return;
if (!htab_is_prealloc(htab))
htab_free_malloced_timers(htab);
@@ -1453,11 +1482,14 @@ static void htab_map_free(struct bpf_map *map)
* not have executed. Wait for them.
*/
rcu_barrier();
if (!htab_is_prealloc(htab))
if (!htab_is_prealloc(htab)) {
delete_all_elements(htab);
else
} else {
htab_free_prealloced_kptrs(htab);
prealloc_destroy(htab);
}

bpf_map_free_kptr_off_tab(map);
free_percpu(htab->extra_elems);
bpf_map_area_free(htab->buckets);
for (i = 0; i < HASHTAB_MAP_LOCK_COUNT; i++)