Skip to content

Commit

Permalink
bpf: Remove btf_field_offs, use btf_record's fields instead
Browse files Browse the repository at this point in the history
The btf_field_offs struct contains (offset, size) for btf_record fields,
sorted by offset. btf_field_offs is always used in conjunction with
btf_record, which has btf_field 'fields' array with (offset, type), the
latter of which btf_field_offs' size is derived from via
btf_field_type_size.

This patch adds a size field to struct btf_field and sorts btf_record's
fields by offset, making it possible to get rid of btf_field_offs. Less
data duplication and less code complexity results.

Since btf_field_offs' lifetime closely followed the btf_record used to
populate it, most complexity wins are from removal of initialization
code like:

  if (btf_record_successfully_initialized) {
    foffs = btf_parse_field_offs(rec);
    if (IS_ERR_OR_NULL(foffs))
      // free the btf_record and return err
  }

Other changes in this patch are pretty mechanical:

  * foffs->field_off[i] -> rec->fields[i].offset
  * foffs->field_sz[i] -> rec->fields[i].size
  * Sort rec->fields in btf_parse_fields before returning
    * It's possible that this is necessary independently of other
      changes in this patch. btf_record_find in syscall.c expects
      btf_record's fields to be sorted by offset, yet there's no
      explicit sorting of them before this patch; record's fields are
      populated in the order they're read from the BTF struct definition.
      BTF docs don't say anything about the sortedness of struct fields.
  * All functions taking struct btf_field_offs * input now instead take
    struct btf_record *. All callsites of these functions already have
    access to the correct btf_record.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Link: https://lore.kernel.org/r/20230415201811.343116-2-davemarchevsky@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Dave Marchevsky authored and Alexei Starovoitov committed Apr 16, 2023
1 parent 4a1e885 commit cd2a807
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 130 deletions.
44 changes: 19 additions & 25 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ struct btf_field_graph_root {

struct btf_field {
u32 offset;
u32 size;
enum btf_field_type type;
union {
struct btf_field_kptr kptr;
Expand All @@ -225,12 +226,6 @@ struct btf_record {
struct btf_field fields[];
};

struct btf_field_offs {
u32 cnt;
u32 field_off[BTF_FIELDS_MAX];
u8 field_sz[BTF_FIELDS_MAX];
};

struct bpf_map {
/* The first two cachelines with read-mostly members of which some
* are also accessed in fast-path (e.g. ops, max_entries).
Expand All @@ -257,7 +252,6 @@ struct bpf_map {
struct obj_cgroup *objcg;
#endif
char name[BPF_OBJ_NAME_LEN];
struct btf_field_offs *field_offs;
/* The 3rd and 4th cacheline with misc members to avoid false sharing
* particularly with refcounting.
*/
Expand Down Expand Up @@ -360,14 +354,14 @@ static inline bool btf_record_has_field(const struct btf_record *rec, enum btf_f
return rec->field_mask & type;
}

static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj)
static inline void bpf_obj_init(const struct btf_record *rec, void *obj)
{
int i;

if (!foffs)
if (IS_ERR_OR_NULL(rec))
return;
for (i = 0; i < foffs->cnt; i++)
memset(obj + foffs->field_off[i], 0, foffs->field_sz[i]);
for (i = 0; i < rec->cnt; i++)
memset(obj + rec->fields[i].offset, 0, rec->fields[i].size);
}

/* 'dst' must be a temporary buffer and should not point to memory that is being
Expand All @@ -379,7 +373,7 @@ static inline void bpf_obj_init(const struct btf_field_offs *foffs, void *obj)
*/
static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
{
bpf_obj_init(map->field_offs, dst);
bpf_obj_init(map->record, dst);
}

/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
Expand All @@ -399,64 +393,64 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size)
}

/* copy everything but bpf_spin_lock, bpf_timer, and kptrs. There could be one of each. */
static inline void bpf_obj_memcpy(struct btf_field_offs *foffs,
static inline void bpf_obj_memcpy(struct btf_record *rec,
void *dst, void *src, u32 size,
bool long_memcpy)
{
u32 curr_off = 0;
int i;

if (likely(!foffs)) {
if (IS_ERR_OR_NULL(rec)) {
if (long_memcpy)
bpf_long_memcpy(dst, src, round_up(size, 8));
else
memcpy(dst, src, size);
return;
}

for (i = 0; i < foffs->cnt; i++) {
u32 next_off = foffs->field_off[i];
for (i = 0; i < rec->cnt; i++) {
u32 next_off = rec->fields[i].offset;
u32 sz = next_off - curr_off;

memcpy(dst + curr_off, src + curr_off, sz);
curr_off += foffs->field_sz[i] + sz;
curr_off += rec->fields[i].size + sz;
}
memcpy(dst + curr_off, src + curr_off, size - curr_off);
}

static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
{
bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, false);
bpf_obj_memcpy(map->record, dst, src, map->value_size, false);
}

static inline void copy_map_value_long(struct bpf_map *map, void *dst, void *src)
{
bpf_obj_memcpy(map->field_offs, dst, src, map->value_size, true);
bpf_obj_memcpy(map->record, dst, src, map->value_size, true);
}

static inline void bpf_obj_memzero(struct btf_field_offs *foffs, void *dst, u32 size)
static inline void bpf_obj_memzero(struct btf_record *rec, void *dst, u32 size)
{
u32 curr_off = 0;
int i;

if (likely(!foffs)) {
if (IS_ERR_OR_NULL(rec)) {
memset(dst, 0, size);
return;
}

for (i = 0; i < foffs->cnt; i++) {
u32 next_off = foffs->field_off[i];
for (i = 0; i < rec->cnt; i++) {
u32 next_off = rec->fields[i].offset;
u32 sz = next_off - curr_off;

memset(dst + curr_off, 0, sz);
curr_off += foffs->field_sz[i] + sz;
curr_off += rec->fields[i].size + sz;
}
memset(dst + curr_off, 0, size - curr_off);
}

static inline void zero_map_value(struct bpf_map *map, void *dst)
{
bpf_obj_memzero(map->field_offs, dst, map->value_size);
bpf_obj_memzero(map->record, dst, map->value_size);
}

void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
Expand Down
2 changes: 0 additions & 2 deletions include/linux/btf.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ struct btf_id_dtor_kfunc {
struct btf_struct_meta {
u32 btf_id;
struct btf_record *record;
struct btf_field_offs *field_offs;
};

struct btf_struct_metas {
Expand Down Expand Up @@ -207,7 +206,6 @@ int btf_find_timer(const struct btf *btf, const struct btf_type *t);
struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size);
int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec);
struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec);
bool btf_type_is_void(const struct btf_type *t);
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind);
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
Expand Down
93 changes: 21 additions & 72 deletions kernel/bpf/btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -1666,10 +1666,8 @@ static void btf_struct_metas_free(struct btf_struct_metas *tab)

if (!tab)
return;
for (i = 0; i < tab->cnt; i++) {
for (i = 0; i < tab->cnt; i++)
btf_record_free(tab->types[i].record);
kfree(tab->types[i].field_offs);
}
kfree(tab);
}

Expand Down Expand Up @@ -3700,12 +3698,24 @@ static int btf_parse_rb_root(const struct btf *btf, struct btf_field *field,
__alignof__(struct bpf_rb_node));
}

/* Comparator for sort_r(): orders btf_field entries by ascending offset.
 * The priv argument is unused here; it is only needed by sort_r's swap
 * callback signature. Returns <0 / >0 / 0 per the qsort-style contract.
 * Equal offsets compare as 0 (offsets are expected to be unique, since
 * btf_parse_fields rejects overlapping fields — TODO confirm).
 */
static int btf_field_cmp(const void *_a, const void *_b, const void *priv)
{
	const struct btf_field *a = (const struct btf_field *)_a;
	const struct btf_field *b = (const struct btf_field *)_b;

	if (a->offset < b->offset)
		return -1;
	else if (a->offset > b->offset)
		return 1;
	return 0;
}

struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type *t,
u32 field_mask, u32 value_size)
{
struct btf_field_info info_arr[BTF_FIELDS_MAX];
u32 next_off = 0, field_type_size;
struct btf_record *rec;
u32 next_off = 0;
int ret, i, cnt;

ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr));
Expand All @@ -3725,7 +3735,8 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
rec->spin_lock_off = -EINVAL;
rec->timer_off = -EINVAL;
for (i = 0; i < cnt; i++) {
if (info_arr[i].off + btf_field_type_size(info_arr[i].type) > value_size) {
field_type_size = btf_field_type_size(info_arr[i].type);
if (info_arr[i].off + field_type_size > value_size) {
WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size);
ret = -EFAULT;
goto end;
Expand All @@ -3734,11 +3745,12 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
ret = -EEXIST;
goto end;
}
next_off = info_arr[i].off + btf_field_type_size(info_arr[i].type);
next_off = info_arr[i].off + field_type_size;

rec->field_mask |= info_arr[i].type;
rec->fields[i].offset = info_arr[i].off;
rec->fields[i].type = info_arr[i].type;
rec->fields[i].size = field_type_size;

switch (info_arr[i].type) {
case BPF_SPIN_LOCK:
Expand Down Expand Up @@ -3808,6 +3820,9 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
goto end;
}

sort_r(rec->fields, rec->cnt, sizeof(struct btf_field), btf_field_cmp,
NULL, rec);

return rec;
end:
btf_record_free(rec);
Expand Down Expand Up @@ -3889,61 +3904,6 @@ int btf_check_and_fixup_fields(const struct btf *btf, struct btf_record *rec)
return 0;
}

static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv)
{
const u32 a = *(const u32 *)_a;
const u32 b = *(const u32 *)_b;

if (a < b)
return -1;
else if (a > b)
return 1;
return 0;
}

static void btf_field_offs_swap(void *_a, void *_b, int size, const void *priv)
{
struct btf_field_offs *foffs = (void *)priv;
u32 *off_base = foffs->field_off;
u32 *a = _a, *b = _b;
u8 *sz_a, *sz_b;

sz_a = foffs->field_sz + (a - off_base);
sz_b = foffs->field_sz + (b - off_base);

swap(*a, *b);
swap(*sz_a, *sz_b);
}

struct btf_field_offs *btf_parse_field_offs(struct btf_record *rec)
{
struct btf_field_offs *foffs;
u32 i, *off;
u8 *sz;

BUILD_BUG_ON(ARRAY_SIZE(foffs->field_off) != ARRAY_SIZE(foffs->field_sz));
if (IS_ERR_OR_NULL(rec))
return NULL;

foffs = kzalloc(sizeof(*foffs), GFP_KERNEL | __GFP_NOWARN);
if (!foffs)
return ERR_PTR(-ENOMEM);

off = foffs->field_off;
sz = foffs->field_sz;
for (i = 0; i < rec->cnt; i++) {
off[i] = rec->fields[i].offset;
sz[i] = btf_field_type_size(rec->fields[i].type);
}
foffs->cnt = rec->cnt;

if (foffs->cnt == 1)
return foffs;
sort_r(foffs->field_off, foffs->cnt, sizeof(foffs->field_off[0]),
btf_field_offs_cmp, btf_field_offs_swap, foffs);
return foffs;
}

static void __btf_struct_show(const struct btf *btf, const struct btf_type *t,
u32 type_id, void *data, u8 bits_offset,
struct btf_show *show)
Expand Down Expand Up @@ -5386,7 +5346,6 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
for (i = 1; i < n; i++) {
struct btf_struct_metas *new_tab;
const struct btf_member *member;
struct btf_field_offs *foffs;
struct btf_struct_meta *type;
struct btf_record *record;
const struct btf_type *t;
Expand Down Expand Up @@ -5428,17 +5387,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
goto free;
}
foffs = btf_parse_field_offs(record);
/* We need the field_offs to be valid for a valid record,
* either both should be set or both should be unset.
*/
if (IS_ERR_OR_NULL(foffs)) {
btf_record_free(record);
ret = -EFAULT;
goto free;
}
type->record = record;
type->field_offs = foffs;
tab->cnt++;
}
return tab;
Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1893,7 +1893,7 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
if (!p)
return NULL;
if (meta)
bpf_obj_init(meta->field_offs, p);
bpf_obj_init(meta->record, p);
return p;
}

Expand Down
15 changes: 0 additions & 15 deletions kernel/bpf/map_in_map.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
ret = PTR_ERR(inner_map_meta->record);
goto free;
}
if (inner_map_meta->record) {
struct btf_field_offs *field_offs;
/* If btf_record is !IS_ERR_OR_NULL, then field_offs is always
* valid.
*/
field_offs = kmemdup(inner_map->field_offs, sizeof(*inner_map->field_offs), GFP_KERNEL | __GFP_NOWARN);
if (!field_offs) {
ret = -ENOMEM;
goto free_rec;
}
inner_map_meta->field_offs = field_offs;
}
/* Note: We must use the same BTF, as we also used btf_record_dup above
* which relies on BTF being same for both maps, as some members like
* record->fields.list_head have pointers like value_rec pointing into
Expand All @@ -88,8 +76,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)

fdput(f);
return inner_map_meta;
free_rec:
btf_record_free(inner_map_meta->record);
free:
kfree(inner_map_meta);
put:
Expand All @@ -99,7 +85,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)

void bpf_map_meta_free(struct bpf_map *map_meta)
{
kfree(map_meta->field_offs);
bpf_map_free_record(map_meta);
btf_put(map_meta->btf);
kfree(map_meta);
Expand Down
Loading

0 comments on commit cd2a807

Please sign in to comment.