Skip to content

Commit

Permalink
Merge branch 'bpf-add-support-for-local-percpu-kptr'
Browse files Browse the repository at this point in the history
Yonghong Song says:

====================
bpf: Add support for local percpu kptr

Patch set [1] implemented cgroup local storage BPF_MAP_TYPE_CGRP_STORAGE
similar to sk/task/inode local storage and old BPF_MAP_TYPE_CGROUP_STORAGE
map is marked as deprecated since old BPF_MAP_TYPE_CGROUP_STORAGE map can
only work with current cgroup.

Similarly, the existing BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE map
is a percpu version of BPF_MAP_TYPE_CGROUP_STORAGE and only works
with current cgroup. But there is no replacement which can work
with arbitrary cgroup.

This patch set solved this problem but adding support for local
percpu kptr. The map value can have a percpu kptr field which holds
a bpf prog allocated percpu data. The below is an example,

  struct percpu_val_t {
    ... fields ...
  }

  struct map_value_t {
    struct percpu_val_t __percpu_kptr *percpu_data_ptr;
  }

In the above, 'map_value_t' is the map value type for a
BPF_MAP_TYPE_CGRP_STORAGE map. User can access 'percpu_data_ptr'
and then read/write percpu data. This covers BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
and more. So BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE map type
is marked as deprecated.

In additional, local percpu kptr supports the same map type
as other kptrs including hash, lru_hash, array, sk/inode/task/cgrp
local storage. Currently, percpu data structure does not support
non-scalars or special fields (e.g., bpf_spin_lock, bpf_rb_root, etc.).
They can be supported in the future if there exist use cases.

Please for individual patches for details.

  [1] https://lore.kernel.org/all/20221026042835.672317-1-yhs@fb.com/

Changelog:
  v2 -> v3:
    - fix libbpf_str test failure.
  v1 -> v2:
    - does not support special fields in percpu data structure.
    - rename __percpu attr to __percpu_kptr attr.
    - rename BPF_KPTR_PERCPU_REF to BPF_KPTR_PERCPU.
    - better code to handle bpf_{this,per}_cpu_ptr() helpers.
    - add more negative tests.
    - fix a bpftool related test failure.
====================

Link: https://lore.kernel.org/r/20230827152729.1995219-1-yonghong.song@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Sep 8, 2023
2 parents 3903802 + 9bc95a9 commit 1e4a6d9
Show file tree
Hide file tree
Showing 19 changed files with 853 additions and 54 deletions.
22 changes: 14 additions & 8 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ struct cgroup;
extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;
extern struct kobject *btf_kobj;
extern struct bpf_mem_alloc bpf_global_ma;
extern bool bpf_global_ma_set;
extern struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
extern bool bpf_global_ma_set, bpf_global_percpu_ma_set;

typedef u64 (*bpf_callback_t)(u64, u64, u64, u64, u64);
typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
Expand Down Expand Up @@ -180,14 +180,15 @@ enum btf_field_type {
BPF_TIMER = (1 << 1),
BPF_KPTR_UNREF = (1 << 2),
BPF_KPTR_REF = (1 << 3),
BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF,
BPF_LIST_HEAD = (1 << 4),
BPF_LIST_NODE = (1 << 5),
BPF_RB_ROOT = (1 << 6),
BPF_RB_NODE = (1 << 7),
BPF_KPTR_PERCPU = (1 << 4),
BPF_KPTR = BPF_KPTR_UNREF | BPF_KPTR_REF | BPF_KPTR_PERCPU,
BPF_LIST_HEAD = (1 << 5),
BPF_LIST_NODE = (1 << 6),
BPF_RB_ROOT = (1 << 7),
BPF_RB_NODE = (1 << 8),
BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
BPF_RB_NODE | BPF_RB_ROOT,
BPF_REFCOUNT = (1 << 8),
BPF_REFCOUNT = (1 << 9),
};

typedef void (*btf_dtor_kfunc_t)(void *);
Expand Down Expand Up @@ -300,6 +301,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
return "kptr";
case BPF_KPTR_PERCPU:
return "percpu_kptr";
case BPF_LIST_HEAD:
return "bpf_list_head";
case BPF_LIST_NODE:
Expand All @@ -325,6 +328,7 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
return sizeof(struct bpf_timer);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
return sizeof(u64);
case BPF_LIST_HEAD:
return sizeof(struct bpf_list_head);
Expand All @@ -351,6 +355,7 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
return __alignof__(struct bpf_timer);
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
return __alignof__(u64);
case BPF_LIST_HEAD:
return __alignof__(struct bpf_list_head);
Expand Down Expand Up @@ -389,6 +394,7 @@ static inline void bpf_obj_init_field(const struct btf_field *field, void *addr)
case BPF_TIMER:
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
break;
default:
WARN_ON_ONCE(1);
Expand Down
1 change: 1 addition & 0 deletions include/linux/bpf_verifier.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ struct bpf_insn_aux_data {
bool zext_dst; /* this insn zero extends dst reg */
bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
u8 alu_state; /* used in combination with alu_limit */

/* below fields are initialized once */
Expand Down
9 changes: 8 additions & 1 deletion include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -932,7 +932,14 @@ enum bpf_map_type {
*/
BPF_MAP_TYPE_CGROUP_STORAGE = BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED,
BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
/* BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE is available to bpf programs
* attaching to a cgroup. The new mechanism (BPF_MAP_TYPE_CGRP_STORAGE +
* local percpu kptr) supports all BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
* functionality and more. So mark * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE
* deprecated.
*/
BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED,
BPF_MAP_TYPE_QUEUE,
BPF_MAP_TYPE_STACK,
BPF_MAP_TYPE_SK_STORAGE,
Expand Down
5 changes: 5 additions & 0 deletions kernel/bpf/btf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3293,6 +3293,8 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
type = BPF_KPTR_UNREF;
else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
type = BPF_KPTR_REF;
else if (!strcmp("percpu_kptr", __btf_name_by_offset(btf, t->name_off)))
type = BPF_KPTR_PERCPU;
else
return -EINVAL;

Expand Down Expand Up @@ -3457,6 +3459,7 @@ static int btf_find_struct_field(const struct btf *btf,
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
ret = btf_find_kptr(btf, member_type, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
Expand Down Expand Up @@ -3523,6 +3526,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
ret = btf_find_kptr(btf, var_type, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
Expand Down Expand Up @@ -3783,6 +3787,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
if (ret < 0)
goto end;
Expand Down
8 changes: 5 additions & 3 deletions kernel/bpf/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
#define OFF insn->off
#define IMM insn->imm

struct bpf_mem_alloc bpf_global_ma;
bool bpf_global_ma_set;
struct bpf_mem_alloc bpf_global_ma, bpf_global_percpu_ma;
bool bpf_global_ma_set, bpf_global_percpu_ma_set;

/* No hurry in this branch
*
Expand Down Expand Up @@ -2921,7 +2921,9 @@ static int __init bpf_global_ma_init(void)

ret = bpf_mem_alloc_init(&bpf_global_ma, 0, false);
bpf_global_ma_set = !ret;
return ret;
ret = bpf_mem_alloc_init(&bpf_global_percpu_ma, 0, true);
bpf_global_percpu_ma_set = !ret;
return !bpf_global_ma_set || !bpf_global_percpu_ma_set;
}
late_initcall(bpf_global_ma_init);
#endif
Expand Down
16 changes: 16 additions & 0 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1902,6 +1902,14 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
return p;
}

__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
{
u64 size = local_type_id__k;

/* The verifier has ensured that meta__ign must be NULL */
return bpf_mem_alloc(&bpf_global_percpu_ma, size);
}

/* Must be called under migrate_disable(), as required by bpf_mem_free */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
{
Expand Down Expand Up @@ -1930,6 +1938,12 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
}

__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
{
/* The verifier has ensured that meta__ign must be NULL */
bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
}

__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
{
struct btf_struct_meta *meta = meta__ign;
Expand Down Expand Up @@ -2442,7 +2456,9 @@ BTF_SET8_START(generic_btf_ids)
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
Expand Down
14 changes: 6 additions & 8 deletions kernel/bpf/memalloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -499,15 +499,16 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
struct obj_cgroup *objcg = NULL;
int cpu, i, unit_size, percpu_size = 0;

/* room for llist_node and per-cpu pointer */
if (percpu)
percpu_size = LLIST_NODE_SZ + sizeof(void *);

if (size) {
pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL);
if (!pc)
return -ENOMEM;

if (percpu)
/* room for llist_node and per-cpu pointer */
percpu_size = LLIST_NODE_SZ + sizeof(void *);
else
if (!percpu)
size += LLIST_NODE_SZ; /* room for llist_node */
unit_size = size;

Expand All @@ -527,10 +528,6 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
return 0;
}

/* size == 0 && percpu is an invalid combination */
if (WARN_ON_ONCE(percpu))
return -EINVAL;

pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL);
if (!pcc)
return -ENOMEM;
Expand All @@ -543,6 +540,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu)
c = &cc->cache[i];
c->unit_size = sizes[i];
c->objcg = objcg;
c->percpu_size = percpu_size;
c->tgt = c;
prefill_mem_cache(c, cpu);
}
Expand Down
4 changes: 4 additions & 0 deletions kernel/bpf/syscall.c
Original file line number Diff line number Diff line change
Expand Up @@ -514,6 +514,7 @@ void btf_record_free(struct btf_record *rec)
switch (rec->fields[i].type) {
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
if (rec->fields[i].kptr.module)
module_put(rec->fields[i].kptr.module);
btf_put(rec->fields[i].kptr.btf);
Expand Down Expand Up @@ -560,6 +561,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
switch (fields[i].type) {
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
btf_get(fields[i].kptr.btf);
if (fields[i].kptr.module && !try_module_get(fields[i].kptr.module)) {
ret = -ENXIO;
Expand Down Expand Up @@ -650,6 +652,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
WRITE_ONCE(*(u64 *)field_ptr, 0);
break;
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
if (!xchgd_field)
break;
Expand Down Expand Up @@ -1045,6 +1048,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
break;
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
case BPF_KPTR_PERCPU:
case BPF_REFCOUNT:
if (map->map_type != BPF_MAP_TYPE_HASH &&
map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
Expand Down
Loading

0 comments on commit 1e4a6d9

Please sign in to comment.