Skip to content

Commit

Permalink
Merge branch 'bpf_iter-for-map-elems'
Browse files Browse the repository at this point in the history
Yonghong Song says:

====================
Bpf iterator has been implemented for task, task_file,
bpf_map, ipv6_route, netlink, tcp and udp so far.

For map elements, there are two ways to traverse all elements from
user space:
  1. using BPF_MAP_GET_NEXT_KEY bpf subcommand to get elements
     one by one.
  2. using BPF_MAP_LOOKUP_BATCH bpf subcommand to get a batch of
     elements.
Both these approaches need to copy data from kernel to user space
in order to do inspection.

This patch implements bpf iterator for map elements.
User can have a bpf program in kernel to run with each map element,
do checking, filtering, aggregation, modifying values etc.
without copying data to user space.

Patch #1 and #2 are refactoring. Patch #3 implements readonly/readwrite
buffer support in verifier. Patches #4 - #7 implements map element
support for hash, percpu hash, lru hash lru percpu hash, array,
percpu array and sock local storage maps. Patches #8 - #9 are libbpf
and bpftool support. Patches #10 - #13 are selftests for implemented
map element iterators.

Changelogs:
  v3 -> v4:
    . fix a kasan failure triggered by a failed bpf_iter link_create,
      not just free_link but need cleanup_link. (Alexei)
  v2 -> v3:
    . rebase on top of latest bpf-next
  v1 -> v2:
    . support to modify map element values. (Alexei)
    . map key/values can be used with helper arguments
      for those arguments with ARG_PTR_TO_MEM or
      ARG_PTR_TO_INIT_MEM register type. (Alexei)
    . remove usused variable. (kernel test robot)
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Jul 26, 2020
2 parents a228a64 + 9efcc4a commit 909e446
Show file tree
Hide file tree
Showing 32 changed files with 1,687 additions and 62 deletions.
2 changes: 1 addition & 1 deletion fs/proc/proc_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ static const struct proc_ops proc_net_seq_ops = {
.proc_release = seq_release_net,
};

int bpf_iter_init_seq_net(void *priv_data)
int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux)
{
#ifdef CONFIG_NET_NS
struct seq_net_private *p = priv_data;
Expand Down
42 changes: 36 additions & 6 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,21 @@ struct btf;
struct btf_type;
struct exception_table_entry;
struct seq_operations;
struct bpf_iter_aux_info;

extern struct idr btf_idr;
extern spinlock_t btf_idr_lock;

typedef int (*bpf_iter_init_seq_priv_t)(void *private_data,
struct bpf_iter_aux_info *aux);
typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
struct bpf_iter_seq_info {
const struct seq_operations *seq_ops;
bpf_iter_init_seq_priv_t init_seq_private;
bpf_iter_fini_seq_priv_t fini_seq_private;
u32 seq_priv_size;
};

/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
Expand Down Expand Up @@ -96,6 +107,9 @@ struct bpf_map_ops {
/* BTF name and id of struct allocated by map_alloc */
const char * const map_btf_name;
int *map_btf_id;

/* bpf_iter info used to open a seq_file */
const struct bpf_iter_seq_info *iter_seq_info;
};

struct bpf_map_memory {
Expand Down Expand Up @@ -342,6 +356,10 @@ enum bpf_reg_type {
PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */
PTR_TO_MEM, /* reg points to valid memory region */
PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */
PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */
PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */
PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */
PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */
};

/* The information passed from prog-specific *_is_valid_access
Expand Down Expand Up @@ -683,6 +701,8 @@ struct bpf_prog_aux {
u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
u32 attach_btf_id; /* in-kernel BTF type id to attach to */
u32 ctx_arg_info_size;
u32 max_rdonly_access;
u32 max_rdwr_access;
const struct bpf_ctx_arg_aux *ctx_arg_info;
struct bpf_prog *linked_prog;
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
Expand Down Expand Up @@ -1189,18 +1209,21 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
extern int bpf_iter_ ## target(args); \
int __init bpf_iter_ ## target(args) { return 0; }

typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
struct bpf_iter_aux_info {
struct bpf_map *map;
};

typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog,
struct bpf_iter_aux_info *aux);

#define BPF_ITER_CTX_ARG_MAX 2
struct bpf_iter_reg {
const char *target;
const struct seq_operations *seq_ops;
bpf_iter_init_seq_priv_t init_seq_private;
bpf_iter_fini_seq_priv_t fini_seq_private;
u32 seq_priv_size;
bpf_iter_check_target_t check_target;
u32 ctx_arg_info_size;
enum bpf_iter_link_info req_linfo;
struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
const struct bpf_iter_seq_info *seq_info;
};

struct bpf_iter_meta {
Expand All @@ -1209,6 +1232,13 @@ struct bpf_iter_meta {
u64 seq_num;
};

struct bpf_iter__bpf_map_elem {
__bpf_md_ptr(struct bpf_iter_meta *, meta);
__bpf_md_ptr(struct bpf_map *, map);
__bpf_md_ptr(void *, key);
__bpf_md_ptr(void *, value);
};

int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
bool bpf_iter_prog_supported(struct bpf_prog *prog);
Expand Down
3 changes: 2 additions & 1 deletion include/linux/proc_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
void *data);
extern struct pid *tgid_pidfd_to_pid(const struct file *file);

extern int bpf_iter_init_seq_net(void *priv_data);
struct bpf_iter_aux_info;
extern int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux);
extern void bpf_iter_fini_seq_net(void *priv_data);

#ifdef CONFIG_PROC_PID_ARCH_STATUS
Expand Down
7 changes: 7 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,13 @@ enum bpf_link_type {
MAX_BPF_LINK_TYPE,
};

enum bpf_iter_link_info {
BPF_ITER_LINK_UNSPEC = 0,
BPF_ITER_LINK_MAP_FD = 1,

MAX_BPF_ITER_LINK_INFO,
};

/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
Expand Down
138 changes: 138 additions & 0 deletions kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,142 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
vma->vm_pgoff + pgoff);
}

struct bpf_iter_seq_array_map_info {
struct bpf_map *map;
void *percpu_value_buf;
u32 index;
};

static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos)
{
struct bpf_iter_seq_array_map_info *info = seq->private;
struct bpf_map *map = info->map;
struct bpf_array *array;
u32 index;

if (info->index >= map->max_entries)
return NULL;

if (*pos == 0)
++*pos;
array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
return array->pptrs[index];
return array->value + array->elem_size * index;
}

static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct bpf_iter_seq_array_map_info *info = seq->private;
struct bpf_map *map = info->map;
struct bpf_array *array;
u32 index;

++*pos;
++info->index;
if (info->index >= map->max_entries)
return NULL;

array = container_of(map, struct bpf_array, map);
index = info->index & array->index_mask;
if (info->percpu_value_buf)
return array->pptrs[index];
return array->value + array->elem_size * index;
}

static int __bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
struct bpf_iter_seq_array_map_info *info = seq->private;
struct bpf_iter__bpf_map_elem ctx = {};
struct bpf_map *map = info->map;
struct bpf_iter_meta meta;
struct bpf_prog *prog;
int off = 0, cpu = 0;
void __percpu **pptr;
u32 size;

meta.seq = seq;
prog = bpf_iter_get_info(&meta, v == NULL);
if (!prog)
return 0;

ctx.meta = &meta;
ctx.map = info->map;
if (v) {
ctx.key = &info->index;

if (!info->percpu_value_buf) {
ctx.value = v;
} else {
pptr = v;
size = round_up(map->value_size, 8);
for_each_possible_cpu(cpu) {
bpf_long_memcpy(info->percpu_value_buf + off,
per_cpu_ptr(pptr, cpu),
size);
off += size;
}
ctx.value = info->percpu_value_buf;
}
}

return bpf_iter_run_prog(prog, &ctx);
}

static int bpf_array_map_seq_show(struct seq_file *seq, void *v)
{
return __bpf_array_map_seq_show(seq, v);
}

static void bpf_array_map_seq_stop(struct seq_file *seq, void *v)
{
if (!v)
(void)__bpf_array_map_seq_show(seq, NULL);
}

static int bpf_iter_init_array_map(void *priv_data,
struct bpf_iter_aux_info *aux)
{
struct bpf_iter_seq_array_map_info *seq_info = priv_data;
struct bpf_map *map = aux->map;
void *value_buf;
u32 buf_size;

if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
buf_size = round_up(map->value_size, 8) * num_possible_cpus();
value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN);
if (!value_buf)
return -ENOMEM;

seq_info->percpu_value_buf = value_buf;
}

seq_info->map = map;
return 0;
}

static void bpf_iter_fini_array_map(void *priv_data)
{
struct bpf_iter_seq_array_map_info *seq_info = priv_data;

kfree(seq_info->percpu_value_buf);
}

static const struct seq_operations bpf_array_map_seq_ops = {
.start = bpf_array_map_seq_start,
.next = bpf_array_map_seq_next,
.stop = bpf_array_map_seq_stop,
.show = bpf_array_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
.seq_ops = &bpf_array_map_seq_ops,
.init_seq_private = bpf_iter_init_array_map,
.fini_seq_private = bpf_iter_fini_array_map,
.seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info),
};

static int array_map_btf_id;
const struct bpf_map_ops array_map_ops = {
.map_alloc_check = array_map_alloc_check,
Expand All @@ -506,6 +642,7 @@ const struct bpf_map_ops array_map_ops = {
.map_update_batch = generic_map_update_batch,
.map_btf_name = "bpf_array",
.map_btf_id = &array_map_btf_id,
.iter_seq_info = &iter_seq_info,
};

static int percpu_array_map_btf_id;
Expand All @@ -521,6 +658,7 @@ const struct bpf_map_ops percpu_array_map_ops = {
.map_check_btf = array_map_check_btf,
.map_btf_name = "bpf_array",
.map_btf_id = &percpu_array_map_btf_id,
.iter_seq_info = &iter_seq_info,
};

static int fd_array_map_alloc_check(union bpf_attr *attr)
Expand Down
Loading

0 comments on commit 909e446

Please sign in to comment.