diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index dba63b2429f05..ed8a6306990c4 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -98,7 +98,7 @@ static const struct proc_ops proc_net_seq_ops = { .proc_release = seq_release_net, }; -int bpf_iter_init_seq_net(void *priv_data) +int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux) { #ifdef CONFIG_NET_NS struct seq_net_private *p = priv_data; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 72221aea1c60c..4175cf1f46657 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -33,10 +33,21 @@ struct btf; struct btf_type; struct exception_table_entry; struct seq_operations; +struct bpf_iter_aux_info; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; +typedef int (*bpf_iter_init_seq_priv_t)(void *private_data, + struct bpf_iter_aux_info *aux); +typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); +struct bpf_iter_seq_info { + const struct seq_operations *seq_ops; + bpf_iter_init_seq_priv_t init_seq_private; + bpf_iter_fini_seq_priv_t fini_seq_private; + u32 seq_priv_size; +}; + /* map is generic key/value storage optionally accesible by eBPF programs */ struct bpf_map_ops { /* funcs callable from userspace (via syscall) */ @@ -96,6 +107,9 @@ struct bpf_map_ops { /* BTF name and id of struct allocated by map_alloc */ const char * const map_btf_name; int *map_btf_id; + + /* bpf_iter info used to open a seq_file */ + const struct bpf_iter_seq_info *iter_seq_info; }; struct bpf_map_memory { @@ -342,6 +356,10 @@ enum bpf_reg_type { PTR_TO_BTF_ID_OR_NULL, /* reg points to kernel struct or NULL */ PTR_TO_MEM, /* reg points to valid memory region */ PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ + PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ + PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ + PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ + PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ }; /* The information passed from prog-specific *_is_valid_access @@ -683,6 +701,8 @@ struct bpf_prog_aux { u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ u32 attach_btf_id; /* in-kernel BTF type id to attach to */ u32 ctx_arg_info_size; + u32 max_rdonly_access; + u32 max_rdwr_access; const struct bpf_ctx_arg_aux *ctx_arg_info; struct bpf_prog *linked_prog; bool verifier_zext; /* Zero extensions has been inserted by verifier. */ @@ -1189,18 +1209,21 @@ int bpf_obj_get_user(const char __user *pathname, int flags); extern int bpf_iter_ ## target(args); \ int __init bpf_iter_ ## target(args) { return 0; } -typedef int (*bpf_iter_init_seq_priv_t)(void *private_data); -typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data); +struct bpf_iter_aux_info { + struct bpf_map *map; +}; + +typedef int (*bpf_iter_check_target_t)(struct bpf_prog *prog, + struct bpf_iter_aux_info *aux); #define BPF_ITER_CTX_ARG_MAX 2 struct bpf_iter_reg { const char *target; - const struct seq_operations *seq_ops; - bpf_iter_init_seq_priv_t init_seq_private; - bpf_iter_fini_seq_priv_t fini_seq_private; - u32 seq_priv_size; + bpf_iter_check_target_t check_target; u32 ctx_arg_info_size; + enum bpf_iter_link_info req_linfo; struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX]; + const struct bpf_iter_seq_info *seq_info; }; struct bpf_iter_meta { @@ -1209,6 +1232,13 @@ struct bpf_iter_meta { u64 seq_num; }; +struct bpf_iter__bpf_map_elem { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_map *, map); + __bpf_md_ptr(void *, key); + __bpf_md_ptr(void *, value); +}; + int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info); void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info); bool bpf_iter_prog_supported(struct bpf_prog *prog); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index d1eed1b436517..2df965cd09742 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -133,7 +133,8 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo void *data); extern struct pid *tgid_pidfd_to_pid(const struct file *file); -extern int bpf_iter_init_seq_net(void *priv_data); +struct bpf_iter_aux_info; +extern int bpf_iter_init_seq_net(void *priv_data, struct bpf_iter_aux_info *aux); extern void bpf_iter_fini_seq_net(void *priv_data); #ifdef CONFIG_PROC_PID_ARCH_STATUS diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 54d0c886e3bac..828c2f6438f27 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -246,6 +246,13 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; +enum bpf_iter_link_info { + BPF_ITER_LINK_UNSPEC = 0, + BPF_ITER_LINK_MAP_FD = 1, + + MAX_BPF_ITER_LINK_INFO, +}; + /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index c66e8273fccda..8ff419b632a65 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -487,6 +487,142 @@ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma) vma->vm_pgoff + pgoff); } +struct bpf_iter_seq_array_map_info { + struct bpf_map *map; + void *percpu_value_buf; + u32 index; +}; + +static void *bpf_array_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_iter_seq_array_map_info *info = seq->private; + struct bpf_map *map = info->map; + struct bpf_array *array; + u32 index; + + if (info->index >= map->max_entries) + return NULL; + + if (*pos == 0) + ++*pos; + array = container_of(map, struct bpf_array, map); + index = info->index & array->index_mask; + if (info->percpu_value_buf) + return array->pptrs[index]; + return array->value + array->elem_size * index; +} + +static void *bpf_array_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_iter_seq_array_map_info *info = seq->private; + struct bpf_map *map = info->map; + struct bpf_array *array; + u32 index; + + ++*pos; + ++info->index; + if (info->index >= map->max_entries) + return NULL; + + array = container_of(map, struct bpf_array, map); + index = info->index & array->index_mask; + if (info->percpu_value_buf) + return array->pptrs[index]; + return array->value + array->elem_size * index; +} + +static int __bpf_array_map_seq_show(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_array_map_info *info = seq->private; + struct bpf_iter__bpf_map_elem ctx = {}; + struct bpf_map *map = info->map; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int off = 0, cpu = 0; + void __percpu **pptr; + u32 size; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, v == NULL); + if (!prog) + return 0; + + ctx.meta = &meta; + ctx.map = info->map; + if (v) { + ctx.key = &info->index; + + if (!info->percpu_value_buf) { + ctx.value = v; + } else { + pptr = v; + size = round_up(map->value_size, 8); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(info->percpu_value_buf + off, + per_cpu_ptr(pptr, cpu), + size); + off += size; + } + ctx.value = info->percpu_value_buf; + } + } + + return bpf_iter_run_prog(prog, &ctx); +} + +static int bpf_array_map_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_array_map_seq_show(seq, v); +} + +static void bpf_array_map_seq_stop(struct seq_file *seq, void *v) +{ + if (!v) + (void)__bpf_array_map_seq_show(seq, NULL); +} + +static int bpf_iter_init_array_map(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct bpf_iter_seq_array_map_info *seq_info = priv_data; + struct bpf_map *map = aux->map; + void *value_buf; + u32 buf_size; + + if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) { + buf_size = round_up(map->value_size, 8) * num_possible_cpus(); + value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); + if (!value_buf) + return -ENOMEM; + + seq_info->percpu_value_buf = value_buf; + } + + seq_info->map = map; + return 0; +} + +static void bpf_iter_fini_array_map(void *priv_data) +{ + struct bpf_iter_seq_array_map_info *seq_info = priv_data; + + kfree(seq_info->percpu_value_buf); +} + +static const struct seq_operations bpf_array_map_seq_ops = { + .start = bpf_array_map_seq_start, + .next = bpf_array_map_seq_next, + .stop = bpf_array_map_seq_stop, + .show = bpf_array_map_seq_show, +}; + +static const struct bpf_iter_seq_info iter_seq_info = { + .seq_ops = &bpf_array_map_seq_ops, + .init_seq_private = bpf_iter_init_array_map, + .fini_seq_private = bpf_iter_fini_array_map, + .seq_priv_size = sizeof(struct bpf_iter_seq_array_map_info), +}; + static int array_map_btf_id; const struct bpf_map_ops array_map_ops = { .map_alloc_check = array_map_alloc_check, @@ -506,6 +642,7 @@ const struct bpf_map_ops array_map_ops = { .map_update_batch = generic_map_update_batch, .map_btf_name = "bpf_array", .map_btf_id = &array_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int percpu_array_map_btf_id; @@ -521,6 +658,7 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_check_btf = array_map_check_btf, .map_btf_name = "bpf_array", .map_btf_id = &percpu_array_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int fd_array_map_alloc_check(union bpf_attr *attr) diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index dd612b80b9fea..363b9cafc2d81 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -14,11 +14,13 @@ struct bpf_iter_target_info { struct bpf_iter_link { struct bpf_link link; + struct bpf_iter_aux_info aux; struct bpf_iter_target_info *tinfo; }; struct bpf_iter_priv_data { struct bpf_iter_target_info *tinfo; + const struct bpf_iter_seq_info *seq_info; struct bpf_prog *prog; u64 session_id; u64 seq_num; @@ -35,7 +37,8 @@ static DEFINE_MUTEX(link_mutex); /* incremented on every opened seq_file */ static atomic64_t session_id; -static int prepare_seq_file(struct file *file, struct bpf_iter_link *link); +static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, + const struct bpf_iter_seq_info *seq_info); static void bpf_iter_inc_seq_num(struct seq_file *seq) { @@ -199,11 +202,25 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, return copied; } +static const struct bpf_iter_seq_info * +__get_seq_info(struct bpf_iter_link *link) +{ + const struct bpf_iter_seq_info *seq_info; + + if (link->aux.map) { + seq_info = link->aux.map->ops->iter_seq_info; + if (seq_info) + return seq_info; + } + + return link->tinfo->reg_info->seq_info; +} + static int iter_open(struct inode *inode, struct file *file) { struct bpf_iter_link *link = inode->i_private; - return prepare_seq_file(file, link); + return prepare_seq_file(file, link, __get_seq_info(link)); } static int iter_release(struct inode *inode, struct file *file) @@ -218,8 +235,8 @@ static int iter_release(struct inode *inode, struct file *file) iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); - if (iter_priv->tinfo->reg_info->fini_seq_private) - iter_priv->tinfo->reg_info->fini_seq_private(seq->private); + if (iter_priv->seq_info->fini_seq_private) + iter_priv->seq_info->fini_seq_private(seq->private); bpf_prog_put(iter_priv->prog); seq->private = iter_priv; @@ -318,6 +335,11 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog) static void bpf_iter_link_release(struct bpf_link *link) { + struct bpf_iter_link *iter_link = + container_of(link, struct bpf_iter_link, link); + + if (iter_link->aux.map) + bpf_map_put_with_uref(iter_link->aux.map); } static void bpf_iter_link_dealloc(struct bpf_link *link) @@ -370,14 +392,13 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { struct bpf_link_primer link_primer; struct bpf_iter_target_info *tinfo; + struct bpf_iter_aux_info aux = {}; struct bpf_iter_link *link; + u32 prog_btf_id, target_fd; bool existed = false; - u32 prog_btf_id; + struct bpf_map *map; int err; - if (attr->link_create.target_fd || attr->link_create.flags) - return -EINVAL; - prog_btf_id = prog->aux->attach_btf_id; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { @@ -390,6 +411,13 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) if (!existed) return -ENOENT; + /* Make sure user supplied flags are target expected. */ + target_fd = attr->link_create.target_fd; + if (attr->link_create.flags != tinfo->reg_info->req_linfo) + return -EINVAL; + if (!attr->link_create.flags && target_fd) + return -EINVAL; + link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); if (!link) return -ENOMEM; @@ -403,21 +431,45 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) return err; } + if (tinfo->reg_info->req_linfo == BPF_ITER_LINK_MAP_FD) { + map = bpf_map_get_with_uref(target_fd); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto cleanup_link; + } + + aux.map = map; + err = tinfo->reg_info->check_target(prog, &aux); + if (err) { + bpf_map_put_with_uref(map); + goto cleanup_link; + } + + link->aux.map = map; + } + return bpf_link_settle(&link_primer); + +cleanup_link: + bpf_link_cleanup(&link_primer); + return err; } static void init_seq_meta(struct bpf_iter_priv_data *priv_data, struct bpf_iter_target_info *tinfo, + const struct bpf_iter_seq_info *seq_info, struct bpf_prog *prog) { priv_data->tinfo = tinfo; + priv_data->seq_info = seq_info; priv_data->prog = prog; priv_data->session_id = atomic64_inc_return(&session_id); priv_data->seq_num = 0; priv_data->done_stop = false; } -static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) +static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, + const struct bpf_iter_seq_info *seq_info) { struct bpf_iter_priv_data *priv_data; struct bpf_iter_target_info *tinfo; @@ -433,21 +485,21 @@ static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) tinfo = link->tinfo; total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + - tinfo->reg_info->seq_priv_size; - priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops, + seq_info->seq_priv_size; + priv_data = __seq_open_private(file, seq_info->seq_ops, total_priv_dsize); if (!priv_data) { err = -ENOMEM; goto release_prog; } - if (tinfo->reg_info->init_seq_private) { - err = tinfo->reg_info->init_seq_private(priv_data->target_private); + if (seq_info->init_seq_private) { + err = seq_info->init_seq_private(priv_data->target_private, &link->aux); if (err) goto release_seq_file; } - init_seq_meta(priv_data, tinfo, prog); + init_seq_meta(priv_data, tinfo, seq_info, prog); seq = file->private_data; seq->private = priv_data->target_private; @@ -463,6 +515,7 @@ static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) int bpf_iter_new_fd(struct bpf_link *link) { + struct bpf_iter_link *iter_link; struct file *file; unsigned int flags; int err, fd; @@ -481,8 +534,8 @@ int bpf_iter_new_fd(struct bpf_link *link) goto free_fd; } - err = prepare_seq_file(file, - container_of(link, struct bpf_iter_link, link)); + iter_link = container_of(link, struct bpf_iter_link, link); + err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); if (err) goto free_file; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ee36b7f609368..0fd6bb62be3ab 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3806,6 +3806,19 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, btf_kind_str[BTF_INFO_KIND(t->info)]); return false; } + + /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ + for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { + const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; + + if (ctx_arg_info->offset == off && + (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL || + ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) { + info->reg_type = ctx_arg_info->reg_type; + return true; + } + } + if (t->type == 0) /* This is a pointer to void. * It is the same as scalar from the verifier safety pov. diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index d4378d7d442b9..0242767870550 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1612,6 +1612,196 @@ htab_lru_map_lookup_and_delete_batch(struct bpf_map *map, true, false); } +struct bpf_iter_seq_hash_map_info { + struct bpf_map *map; + struct bpf_htab *htab; + void *percpu_value_buf; // non-zero means percpu hash + unsigned long flags; + u32 bucket_id; + u32 skip_elems; +}; + +static struct htab_elem * +bpf_hash_map_seq_find_next(struct bpf_iter_seq_hash_map_info *info, + struct htab_elem *prev_elem) +{ + const struct bpf_htab *htab = info->htab; + unsigned long flags = info->flags; + u32 skip_elems = info->skip_elems; + u32 bucket_id = info->bucket_id; + struct hlist_nulls_head *head; + struct hlist_nulls_node *n; + struct htab_elem *elem; + struct bucket *b; + u32 i, count; + + if (bucket_id >= htab->n_buckets) + return NULL; + + /* try to find next elem in the same bucket */ + if (prev_elem) { + /* no update/deletion on this bucket, prev_elem should be still valid + * and we won't skip elements. + */ + n = rcu_dereference_raw(hlist_nulls_next_rcu(&prev_elem->hash_node)); + elem = hlist_nulls_entry_safe(n, struct htab_elem, hash_node); + if (elem) + return elem; + + /* not found, unlock and go to the next bucket */ + b = &htab->buckets[bucket_id++]; + htab_unlock_bucket(htab, b, flags); + skip_elems = 0; + } + + for (i = bucket_id; i < htab->n_buckets; i++) { + b = &htab->buckets[i]; + flags = htab_lock_bucket(htab, b); + + count = 0; + head = &b->head; + hlist_nulls_for_each_entry_rcu(elem, n, head, hash_node) { + if (count >= skip_elems) { + info->flags = flags; + info->bucket_id = i; + info->skip_elems = count; + return elem; + } + count++; + } + + htab_unlock_bucket(htab, b, flags); + skip_elems = 0; + } + + info->bucket_id = i; + info->skip_elems = 0; + return NULL; +} + +static void *bpf_hash_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_iter_seq_hash_map_info *info = seq->private; + struct htab_elem *elem; + + elem = bpf_hash_map_seq_find_next(info, NULL); + if (!elem) + return NULL; + + if (*pos == 0) + ++*pos; + return elem; +} + +static void *bpf_hash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct bpf_iter_seq_hash_map_info *info = seq->private; + + ++*pos; + ++info->skip_elems; + return bpf_hash_map_seq_find_next(info, v); +} + +static int __bpf_hash_map_seq_show(struct seq_file *seq, struct htab_elem *elem) +{ + struct bpf_iter_seq_hash_map_info *info = seq->private; + u32 roundup_key_size, roundup_value_size; + struct bpf_iter__bpf_map_elem ctx = {}; + struct bpf_map *map = info->map; + struct bpf_iter_meta meta; + int ret = 0, off = 0, cpu; + struct bpf_prog *prog; + void __percpu *pptr; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, elem == NULL); + if (prog) { + ctx.meta = &meta; + ctx.map = info->map; + if (elem) { + roundup_key_size = round_up(map->key_size, 8); + ctx.key = elem->key; + if (!info->percpu_value_buf) { + ctx.value = elem->key + roundup_key_size; + } else { + roundup_value_size = round_up(map->value_size, 8); + pptr = htab_elem_get_ptr(elem, map->key_size); + for_each_possible_cpu(cpu) { + bpf_long_memcpy(info->percpu_value_buf + off, + per_cpu_ptr(pptr, cpu), + roundup_value_size); + off += roundup_value_size; + } + ctx.value = info->percpu_value_buf; + } + } + ret = bpf_iter_run_prog(prog, &ctx); + } + + return ret; +} + +static int bpf_hash_map_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_hash_map_seq_show(seq, v); +} + +static void bpf_hash_map_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_hash_map_info *info = seq->private; + + if (!v) + (void)__bpf_hash_map_seq_show(seq, NULL); + else + htab_unlock_bucket(info->htab, + &info->htab->buckets[info->bucket_id], + info->flags); +} + +static int bpf_iter_init_hash_map(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct bpf_iter_seq_hash_map_info *seq_info = priv_data; + struct bpf_map *map = aux->map; + void *value_buf; + u32 buf_size; + + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { + buf_size = round_up(map->value_size, 8) * num_possible_cpus(); + value_buf = kmalloc(buf_size, GFP_USER | __GFP_NOWARN); + if (!value_buf) + return -ENOMEM; + + seq_info->percpu_value_buf = value_buf; + } + + seq_info->map = map; + seq_info->htab = container_of(map, struct bpf_htab, map); + return 0; +} + +static void bpf_iter_fini_hash_map(void *priv_data) +{ + struct bpf_iter_seq_hash_map_info *seq_info = priv_data; + + kfree(seq_info->percpu_value_buf); +} + +static const struct seq_operations bpf_hash_map_seq_ops = { + .start = bpf_hash_map_seq_start, + .next = bpf_hash_map_seq_next, + .stop = bpf_hash_map_seq_stop, + .show = bpf_hash_map_seq_show, +}; + +static const struct bpf_iter_seq_info iter_seq_info = { + .seq_ops = &bpf_hash_map_seq_ops, + .init_seq_private = bpf_iter_init_hash_map, + .fini_seq_private = bpf_iter_fini_hash_map, + .seq_priv_size = sizeof(struct bpf_iter_seq_hash_map_info), +}; + static int htab_map_btf_id; const struct bpf_map_ops htab_map_ops = { .map_alloc_check = htab_map_alloc_check, @@ -1626,6 +1816,7 @@ const struct bpf_map_ops htab_map_ops = { BATCH_OPS(htab), .map_btf_name = "bpf_htab", .map_btf_id = &htab_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int htab_lru_map_btf_id; @@ -1643,6 +1834,7 @@ const struct bpf_map_ops htab_lru_map_ops = { BATCH_OPS(htab_lru), .map_btf_name = "bpf_htab", .map_btf_id = &htab_lru_map_btf_id, + .iter_seq_info = &iter_seq_info, }; /* Called from eBPF program */ @@ -1760,6 +1952,7 @@ const struct bpf_map_ops htab_percpu_map_ops = { BATCH_OPS(htab_percpu), .map_btf_name = "bpf_htab", .map_btf_id = &htab_percpu_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int htab_lru_percpu_map_btf_id; @@ -1775,6 +1968,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = { BATCH_OPS(htab_lru_percpu), .map_btf_name = "bpf_htab", .map_btf_id = &htab_lru_percpu_map_btf_id, + .iter_seq_info = &iter_seq_info, }; static int fd_htab_map_alloc_check(union bpf_attr *attr) diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c index 5926c76d854ed..fbe1f557cb887 100644 --- a/kernel/bpf/map_iter.c +++ b/kernel/bpf/map_iter.c @@ -81,23 +81,79 @@ static const struct seq_operations bpf_map_seq_ops = { BTF_ID_LIST(btf_bpf_map_id) BTF_ID(struct, bpf_map) -static struct bpf_iter_reg bpf_map_reg_info = { - .target = "bpf_map", +static const struct bpf_iter_seq_info bpf_map_seq_info = { .seq_ops = &bpf_map_seq_ops, .init_seq_private = NULL, .fini_seq_private = NULL, .seq_priv_size = sizeof(struct bpf_iter_seq_map_info), +}; + +static struct bpf_iter_reg bpf_map_reg_info = { + .target = "bpf_map", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_map, map), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &bpf_map_seq_info, +}; + +static int bpf_iter_check_map(struct bpf_prog *prog, + struct bpf_iter_aux_info *aux) +{ + u32 key_acc_size, value_acc_size, key_size, value_size; + struct bpf_map *map = aux->map; + bool is_percpu = false; + + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + is_percpu = true; + else if (map->map_type != BPF_MAP_TYPE_HASH && + map->map_type != BPF_MAP_TYPE_LRU_HASH && + map->map_type != BPF_MAP_TYPE_ARRAY) + return -EINVAL; + + key_acc_size = prog->aux->max_rdonly_access; + value_acc_size = prog->aux->max_rdwr_access; + key_size = map->key_size; + if (!is_percpu) + value_size = map->value_size; + else + value_size = round_up(map->value_size, 8) * num_possible_cpus(); + + if (key_acc_size > key_size || value_acc_size > value_size) + return -EACCES; + + return 0; +} + +DEFINE_BPF_ITER_FUNC(bpf_map_elem, struct bpf_iter_meta *meta, + struct bpf_map *map, void *key, void *value) + +static const struct bpf_iter_reg bpf_map_elem_reg_info = { + .target = "bpf_map_elem", + .check_target = bpf_iter_check_map, + .req_linfo = BPF_ITER_LINK_MAP_FD, + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_map_elem, key), + PTR_TO_RDONLY_BUF_OR_NULL }, + { offsetof(struct bpf_iter__bpf_map_elem, value), + PTR_TO_RDWR_BUF_OR_NULL }, + }, }; static int __init bpf_map_iter_init(void) { + int ret; + bpf_map_reg_info.ctx_arg_info[0].btf_id = *btf_bpf_map_id; - return bpf_iter_reg_target(&bpf_map_reg_info); + ret = bpf_iter_reg_target(&bpf_map_reg_info); + if (ret) + return ret; + + return bpf_iter_reg_target(&bpf_map_elem_reg_info); } late_initcall(bpf_map_iter_init); diff --git a/kernel/bpf/prog_iter.c b/kernel/bpf/prog_iter.c index 6541b577d69f7..53a73c841c137 100644 --- a/kernel/bpf/prog_iter.c +++ b/kernel/bpf/prog_iter.c @@ -81,17 +81,21 @@ static const struct seq_operations bpf_prog_seq_ops = { BTF_ID_LIST(btf_bpf_prog_id) BTF_ID(struct, bpf_prog) -static struct bpf_iter_reg bpf_prog_reg_info = { - .target = "bpf_prog", +static const struct bpf_iter_seq_info bpf_prog_seq_info = { .seq_ops = &bpf_prog_seq_ops, .init_seq_private = NULL, .fini_seq_private = NULL, .seq_priv_size = sizeof(struct bpf_iter_seq_prog_info), +}; + +static struct bpf_iter_reg bpf_prog_reg_info = { + .target = "bpf_prog", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__bpf_prog, prog), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &bpf_prog_seq_info, }; static int __init bpf_prog_iter_init(void) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 1039e52ebd8be..232df29793e96 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -293,7 +293,7 @@ static void task_file_seq_stop(struct seq_file *seq, void *v) } } -static int init_seq_pidns(void *priv_data) +static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux) { struct bpf_iter_seq_task_common *common = priv_data; @@ -319,25 +319,32 @@ BTF_ID_LIST(btf_task_file_ids) BTF_ID(struct, task_struct) BTF_ID(struct, file) -static struct bpf_iter_reg task_reg_info = { - .target = "task", +static const struct bpf_iter_seq_info task_seq_info = { .seq_ops = &task_seq_ops, .init_seq_private = init_seq_pidns, .fini_seq_private = fini_seq_pidns, .seq_priv_size = sizeof(struct bpf_iter_seq_task_info), +}; + +static struct bpf_iter_reg task_reg_info = { + .target = "task", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__task, task), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &task_seq_info, }; -static struct bpf_iter_reg task_file_reg_info = { - .target = "task_file", +static const struct bpf_iter_seq_info task_file_seq_info = { .seq_ops = &task_file_seq_ops, .init_seq_private = init_seq_pidns, .fini_seq_private = fini_seq_pidns, .seq_priv_size = sizeof(struct bpf_iter_seq_task_file_info), +}; + +static struct bpf_iter_reg task_file_reg_info = { + .target = "task_file", .ctx_arg_info_size = 2, .ctx_arg_info = { { offsetof(struct bpf_iter__task_file, task), @@ -345,6 +352,7 @@ static struct bpf_iter_reg task_file_reg_info = { { offsetof(struct bpf_iter__task_file, file), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &task_file_seq_info, }; static int __init task_iter_init(void) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 9a6703bc3f36f..8d6979db48d8c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -409,7 +409,9 @@ static bool reg_type_may_be_null(enum bpf_reg_type type) type == PTR_TO_SOCK_COMMON_OR_NULL || type == PTR_TO_TCP_SOCK_OR_NULL || type == PTR_TO_BTF_ID_OR_NULL || - type == PTR_TO_MEM_OR_NULL; + type == PTR_TO_MEM_OR_NULL || + type == PTR_TO_RDONLY_BUF_OR_NULL || + type == PTR_TO_RDWR_BUF_OR_NULL; } static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) @@ -503,6 +505,10 @@ static const char * const reg_type_str[] = { [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", [PTR_TO_MEM] = "mem", [PTR_TO_MEM_OR_NULL] = "mem_or_null", + [PTR_TO_RDONLY_BUF] = "rdonly_buf", + [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", + [PTR_TO_RDWR_BUF] = "rdwr_buf", + [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", }; static char slot_type_char[] = { @@ -2173,6 +2179,10 @@ static bool is_spillable_regtype(enum bpf_reg_type type) case PTR_TO_XDP_SOCK: case PTR_TO_BTF_ID: case PTR_TO_BTF_ID_OR_NULL: + case PTR_TO_RDONLY_BUF: + case PTR_TO_RDONLY_BUF_OR_NULL: + case PTR_TO_RDWR_BUF: + case PTR_TO_RDWR_BUF_OR_NULL: return true; default: return false; @@ -3052,14 +3062,15 @@ int check_ctx_reg(struct bpf_verifier_env *env, return 0; } -static int check_tp_buffer_access(struct bpf_verifier_env *env, - const struct bpf_reg_state *reg, - int regno, int off, int size) +static int __check_buffer_access(struct bpf_verifier_env *env, + const char *buf_info, + const struct bpf_reg_state *reg, + int regno, int off, int size) { if (off < 0) { verbose(env, - "R%d invalid tracepoint buffer access: off=%d, size=%d", - regno, off, size); + "R%d invalid %s buffer access: off=%d, size=%d", + regno, buf_info, off, size); return -EACCES; } if (!tnum_is_const(reg->var_off) || reg->var_off.value) { @@ -3071,12 +3082,45 @@ static int check_tp_buffer_access(struct bpf_verifier_env *env, regno, off, tn_buf); return -EACCES; } + + return 0; +} + +static int check_tp_buffer_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int regno, int off, int size) +{ + int err; + + err = __check_buffer_access(env, "tracepoint", reg, regno, off, size); + if (err) + return err; + if (off + size > env->prog->aux->max_tp_access) env->prog->aux->max_tp_access = off + size; return 0; } +static int check_buffer_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int regno, int off, int size, + bool zero_size_allowed, + const char *buf_info, + u32 *max_access) +{ + int err; + + err = __check_buffer_access(env, buf_info, reg, regno, off, size); + if (err) + return err; + + if (off + size > *max_access) + *max_access = off + size; + + return 0; +} + /* BPF architecture zero extends alu32 ops into 64-bit registesr */ static void zext_32_to_64(struct bpf_reg_state *reg) { @@ -3427,6 +3471,23 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } else if (reg->type == CONST_PTR_TO_MAP) { err = check_ptr_to_map_access(env, regs, regno, off, size, t, value_regno); + } else if (reg->type == PTR_TO_RDONLY_BUF) { + if (t == BPF_WRITE) { + verbose(env, "R%d cannot write into %s\n", + regno, reg_type_str[reg->type]); + return -EACCES; + } + err = check_buffer_access(env, reg, regno, off, size, "rdonly", + false, + &env->prog->aux->max_rdonly_access); + if (!err && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_RDWR_BUF) { + err = check_buffer_access(env, reg, regno, off, size, "rdwr", + false, + &env->prog->aux->max_rdwr_access); + if (!err && t == BPF_READ && value_regno >= 0) + mark_reg_unknown(env, regs, value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -3668,6 +3729,18 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed); + case PTR_TO_RDONLY_BUF: + if (meta && meta->raw_mode) + return -EACCES; + return check_buffer_access(env, reg, regno, reg->off, + access_size, zero_size_allowed, + "rdonly", + &env->prog->aux->max_rdonly_access); + case PTR_TO_RDWR_BUF: + return check_buffer_access(env, reg, regno, reg->off, + access_size, zero_size_allowed, + "rdwr", + &env->prog->aux->max_rdwr_access); default: /* scalar_value|ptr_to_stack or invalid ptr */ return check_stack_boundary(env, regno, access_size, zero_size_allowed, meta); @@ -3933,6 +4006,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, else if (!type_is_pkt_pointer(type) && type != PTR_TO_MAP_VALUE && type != PTR_TO_MEM && + type != PTR_TO_RDONLY_BUF && + type != PTR_TO_RDWR_BUF && type != expected_type) goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; @@ -6806,6 +6881,10 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, reg->type = PTR_TO_BTF_ID; } else if (reg->type == PTR_TO_MEM_OR_NULL) { reg->type = PTR_TO_MEM; + } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) { + reg->type = PTR_TO_RDONLY_BUF; + } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) { + reg->type = PTR_TO_RDWR_BUF; } if (is_null) { /* We don't need id and ref_obj_id from this point diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 6f921c4ddc2ce..eafcd15e7dfd7 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1217,3 +1218,208 @@ int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, return err; } EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put); + +struct bpf_iter_seq_sk_storage_map_info { + struct bpf_map *map; + unsigned int bucket_id; + unsigned skip_elems; +}; + +static struct bpf_sk_storage_elem * +bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info, + struct bpf_sk_storage_elem *prev_selem) +{ + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + u32 skip_elems = info->skip_elems; + struct bpf_sk_storage_map *smap; + u32 bucket_id = info->bucket_id; + u32 i, count, n_buckets; + struct bucket *b; + + smap = (struct bpf_sk_storage_map *)info->map; + n_buckets = 1U << smap->bucket_log; + if (bucket_id >= n_buckets) + return NULL; + + /* try to find next selem in the same bucket */ + selem = prev_selem; + count = 0; + while (selem) { + selem = hlist_entry_safe(selem->map_node.next, + struct bpf_sk_storage_elem, map_node); + if (!selem) { + /* not found, unlock and go to the next bucket */ + b = &smap->buckets[bucket_id++]; + raw_spin_unlock_bh(&b->lock); + skip_elems = 0; + break; + } + sk_storage = rcu_dereference_raw(selem->sk_storage); + if (sk_storage) { + info->skip_elems = skip_elems + count; + return selem; + } + count++; + } + + for (i = bucket_id; i < (1U << smap->bucket_log); i++) { + b = &smap->buckets[i]; + raw_spin_lock_bh(&b->lock); + count = 0; + hlist_for_each_entry(selem, &b->list, map_node) { + sk_storage = rcu_dereference_raw(selem->sk_storage); + if (sk_storage && count >= skip_elems) { + info->bucket_id = i; + info->skip_elems = count; + return selem; + } + count++; + } + raw_spin_unlock_bh(&b->lock); + skip_elems = 0; + } + + info->bucket_id = i; + info->skip_elems = 0; + return NULL; +} + +static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct bpf_sk_storage_elem *selem; + + selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL); + if (!selem) + return NULL; + + if (*pos == 0) + ++*pos; + return selem; +} + +static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v, + loff_t *pos) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + + ++*pos; + ++info->skip_elems; + return bpf_sk_storage_map_seq_find_next(seq->private, v); +} + +struct bpf_iter__bpf_sk_storage_map { + __bpf_md_ptr(struct bpf_iter_meta *, meta); + __bpf_md_ptr(struct bpf_map *, map); + __bpf_md_ptr(struct sock *, sk); + __bpf_md_ptr(void *, value); +}; + +DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta, + struct bpf_map *map, struct sock *sk, + void *value) + +static int __bpf_sk_storage_map_seq_show(struct seq_file *seq, + struct bpf_sk_storage_elem *selem) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + struct bpf_iter__bpf_sk_storage_map ctx = {}; + struct bpf_sk_storage *sk_storage; + struct bpf_iter_meta meta; + struct bpf_prog *prog; + int ret = 0; + + meta.seq = seq; + prog = bpf_iter_get_info(&meta, selem == NULL); + if (prog) { + ctx.meta = &meta; + ctx.map = info->map; + if (selem) { + sk_storage = rcu_dereference_raw(selem->sk_storage); + ctx.sk = sk_storage->sk; + ctx.value = SDATA(selem)->data; + } + ret = bpf_iter_run_prog(prog, &ctx); + } + + return ret; +} + +static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v) +{ + return __bpf_sk_storage_map_seq_show(seq, v); +} + +static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v) +{ + struct bpf_iter_seq_sk_storage_map_info *info = seq->private; + struct bpf_sk_storage_map *smap; + struct bucket *b; + + if (!v) { + (void)__bpf_sk_storage_map_seq_show(seq, v); + } else { + smap = (struct bpf_sk_storage_map *)info->map; + b = &smap->buckets[info->bucket_id]; + raw_spin_unlock_bh(&b->lock); + } +} + +static int bpf_iter_init_sk_storage_map(void *priv_data, + struct bpf_iter_aux_info *aux) +{ + struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data; + + seq_info->map = aux->map; + return 0; +} + +static int bpf_iter_check_map(struct bpf_prog *prog, + struct bpf_iter_aux_info *aux) +{ + struct bpf_map *map = aux->map; + + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) + return -EINVAL; + + if (prog->aux->max_rdonly_access > map->value_size) + return -EACCES; + + return 0; +} + +static const struct seq_operations bpf_sk_storage_map_seq_ops = { + .start = bpf_sk_storage_map_seq_start, + .next = bpf_sk_storage_map_seq_next, + .stop = bpf_sk_storage_map_seq_stop, + .show = bpf_sk_storage_map_seq_show, +}; + +static const struct bpf_iter_seq_info iter_seq_info = { + .seq_ops = &bpf_sk_storage_map_seq_ops, + .init_seq_private = bpf_iter_init_sk_storage_map, + .fini_seq_private = NULL, + .seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info), +}; + +static struct bpf_iter_reg bpf_sk_storage_map_reg_info = { + .target = "bpf_sk_storage_map", + .check_target = bpf_iter_check_map, + .req_linfo = BPF_ITER_LINK_MAP_FD, + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), + PTR_TO_BTF_ID_OR_NULL }, + { offsetof(struct bpf_iter__bpf_sk_storage_map, value), + PTR_TO_RDWR_BUF_OR_NULL }, + }, + .seq_info = &iter_seq_info, +}; + +static int __init bpf_sk_storage_map_iter_init(void) +{ + bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id = + btf_sock_ids[BTF_SOCK_TYPE_SOCK]; + return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info); +} +late_initcall(bpf_sk_storage_map_iter_init); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f8913923a6c05..5084333b5ab64 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2921,7 +2921,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { DEFINE_BPF_ITER_FUNC(tcp, struct bpf_iter_meta *meta, struct sock_common *sk_common, uid_t uid) -static int bpf_iter_init_tcp(void *priv_data) +static int bpf_iter_init_tcp(void *priv_data, struct bpf_iter_aux_info *aux) { struct tcp_iter_state *st = priv_data; struct tcp_seq_afinfo *afinfo; @@ -2933,7 +2933,7 @@ static int bpf_iter_init_tcp(void *priv_data) afinfo->family = AF_UNSPEC; st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data); + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) kfree(afinfo); return ret; @@ -2947,17 +2947,21 @@ static void bpf_iter_fini_tcp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static struct bpf_iter_reg tcp_reg_info = { - .target = "tcp", +static const struct bpf_iter_seq_info tcp_seq_info = { .seq_ops = &bpf_iter_tcp_seq_ops, .init_seq_private = bpf_iter_init_tcp, .fini_seq_private = bpf_iter_fini_tcp, .seq_priv_size = sizeof(struct tcp_iter_state), +}; + +static struct bpf_iter_reg tcp_reg_info = { + .target = "tcp", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__tcp, sk_common), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &tcp_seq_info, }; static void __init bpf_iter_register(void) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0fb5e4ea133f0..7ce31beccfc2d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -3181,7 +3181,7 @@ static struct pernet_operations __net_initdata udp_sysctl_ops = { DEFINE_BPF_ITER_FUNC(udp, struct bpf_iter_meta *meta, struct udp_sock *udp_sk, uid_t uid, int bucket) -static int bpf_iter_init_udp(void *priv_data) +static int bpf_iter_init_udp(void *priv_data, struct bpf_iter_aux_info *aux) { struct udp_iter_state *st = priv_data; struct udp_seq_afinfo *afinfo; @@ -3194,7 +3194,7 @@ static int bpf_iter_init_udp(void *priv_data) afinfo->family = AF_UNSPEC; afinfo->udp_table = &udp_table; st->bpf_seq_afinfo = afinfo; - ret = bpf_iter_init_seq_net(priv_data); + ret = bpf_iter_init_seq_net(priv_data, aux); if (ret) kfree(afinfo); return ret; @@ -3208,17 +3208,21 @@ static void bpf_iter_fini_udp(void *priv_data) bpf_iter_fini_seq_net(priv_data); } -static struct bpf_iter_reg udp_reg_info = { - .target = "udp", +static const struct bpf_iter_seq_info udp_seq_info = { .seq_ops = &bpf_iter_udp_seq_ops, .init_seq_private = bpf_iter_init_udp, .fini_seq_private = bpf_iter_fini_udp, .seq_priv_size = sizeof(struct udp_iter_state), +}; + +static struct bpf_iter_reg udp_reg_info = { + .target = "udp", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__udp, udp_sk), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &udp_seq_info, }; static void __init bpf_iter_register(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 33f5efbad0a98..8bfc57b0802a9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6427,17 +6427,21 @@ DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *r BTF_ID_LIST(btf_fib6_info_id) BTF_ID(struct, fib6_info) -static struct bpf_iter_reg ipv6_route_reg_info = { - .target = "ipv6_route", +static const struct bpf_iter_seq_info ipv6_route_seq_info = { .seq_ops = &ipv6_route_seq_ops, .init_seq_private = bpf_iter_init_seq_net, .fini_seq_private = bpf_iter_fini_seq_net, .seq_priv_size = sizeof(struct ipv6_route_iter), +}; + +static struct bpf_iter_reg ipv6_route_reg_info = { + .target = "ipv6_route", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__ipv6_route, rt), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &ipv6_route_seq_info, }; static int __init bpf_iter_register(void) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index d8921b8337445..b5f30d7d30d06 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2807,17 +2807,21 @@ static const struct rhashtable_params netlink_rhashtable_params = { BTF_ID_LIST(btf_netlink_sock_id) BTF_ID(struct, netlink_sock) -static struct bpf_iter_reg netlink_reg_info = { - .target = "netlink", +static const struct bpf_iter_seq_info netlink_seq_info = { .seq_ops = &netlink_seq_ops, .init_seq_private = bpf_iter_init_seq_net, .fini_seq_private = bpf_iter_fini_seq_net, .seq_priv_size = sizeof(struct nl_seq_iter), +}; + +static struct bpf_iter_reg netlink_reg_info = { + .target = "netlink", .ctx_arg_info_size = 1, .ctx_arg_info = { { offsetof(struct bpf_iter__netlink, sk), PTR_TO_BTF_ID_OR_NULL }, }, + .seq_info = &netlink_seq_info, }; static int __init bpf_iter_register(void) diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst index 8dce698eab79b..070ffacb42b52 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst @@ -17,14 +17,15 @@ SYNOPSIS ITER COMMANDS =================== -| **bpftool** **iter pin** *OBJ* *PATH* +| **bpftool** **iter pin** *OBJ* *PATH* [**map** *MAP*] | **bpftool** **iter help** | | *OBJ* := /a/file/of/bpf_iter_target.o +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } DESCRIPTION =========== - **bpftool iter pin** *OBJ* *PATH* + **bpftool iter pin** *OBJ* *PATH* [**map** *MAP*] A bpf iterator combines a kernel iterating of particular kernel data (e.g., tasks, bpf_maps, etc.) and a bpf program called for each kernel data object @@ -37,6 +38,12 @@ DESCRIPTION character ('.'), which is reserved for future extensions of *bpffs*. + Map element bpf iterator requires an additional parameter + *MAP* so bpf program can iterate over map elements for + that map. User can have a bpf program in kernel to run + with each map element, do checking, filtering, aggregation, + etc. without copying data to user space. + User can then *cat PATH* to see the bpf iterator output. **bpftool iter help** @@ -64,6 +71,13 @@ EXAMPLES Create a file-based bpf iterator from bpf_iter_netlink.o and pin it to /sys/fs/bpf/my_netlink +**# bpftool iter pin bpf_iter_hashmap.o /sys/fs/bpf/my_hashmap map id 20** + +:: + + Create a file-based bpf iterator from bpf_iter_hashmap.o and map with + id 20, and pin it to /sys/fs/bpf/my_hashmap + SEE ALSO ======== **bpf**\ (2), diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 7b137264ea3a7..257fa310ea2bb 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -615,7 +615,23 @@ _bpftool() iter) case $command in pin) - _filedir + case $prev in + $command) + _filedir + ;; + id) + _bpftool_get_map_ids + ;; + name) + _bpftool_get_map_names + ;; + pinned) + _filedir + ;; + *) + _bpftool_one_of_list $MAP_TYPE + ;; + esac return 0 ;; *) diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 33240fcc63193..c9dba7543dba1 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -2,6 +2,7 @@ // Copyright (C) 2020 Facebook #define _GNU_SOURCE +#include #include #include @@ -9,11 +10,12 @@ static int do_pin(int argc, char **argv) { + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts); const char *objfile, *path; struct bpf_program *prog; struct bpf_object *obj; struct bpf_link *link; - int err; + int err = -1, map_fd = -1; if (!REQ_ARGS(2)) usage(); @@ -21,10 +23,26 @@ static int do_pin(int argc, char **argv) objfile = GET_ARG(); path = GET_ARG(); + /* optional arguments */ + if (argc) { + if (is_prefix(*argv, "map")) { + NEXT_ARG(); + + if (!REQ_ARGS(2)) { + p_err("incorrect map spec"); + return -1; + } + + map_fd = map_parse_fd(&argc, &argv); + if (map_fd < 0) + return -1; + } + } + obj = bpf_object__open(objfile); if (IS_ERR(obj)) { p_err("can't open objfile %s", objfile); - return -1; + goto close_map_fd; } err = bpf_object__load(obj); @@ -39,7 +57,10 @@ static int do_pin(int argc, char **argv) goto close_obj; } - link = bpf_program__attach_iter(prog, NULL); + if (map_fd >= 0) + iter_opts.map_fd = map_fd; + + link = bpf_program__attach_iter(prog, &iter_opts); if (IS_ERR(link)) { err = PTR_ERR(link); p_err("attach_iter failed for program %s", @@ -62,14 +83,18 @@ static int do_pin(int argc, char **argv) bpf_link__destroy(link); close_obj: bpf_object__close(obj); +close_map_fd: + if (map_fd >= 0) + close(map_fd); return err; } static int do_help(int argc, char **argv) { fprintf(stderr, - "Usage: %1$s %2$s pin OBJ PATH\n" + "Usage: %1$s %2$s pin OBJ PATH [map MAP]\n" " %1$s %2$s help\n" + " " HELP_SPEC_MAP "\n" "", bin_name, "iter"); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 54d0c886e3bac..828c2f6438f27 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -246,6 +246,13 @@ enum bpf_link_type { MAX_BPF_LINK_TYPE, }; +enum bpf_iter_link_info { + BPF_ITER_LINK_UNSPEC = 0, + BPF_ITER_LINK_MAP_FD = 1, + + MAX_BPF_ITER_LINK_INFO, +}; + /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a7329b671c413..e1bdf214f75fe 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -598,6 +598,7 @@ int bpf_link_create(int prog_fd, int target_fd, attr.link_create.prog_fd = prog_fd; attr.link_create.target_fd = target_fd; attr.link_create.attach_type = attach_type; + attr.link_create.flags = OPTS_GET(opts, flags, 0); return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index dbef24ebcfcb9..6d367e01d05e9 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -170,8 +170,9 @@ LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, struct bpf_link_create_opts { size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; }; -#define bpf_link_create_opts__last_field sz +#define bpf_link_create_opts__last_field flags LIBBPF_API int bpf_link_create(int prog_fd, int target_fd, enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 846164c79df1c..a05aa7e2bab6c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -8282,13 +8282,20 @@ struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; + __u32 target_fd = 0; if (!OPTS_VALID(opts, bpf_iter_attach_opts)) return ERR_PTR(-EINVAL); + if (OPTS_HAS(opts, map_fd)) { + target_fd = opts->map_fd; + link_create_opts.flags = BPF_ITER_LINK_MAP_FD; + } + prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { pr_warn("program '%s': can't attach before loaded\n", @@ -8301,7 +8308,8 @@ bpf_program__attach_iter(struct bpf_program *prog, return ERR_PTR(-ENOMEM); link->detach = &bpf_link__detach_fd; - link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL); + link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER, + &link_create_opts); if (link_fd < 0) { link_fd = -errno; free(link); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index c2272132e929d..c6813791fa7e7 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -264,8 +264,9 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map); struct bpf_iter_attach_opts { size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 map_fd; }; -#define bpf_iter_attach_opts__last_field sz +#define bpf_iter_attach_opts__last_field map_fd LIBBPF_API struct bpf_link * bpf_program__attach_iter(struct bpf_program *prog, diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index fed42755416db..d95de80b1851f 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -15,6 +15,12 @@ #include "bpf_iter_test_kern2.skel.h" #include "bpf_iter_test_kern3.skel.h" #include "bpf_iter_test_kern4.skel.h" +#include "bpf_iter_bpf_hash_map.skel.h" +#include "bpf_iter_bpf_percpu_hash_map.skel.h" +#include "bpf_iter_bpf_array_map.skel.h" +#include "bpf_iter_bpf_percpu_array_map.skel.h" +#include "bpf_iter_bpf_sk_storage_map.skel.h" +#include "bpf_iter_test_kern5.skel.h" static int duration; @@ -455,6 +461,430 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) bpf_iter_test_kern4__destroy(skel); } +static void test_bpf_hash_map(void) +{ + __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_hash_map *skel; + int err, i, len, map_fd, iter_fd; + __u64 val, expected_val = 0; + struct bpf_link *link; + struct key_t { + int a; + int b; + int c; + } key; + char buf[64]; + + skel = bpf_iter_bpf_hash_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_hash_map__open", + "skeleton open failed\n")) + return; + + skel->bss->in_test_mode = true; + + err = bpf_iter_bpf_hash_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_hash_map__load", + "skeleton load failed\n")) + goto out; + + /* iterator with hashmap2 and hashmap3 should fail */ + opts.map_fd = bpf_map__fd(skel->maps.hashmap2); + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(!IS_ERR(link), "attach_iter", + "attach_iter for hashmap2 unexpected succeeded\n")) + goto out; + + opts.map_fd = bpf_map__fd(skel->maps.hashmap3); + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(!IS_ERR(link), "attach_iter", + "attach_iter for hashmap3 unexpected succeeded\n")) + goto out; + + /* hashmap1 should be good, update map values here */ + map_fd = bpf_map__fd(skel->maps.hashmap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) { + key.a = i + 1; + key.b = i + 2; + key.c = i + 3; + val = i + 4; + expected_key_a += key.a; + expected_key_b += key.b; + expected_key_c += key.c; + expected_val += val; + + err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->key_sum_a != expected_key_a, + "key_sum_a", "got %u expected %u\n", + skel->bss->key_sum_a, expected_key_a)) + goto close_iter; + if (CHECK(skel->bss->key_sum_b != expected_key_b, + "key_sum_b", "got %u expected %u\n", + skel->bss->key_sum_b, expected_key_b)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %llu expected %llu\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_hash_map__destroy(skel); +} + +static void test_bpf_percpu_hash_map(void) +{ + __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0; + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_percpu_hash_map *skel; + int err, i, j, len, map_fd, iter_fd; + __u32 expected_val = 0; + struct bpf_link *link; + struct key_t { + int a; + int b; + int c; + } key; + char buf[64]; + void *val; + + val = malloc(8 * bpf_num_possible_cpus()); + + skel = bpf_iter_bpf_percpu_hash_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open", + "skeleton open failed\n")) + return; + + skel->rodata->num_cpus = bpf_num_possible_cpus(); + + err = bpf_iter_bpf_percpu_hash_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load", + "skeleton load failed\n")) + goto out; + + /* update map values here */ + map_fd = bpf_map__fd(skel->maps.hashmap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) { + key.a = i + 1; + key.b = i + 2; + key.c = i + 3; + expected_key_a += key.a; + expected_key_b += key.b; + expected_key_c += key.c; + + for (j = 0; j < bpf_num_possible_cpus(); j++) { + *(__u32 *)(val + j * 8) = i + j; + expected_val += i + j; + } + + err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->key_sum_a != expected_key_a, + "key_sum_a", "got %u expected %u\n", + skel->bss->key_sum_a, expected_key_a)) + goto close_iter; + if (CHECK(skel->bss->key_sum_b != expected_key_b, + "key_sum_b", "got %u expected %u\n", + skel->bss->key_sum_b, expected_key_b)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %u expected %u\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_percpu_hash_map__destroy(skel); +} + +static void test_bpf_array_map(void) +{ + __u64 val, expected_val = 0, res_first_val, first_val = 0; + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + __u32 expected_key = 0, res_first_key; + struct bpf_iter_bpf_array_map *skel; + int err, i, map_fd, iter_fd; + struct bpf_link *link; + char buf[64] = {}; + int len, start; + + skel = bpf_iter_bpf_array_map__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load", + "skeleton open_and_load failed\n")) + return; + + map_fd = bpf_map__fd(skel->maps.arraymap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { + val = i + 4; + expected_key += i; + expected_val += val; + + if (i == 0) + first_val = val; + + err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + start = 0; + while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0) + start += len; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + res_first_key = *(__u32 *)buf; + res_first_val = *(__u64 *)(buf + sizeof(__u32)); + if (CHECK(res_first_key != 0 || res_first_val != first_val, + "bpf_seq_write", + "seq_write failure: first key %u vs expected 0, " + " first value %llu vs expected %llu\n", + res_first_key, res_first_val, first_val)) + goto close_iter; + + if (CHECK(skel->bss->key_sum != expected_key, + "key_sum", "got %u expected %u\n", + skel->bss->key_sum, expected_key)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %llu expected %llu\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + + for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { + err = bpf_map_lookup_elem(map_fd, &i, &val); + if (CHECK(err, "map_lookup", "map_lookup failed\n")) + goto out; + if (CHECK(i != val, "invalid_val", + "got value %llu expected %u\n", val, i)) + goto out; + } + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_array_map__destroy(skel); +} + +static void test_bpf_percpu_array_map(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_bpf_percpu_array_map *skel; + __u32 expected_key = 0, expected_val = 0; + int err, i, j, map_fd, iter_fd; + struct bpf_link *link; + char buf[64]; + void *val; + int len; + + val = malloc(8 * bpf_num_possible_cpus()); + + skel = bpf_iter_bpf_percpu_array_map__open(); + if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open", + "skeleton open failed\n")) + return; + + skel->rodata->num_cpus = bpf_num_possible_cpus(); + + err = bpf_iter_bpf_percpu_array_map__load(skel); + if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load", + "skeleton load failed\n")) + goto out; + + /* update map values here */ + map_fd = bpf_map__fd(skel->maps.arraymap1); + for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) { + expected_key += i; + + for (j = 0; j < bpf_num_possible_cpus(); j++) { + *(__u32 *)(val + j * 8) = i + j; + expected_val += i + j; + } + + err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->key_sum != expected_key, + "key_sum", "got %u expected %u\n", + skel->bss->key_sum, expected_key)) + goto close_iter; + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %u expected %u\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + bpf_iter_bpf_percpu_array_map__destroy(skel); +} + +static void test_bpf_sk_storage_map(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + int err, i, len, map_fd, iter_fd, num_sockets; + struct bpf_iter_bpf_sk_storage_map *skel; + int sock_fd[3] = {-1, -1, -1}; + __u32 val, expected_val = 0; + struct bpf_link *link; + char buf[64]; + + skel = bpf_iter_bpf_sk_storage_map__open_and_load(); + if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load", + "skeleton open_and_load failed\n")) + return; + + map_fd = bpf_map__fd(skel->maps.sk_stg_map); + num_sockets = ARRAY_SIZE(sock_fd); + for (i = 0; i < num_sockets; i++) { + sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0); + if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno)) + goto out; + + val = i + 1; + expected_val += val; + + err = bpf_map_update_elem(map_fd, &sock_fd[i], &val, + BPF_NOEXIST); + if (CHECK(err, "map_update", "map_update failed\n")) + goto out; + } + + opts.map_fd = map_fd; + link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->ipv6_sk_count != num_sockets, + "ipv6_sk_count", "got %u expected %u\n", + skel->bss->ipv6_sk_count, num_sockets)) + goto close_iter; + + if (CHECK(skel->bss->val_sum != expected_val, + "val_sum", "got %u expected %u\n", + skel->bss->val_sum, expected_val)) + goto close_iter; + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + for (i = 0; i < num_sockets; i++) { + if (sock_fd[i] >= 0) + close(sock_fd[i]); + } + bpf_iter_bpf_sk_storage_map__destroy(skel); +} + +static void test_rdonly_buf_out_of_bound(void) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + struct bpf_iter_test_kern5 *skel; + struct bpf_link *link; + + skel = bpf_iter_test_kern5__open_and_load(); + if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load", + "skeleton open_and_load failed\n")) + return; + + opts.map_fd = bpf_map__fd(skel->maps.hashmap1); + link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts); + if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n")) + bpf_link__destroy(link); + + bpf_iter_test_kern5__destroy(skel); +} + void test_bpf_iter(void) { if (test__start_subtest("btf_id_or_null")) @@ -491,4 +921,16 @@ void test_bpf_iter(void) test_overflow(true, false); if (test__start_subtest("prog-ret-1")) test_overflow(false, true); + if (test__start_subtest("bpf_hash_map")) + test_bpf_hash_map(); + if (test__start_subtest("bpf_percpu_hash_map")) + test_bpf_percpu_hash_map(); + if (test__start_subtest("bpf_array_map")) + test_bpf_array_map(); + if (test__start_subtest("bpf_percpu_array_map")) + test_bpf_percpu_array_map(); + if (test__start_subtest("bpf_sk_storage_map")) + test_bpf_sk_storage_map(); + if (test__start_subtest("rdonly-buf-out-of-bound")) + test_rdonly_buf_out_of_bound(); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c new file mode 100644 index 0000000000000..6286023fd62ba --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u64); +} arraymap1 SEC(".maps"); + +__u32 key_sum = 0; +__u64 val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 *key = ctx->key; + __u64 *val = ctx->value; + + if (key == (void *)0 || val == (void *)0) + return 0; + + bpf_seq_write(ctx->meta->seq, key, sizeof(__u32)); + bpf_seq_write(ctx->meta->seq, val, sizeof(__u64)); + key_sum += *key; + val_sum += *val; + *val = *key; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c new file mode 100644 index 0000000000000..07ddbfdbcab70 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u64); +} hashmap1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, __u64); + __type(value, __u64); +} hashmap2 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u32); +} hashmap3 SEC(".maps"); + +/* will set before prog run */ +bool in_test_mode = 0; + +/* will collect results during prog run */ +__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0; +__u64 val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + __u32 seq_num = ctx->meta->seq_num; + struct bpf_map *map = ctx->map; + struct key_t *key = ctx->key; + __u64 *val = ctx->value; + + if (in_test_mode) { + /* test mode is used by selftests to + * test functionality of bpf_hash_map iter. + * + * the above hashmap1 will have correct size + * and will be accepted, hashmap2 and hashmap3 + * should be rejected due to smaller key/value + * size. + */ + if (key == (void *)0 || val == (void *)0) + return 0; + + key_sum_a += key->a; + key_sum_b += key->b; + key_sum_c += key->c; + val_sum += *val; + return 0; + } + + /* non-test mode, the map is prepared with the + * below bpftool command sequence: + * bpftool map create /sys/fs/bpf/m1 type hash \ + * key 12 value 8 entries 3 name map1 + * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 1 \ + * value 0 0 0 1 0 0 0 1 + * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 2 \ + * value 0 0 0 1 0 0 0 2 + * The bpftool iter command line: + * bpftool iter pin ./bpf_iter_bpf_hash_map.o /sys/fs/bpf/p1 \ + * map id 77 + * The below output will be: + * map dump starts + * 77: (1000000 0 2000000) (200000001000000) + * 77: (1000000 0 1000000) (100000001000000) + * map dump ends + */ + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, "map dump starts\n"); + + if (key == (void *)0 || val == (void *)0) { + BPF_SEQ_PRINTF(seq, "map dump ends\n"); + return 0; + } + + BPF_SEQ_PRINTF(seq, "%d: (%x %d %x) (%llx)\n", map->id, + key->a, key->b, key->c, *val); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c new file mode 100644 index 0000000000000..85fa710fad908 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 3); + __type(key, __u32); + __type(value, __u32); +} arraymap1 SEC(".maps"); + +/* will set before prog run */ +volatile const __u32 num_cpus = 0; + +__u32 key_sum = 0, val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_percpu_array_map(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 *key = ctx->key; + void *pptr = ctx->value; + __u32 step; + int i; + + if (key == (void *)0 || pptr == (void *)0) + return 0; + + key_sum += *key; + + step = 8; + for (i = 0; i < num_cpus; i++) { + val_sum += *(__u32 *)pptr; + pptr += step; + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c new file mode 100644 index 0000000000000..feaaa2b89c574 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u32); +} hashmap1 SEC(".maps"); + +/* will set before prog run */ +volatile const __u32 num_cpus = 0; + +/* will collect results during prog run */ +__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0; +__u32 val_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_percpu_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + struct key_t *key = ctx->key; + void *pptr = ctx->value; + __u32 step; + int i; + + if (key == (void *)0 || pptr == (void *)0) + return 0; + + key_sum_a += key->a; + key_sum_b += key->b; + key_sum_c += key->c; + + step = 8; + for (i = 0; i < num_cpus; i++) { + val_sum += *(__u32 *)pptr; + pptr += step; + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c new file mode 100644 index 0000000000000..6b70ccaba301f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_stg_map SEC(".maps"); + +__u32 val_sum = 0; +__u32 ipv6_sk_count = 0; + +SEC("iter/bpf_sk_storage_map") +int dump_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx) +{ + struct sock *sk = ctx->sk; + __u32 *val = ctx->value; + + if (sk == (void *)0 || val == (void *)0) + return 0; + + if (sk->sk_family == AF_INET6) + ipv6_sk_count++; + + val_sum += *val; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c new file mode 100644 index 0000000000000..e3a7575e81d2c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +struct key_t { + int a; + int b; + int c; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 3); + __type(key, struct key_t); + __type(value, __u64); +} hashmap1 SEC(".maps"); + +__u32 key_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) +{ + void *key = ctx->key; + + if (key == (void *)0) + return 0; + + /* out of bound access w.r.t. hashmap1 */ + key_sum += *(__u32 *)(key + sizeof(struct key_t)); + return 0; +}