From ad46061fca87c0ab6670af3a44e03237f99d7a1f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:25 -0800 Subject: [PATCH 1/8] bpf: arraymap: move checks out of alloc function Use the new callback to perform allocation checks for array maps. The fd maps don't need a special allocation callback, they only need a special check callback. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- kernel/bpf/arraymap.c | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index ab94d304a634a..68336092bfb44 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -49,27 +49,35 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) } /* Called from syscall */ -static struct bpf_map *array_map_alloc(union bpf_attr *attr) +static int array_map_alloc_check(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; int numa_node = bpf_map_attr_numa_node(attr); - u32 elem_size, index_mask, max_entries; - bool unpriv = !capable(CAP_SYS_ADMIN); - struct bpf_array *array; - u64 array_size, mask64; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size == 0 || attr->map_flags & ~ARRAY_CREATE_FLAG_MASK || (percpu && numa_node != NUMA_NO_NODE)) - return ERR_PTR(-EINVAL); + return -EINVAL; if (attr->value_size > KMALLOC_MAX_SIZE) /* if value_size is bigger, the user space won't be able to * access the elements. 
*/ - return ERR_PTR(-E2BIG); + return -E2BIG; + + return 0; +} + +static struct bpf_map *array_map_alloc(union bpf_attr *attr) +{ + bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; + int numa_node = bpf_map_attr_numa_node(attr); + u32 elem_size, index_mask, max_entries; + bool unpriv = !capable(CAP_SYS_ADMIN); + struct bpf_array *array; + u64 array_size, mask64; elem_size = round_up(attr->value_size, 8); @@ -327,6 +335,7 @@ static void array_map_free(struct bpf_map *map) } const struct bpf_map_ops array_map_ops = { + .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, @@ -337,6 +346,7 @@ const struct bpf_map_ops array_map_ops = { }; const struct bpf_map_ops percpu_array_map_ops = { + .map_alloc_check = array_map_alloc_check, .map_alloc = array_map_alloc, .map_free = array_map_free, .map_get_next_key = array_map_get_next_key, @@ -345,12 +355,12 @@ const struct bpf_map_ops percpu_array_map_ops = { .map_delete_elem = array_map_delete_elem, }; -static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr) +static int fd_array_map_alloc_check(union bpf_attr *attr) { /* only file descriptors can be stored in this type of map */ if (attr->value_size != sizeof(u32)) - return ERR_PTR(-EINVAL); - return array_map_alloc(attr); + return -EINVAL; + return array_map_alloc_check(attr); } static void fd_array_map_free(struct bpf_map *map) @@ -474,7 +484,8 @@ void bpf_fd_array_map_clear(struct bpf_map *map) } const struct bpf_map_ops prog_array_map_ops = { - .map_alloc = fd_array_map_alloc, + .map_alloc_check = fd_array_map_alloc_check, + .map_alloc = array_map_alloc, .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, @@ -561,7 +572,8 @@ static void perf_event_fd_array_release(struct bpf_map *map, } const struct bpf_map_ops perf_event_array_map_ops = { - .map_alloc = fd_array_map_alloc, + .map_alloc_check = 
fd_array_map_alloc_check, + .map_alloc = array_map_alloc, .map_free = fd_array_map_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, @@ -592,7 +604,8 @@ static void cgroup_fd_array_free(struct bpf_map *map) } const struct bpf_map_ops cgroup_array_map_ops = { - .map_alloc = fd_array_map_alloc, + .map_alloc_check = fd_array_map_alloc_check, + .map_alloc = array_map_alloc, .map_free = cgroup_fd_array_free, .map_get_next_key = array_map_get_next_key, .map_lookup_elem = fd_array_map_lookup_elem, @@ -610,7 +623,7 @@ static struct bpf_map *array_of_map_alloc(union bpf_attr *attr) if (IS_ERR(inner_map_meta)) return inner_map_meta; - map = fd_array_map_alloc(attr); + map = array_map_alloc(attr); if (IS_ERR(map)) { bpf_map_meta_free(inner_map_meta); return map; @@ -673,6 +686,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map, } const struct bpf_map_ops array_of_maps_map_ops = { + .map_alloc_check = fd_array_map_alloc_check, .map_alloc = array_of_map_alloc, .map_free = array_of_map_free, .map_get_next_key = array_map_get_next_key, From 32852649ba3f74aab10025f2e59ca2b49d5cccfa Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:26 -0800 Subject: [PATCH 2/8] bpf: arraymap: use bpf_map_init_from_attr() Arraymap was not converted to use bpf_map_init_from_attr() to avoid merge conflicts with emergency fixes. Do it now. 
Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- kernel/bpf/arraymap.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 68336092bfb44..b1f66480135b3 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -120,12 +120,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array->map.unpriv_array = unpriv; /* copy mandatory map attributes */ - array->map.map_type = attr->map_type; - array->map.key_size = attr->key_size; - array->map.value_size = attr->value_size; - array->map.max_entries = attr->max_entries; - array->map.map_flags = attr->map_flags; - array->map.numa_node = numa_node; + bpf_map_init_from_attr(&array->map, attr); array->elem_size = elem_size; if (!percpu) From 7a0ef6939548b9eb74bf464daf55ad68a23602a2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:27 -0800 Subject: [PATCH 3/8] bpf: offload: allow array map offload The special handling of different map types is left to the driver. Allow offload of array maps by simply adding it to accepted types. For nfp we have to make sure array elements are not deleted. 
Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 2 ++ kernel/bpf/offload.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index c452bf9462e0e..1a357aacc444b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -176,6 +176,8 @@ nfp_bpf_map_get_next_key(struct bpf_offloaded_map *offmap, static int nfp_bpf_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) { + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) + return -EINVAL; return nfp_bpf_ctrl_del_entry(offmap, key); } diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 6c0baa1cf8f8b..2657976aec2a8 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -299,7 +299,8 @@ struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr) if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); - if (attr->map_type != BPF_MAP_TYPE_HASH) + if (attr->map_type != BPF_MAP_TYPE_ARRAY && + attr->map_type != BPF_MAP_TYPE_HASH) return ERR_PTR(-EINVAL); offmap = kzalloc(sizeof(*offmap), GFP_USER); From 52775b33bb5072fbc07b02c0cf4fe8da1f7ee7cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:28 -0800 Subject: [PATCH 4/8] bpf: offload: report device information about offloaded maps Tell user space about device on which the map was created. Unfortunate reality of user ABI makes sharing this code with program offload difficult but the information is the same. 
Signed-off-by: Jakub Kicinski Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ include/uapi/linux/bpf.h | 3 ++ kernel/bpf/offload.c | 55 ++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 6 ++++ tools/include/uapi/linux/bpf.h | 3 ++ 5 files changed, 69 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 025b1c2f80532..66df387106de4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -586,6 +586,8 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog); int bpf_prog_offload_info_fill(struct bpf_prog_info *info, struct bpf_prog *prog); +int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map); + int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value); int bpf_map_offload_update_elem(struct bpf_map *map, void *key, void *value, u64 flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 74dc4dc98681a..406c19d6016b2 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -938,6 +938,9 @@ struct bpf_map_info { __u32 max_entries; __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 2657976aec2a8..c9401075b58c8 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -413,6 +413,61 @@ int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key) return ret; } +struct ns_get_path_bpf_map_args { + struct bpf_offloaded_map *offmap; + struct bpf_map_info *info; +}; + +static struct ns_common *bpf_map_offload_info_fill_ns(void *private_data) +{ + struct ns_get_path_bpf_map_args *args = private_data; + struct ns_common *ns; + struct net *net; + + rtnl_lock(); + down_read(&bpf_devs_lock); + + if (args->offmap->netdev) { + args->info->ifindex = args->offmap->netdev->ifindex; + net 
= dev_net(args->offmap->netdev); + get_net(net); + ns = &net->ns; + } else { + args->info->ifindex = 0; + ns = NULL; + } + + up_read(&bpf_devs_lock); + rtnl_unlock(); + + return ns; +} + +int bpf_map_offload_info_fill(struct bpf_map_info *info, struct bpf_map *map) +{ + struct ns_get_path_bpf_map_args args = { + .offmap = map_to_offmap(map), + .info = info, + }; + struct inode *ns_inode; + struct path ns_path; + void *res; + + res = ns_get_path_cb(&ns_path, bpf_map_offload_info_fill_ns, &args); + if (IS_ERR(res)) { + if (!info->ifindex) + return -ENODEV; + return PTR_ERR(res); + } + + ns_inode = ns_path.dentry->d_inode; + info->netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); + info->netns_ino = ns_inode->i_ino; + path_put(&ns_path); + + return 0; +} + bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map) { struct bpf_offloaded_map *offmap; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 97a825ffc7639..5bdb0cc84ad24 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1801,6 +1801,12 @@ static int bpf_map_get_info_by_fd(struct bpf_map *map, info.map_flags = map->map_flags; memcpy(info.name, map->name, sizeof(map->name)); + if (bpf_map_is_dev_bound(map)) { + err = bpf_map_offload_info_fill(&info, map); + if (err) + return err; + } + if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) return -EFAULT; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7c2259e8bc545..af1f49ad8b88d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -938,6 +938,9 @@ struct bpf_map_info { __u32 max_entries; __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; + __u32 ifindex; + __u64 netns_dev; + __u64 netns_ino; } __attribute__((aligned(8))); /* User bpf_sock_ops struct to access socket values and specify request ops From 064a07cba2919bcfbadf9edf5c26c740e69fa585 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:29 -0800 
Subject: [PATCH 5/8] tools: bpftool: report device information for offloaded maps Print the information about device on which map is created. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/map.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 8d7db9d6b9cdf..a152c1a5c94c7 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -428,6 +428,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_name(json_wtr, "flags"); jsonw_printf(json_wtr, "%#x", info->map_flags); + + print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); + jsonw_uint_field(json_wtr, "bytes_key", info->key_size); jsonw_uint_field(json_wtr, "bytes_value", info->value_size); jsonw_uint_field(json_wtr, "max_entries", info->max_entries); @@ -469,7 +472,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (*info->name) printf("name %s ", info->name); - printf("flags 0x%x\n", info->map_flags); + printf("flags 0x%x", info->map_flags); + print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); + printf("\n"); printf("\tkey %uB value %uB max_entries %u", info->key_size, info->value_size, info->max_entries); From 395cacb5f1a0a290f1ae9ca4692c400d2b57a705 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:30 -0800 Subject: [PATCH 6/8] netdevsim: bpf: support fake map offload Add to netdevsim ability to pretend it's offloading BPF maps. We only allow allocation of tiny 2 entry maps, to keep things simple. Mutex lock may seem heavy for the operations we perform, but we want to make sure callbacks can sleep. 
Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/netdevsim/bpf.c | 246 ++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdevsim.h | 3 + 2 files changed, 249 insertions(+) diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 5134d5c1306c7..b3851bbefad3b 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -31,6 +32,19 @@ struct nsim_bpf_bound_prog { struct list_head l; }; +#define NSIM_BPF_MAX_KEYS 2 + +struct nsim_bpf_bound_map { + struct netdevsim *ns; + struct bpf_offloaded_map *map; + struct mutex mutex; + struct nsim_map_entry { + void *key; + void *value; + } entry[NSIM_BPF_MAX_KEYS]; + struct list_head l; +}; + static int nsim_debugfs_bpf_string_read(struct seq_file *file, void *data) { const char **str = file->private; @@ -284,6 +298,224 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf) return 0; } +static bool +nsim_map_key_match(struct bpf_map *map, struct nsim_map_entry *e, void *key) +{ + return e->key && !memcmp(key, e->key, map->key_size); +} + +static int nsim_map_key_find(struct bpf_offloaded_map *offmap, void *key) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) + if (nsim_map_key_match(&offmap->map, &nmap->entry[i], key)) + return i; + + return -ENOENT; +} + +static int +nsim_map_alloc_elem(struct bpf_offloaded_map *offmap, unsigned int idx) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + + nmap->entry[idx].key = kmalloc(offmap->map.key_size, GFP_USER); + if (!nmap->entry[idx].key) + return -ENOMEM; + nmap->entry[idx].value = kmalloc(offmap->map.value_size, GFP_USER); + if (!nmap->entry[idx].value) { + kfree(nmap->entry[idx].key); + nmap->entry[idx].key = NULL; + return -ENOMEM; + } + + return 0; +} + +static int +nsim_map_get_next_key(struct bpf_offloaded_map *offmap, + void *key, 
void *next_key) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + int idx = -ENOENT; + + mutex_lock(&nmap->mutex); + + if (key) + idx = nsim_map_key_find(offmap, key); + if (idx == -ENOENT) + idx = 0; + else + idx++; + + for (; idx < ARRAY_SIZE(nmap->entry); idx++) { + if (nmap->entry[idx].key) { + memcpy(next_key, nmap->entry[idx].key, + offmap->map.key_size); + break; + } + } + + mutex_unlock(&nmap->mutex); + + if (idx == ARRAY_SIZE(nmap->entry)) + return -ENOENT; + return 0; +} + +static int +nsim_map_lookup_elem(struct bpf_offloaded_map *offmap, void *key, void *value) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + int idx; + + mutex_lock(&nmap->mutex); + + idx = nsim_map_key_find(offmap, key); + if (idx >= 0) + memcpy(value, nmap->entry[idx].value, offmap->map.value_size); + + mutex_unlock(&nmap->mutex); + + return idx < 0 ? idx : 0; +} + +static int +nsim_map_update_elem(struct bpf_offloaded_map *offmap, + void *key, void *value, u64 flags) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + int idx, err = 0; + + mutex_lock(&nmap->mutex); + + idx = nsim_map_key_find(offmap, key); + if (idx < 0 && flags == BPF_EXIST) { + err = idx; + goto exit_unlock; + } + if (idx >= 0 && flags == BPF_NOEXIST) { + err = -EEXIST; + goto exit_unlock; + } + + if (idx < 0) { + for (idx = 0; idx < ARRAY_SIZE(nmap->entry); idx++) + if (!nmap->entry[idx].key) + break; + if (idx == ARRAY_SIZE(nmap->entry)) { + err = -E2BIG; + goto exit_unlock; + } + + err = nsim_map_alloc_elem(offmap, idx); + if (err) + goto exit_unlock; + } + + memcpy(nmap->entry[idx].key, key, offmap->map.key_size); + memcpy(nmap->entry[idx].value, value, offmap->map.value_size); +exit_unlock: + mutex_unlock(&nmap->mutex); + + return err; +} + +static int nsim_map_delete_elem(struct bpf_offloaded_map *offmap, void *key) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + int idx; + + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) + return -EINVAL; + + mutex_lock(&nmap->mutex); + 
+ idx = nsim_map_key_find(offmap, key); + if (idx >= 0) { + kfree(nmap->entry[idx].key); + kfree(nmap->entry[idx].value); + memset(&nmap->entry[idx], 0, sizeof(nmap->entry[idx])); + } + + mutex_unlock(&nmap->mutex); + + return idx < 0 ? idx : 0; +} + +static const struct bpf_map_dev_ops nsim_bpf_map_ops = { + .map_get_next_key = nsim_map_get_next_key, + .map_lookup_elem = nsim_map_lookup_elem, + .map_update_elem = nsim_map_update_elem, + .map_delete_elem = nsim_map_delete_elem, +}; + +static int +nsim_bpf_map_alloc(struct netdevsim *ns, struct bpf_offloaded_map *offmap) +{ + struct nsim_bpf_bound_map *nmap; + unsigned int i; + int err; + + if (WARN_ON(offmap->map.map_type != BPF_MAP_TYPE_ARRAY && + offmap->map.map_type != BPF_MAP_TYPE_HASH)) + return -EINVAL; + if (offmap->map.max_entries > NSIM_BPF_MAX_KEYS) + return -ENOMEM; + if (offmap->map.map_flags) + return -EINVAL; + + nmap = kzalloc(sizeof(*nmap), GFP_USER); + if (!nmap) + return -ENOMEM; + + offmap->dev_priv = nmap; + nmap->ns = ns; + nmap->map = offmap; + mutex_init(&nmap->mutex); + + if (offmap->map.map_type == BPF_MAP_TYPE_ARRAY) { + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { + u32 *key; + + err = nsim_map_alloc_elem(offmap, i); + if (err) + goto err_free; + key = nmap->entry[i].key; + *key = i; + } + } + + offmap->dev_ops = &nsim_bpf_map_ops; + list_add_tail(&nmap->l, &ns->bpf_bound_maps); + + return 0; + +err_free: + while (i--) { + kfree(nmap->entry[i].key); + kfree(nmap->entry[i].value); + } + kfree(nmap); + return err; +} + +static void nsim_bpf_map_free(struct bpf_offloaded_map *offmap) +{ + struct nsim_bpf_bound_map *nmap = offmap->dev_priv; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(nmap->entry); i++) { + kfree(nmap->entry[i].key); + kfree(nmap->entry[i].value); + } + list_del_init(&nmap->l); + mutex_destroy(&nmap->mutex); + kfree(nmap); +} + int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) { struct netdevsim *ns = netdev_priv(dev); @@ -328,6 +560,14 @@ int 
nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) return err; return nsim_xdp_set_prog(ns, bpf); + case BPF_OFFLOAD_MAP_ALLOC: + if (!ns->bpf_map_accept) + return -EOPNOTSUPP; + + return nsim_bpf_map_alloc(ns, bpf->offmap); + case BPF_OFFLOAD_MAP_FREE: + nsim_bpf_map_free(bpf->offmap); + return 0; default: return -EINVAL; } @@ -336,6 +576,7 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) int nsim_bpf_init(struct netdevsim *ns) { INIT_LIST_HEAD(&ns->bpf_bound_progs); + INIT_LIST_HEAD(&ns->bpf_bound_maps); debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir, &ns->bpf_offloaded_id); @@ -362,12 +603,17 @@ int nsim_bpf_init(struct netdevsim *ns) debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir, &ns->bpf_xdpoffload_accept); + ns->bpf_map_accept = true; + debugfs_create_bool("bpf_map_accept", 0600, ns->ddir, + &ns->bpf_map_accept); + return 0; } void nsim_bpf_uninit(struct netdevsim *ns) { WARN_ON(!list_empty(&ns->bpf_bound_progs)); + WARN_ON(!list_empty(&ns->bpf_bound_maps)); WARN_ON(ns->xdp_prog); WARN_ON(ns->bpf_offloaded); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 32270de9395a2..b803612003023 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -61,6 +61,9 @@ struct netdevsim { bool bpf_tc_non_bound_accept; bool bpf_xdpdrv_accept; bool bpf_xdpoffload_accept; + + bool bpf_map_accept; + struct list_head bpf_bound_maps; }; extern struct dentry *nsim_ddir; From 7fedbb7c5a7c4bda418bc1056c06c81db36e4299 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:31 -0800 Subject: [PATCH 7/8] selftest/bpf: extend the offload test with map checks Check map device information is reported correctly, and perform basic map operations. Check device destruction gets rid of the maps and map allocation failure path by telling netdevsim to reject map offload via DebugFS. 
Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/sample_map_ret0.c | 34 +++ tools/testing/selftests/bpf/test_offload.py | 206 ++++++++++++++++-- 3 files changed, 218 insertions(+), 25 deletions(-) create mode 100644 tools/testing/selftests/bpf/sample_map_ret0.c diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index a8aa7e251c8ed..3a44b655d852d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -19,7 +19,8 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \ - test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o + test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ + sample_map_ret0.o TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ test_offload.py diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c new file mode 100644 index 0000000000000..0756303676aca --- /dev/null +++ b/tools/testing/selftests/bpf/sample_map_ret0.c @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ +#include +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") htab = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +struct bpf_map_def SEC("maps") array = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(long), + .max_entries = 2, +}; + +/* Sample program which should always load for testing control paths. 
*/ +SEC(".text") int func() +{ + __u64 key64 = 0; + __u32 key = 0; + long *value; + + value = bpf_map_lookup_elem(&htab, &key); + if (!value) + return 1; + value = bpf_map_lookup_elem(&array, &key64); + if (!value) + return 1; + + return 0; +} diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index e3c750f17cb83..833b9c1ec4507 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -20,6 +20,7 @@ import pprint import random import string +import struct import subprocess import time @@ -156,6 +157,14 @@ def bpftool_prog_list(expected=None, ns=""): (len(progs), expected)) return progs +def bpftool_map_list(expected=None, ns=""): + _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) + if expected is not None: + if len(maps) != expected: + fail(True, "%d BPF maps loaded, expected %d" % + (len(maps), expected)) + return maps + def bpftool_prog_list_wait(expected=0, n_retry=20): for i in range(n_retry): nprogs = len(bpftool_prog_list()) @@ -164,6 +173,14 @@ def bpftool_prog_list_wait(expected=0, n_retry=20): time.sleep(0.05) raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) +def bpftool_map_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nmaps = len(bpftool_map_list()) + if nmaps == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) + def ip(args, force=False, JSON=True, ns="", fail=True): if force: args = "-force " + args @@ -193,6 +210,26 @@ def mknetns(n_retry=10): return name return None +def int2str(fmt, val): + ret = [] + for b in struct.pack(fmt, val): + ret.append(int(b)) + return " ".join(map(lambda x: str(x), ret)) + +def str2int(strtab): + inttab = [] + for i in strtab: + inttab.append(int(i, 16)) + ba = bytearray(inttab) + if len(strtab) == 4: + fmt = "I" + elif len(strtab) == 8: + fmt = 
"Q" + else: + raise Exception("String array of len %d can't be unpacked to an int" % + (len(strtab))) + return struct.unpack(fmt, ba)[0] + class DebugfsDir: """ Class for accessing DebugFS directories as a dictionary. @@ -311,13 +348,13 @@ def set_mtu(self, mtu, fail=True): return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), fail=fail) - def set_xdp(self, bpf, mode, force=False, fail=True): + def set_xdp(self, bpf, mode, force=False, JSON=True, fail=True): return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), - force=force, fail=fail) + force=force, JSON=JSON, fail=fail) - def unset_xdp(self, mode, force=False, fail=True): + def unset_xdp(self, mode, force=False, JSON=True, fail=True): return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), - force=force, fail=fail) + force=force, JSON=JSON, fail=fail) def ip_link_show(self, xdp): _, link = ip("link show dev %s" % (self['ifname'])) @@ -390,12 +427,16 @@ def set_ethtool_tc_offloads(self, enable, fail=True): ################################################################################ def clean_up(): + global files, netns, devs + for dev in devs: dev.remove() for f in files: cmd("rm -f %s" % (f)) for ns in netns: cmd("ip netns delete %s" % (ns)) + files = [] + netns = [] def pin_prog(file_name, idx=0): progs = bpftool_prog_list(expected=(idx + 1)) @@ -405,16 +446,31 @@ def pin_prog(file_name, idx=0): return file_name, bpf_pinned(file_name) -def check_dev_info(other_ns, ns, pin_file=None, removed=False): - if removed: - bpftool_prog_list(expected=0) - ret, err = bpftool("prog show pin %s" % (pin_file), fail=False) - fail(ret == 0, "Showing prog with removed device did not fail") - fail(err["error"].find("No such device") == -1, - "Showing prog with removed device expected ENODEV, error is %s" % - (err["error"])) - return - progs = bpftool_prog_list(expected=int(not removed), ns=ns) +def pin_map(file_name, idx=0, expected=1): + maps = bpftool_map_list(expected=expected) + m 
= maps[idx] + bpftool("map pin id %d %s" % (m["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +def check_dev_info_removed(prog_file=None, map_file=None): + bpftool_prog_list(expected=0) + ret, err = bpftool("prog show pin %s" % (prog_file), fail=False) + fail(ret == 0, "Showing prog with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing prog with removed device expected ENODEV, error is %s" % + (err["error"])) + + bpftool_map_list(expected=0) + ret, err = bpftool("map show pin %s" % (map_file), fail=False) + fail(ret == 0, "Showing map with removed device did not fail") + fail(err["error"].find("No such device") == -1, + "Showing map with removed device expected ENODEV, error is %s" % + (err["error"])) + +def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False): + progs = bpftool_prog_list(expected=1, ns=ns) prog = progs[0] fail("dev" not in prog.keys(), "Device parameters not reported") @@ -423,16 +479,17 @@ def check_dev_info(other_ns, ns, pin_file=None, removed=False): fail("ns_dev" not in dev.keys(), "Device parameters not reported") fail("ns_inode" not in dev.keys(), "Device parameters not reported") - if not removed and not other_ns: + if not other_ns: fail("ifname" not in dev.keys(), "Ifname not reported") fail(dev["ifname"] != sim["ifname"], "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"])) else: fail("ifname" in dev.keys(), "Ifname is reported for other ns") - if removed: - fail(dev["ifindex"] != 0, "Device perameters not zero on removed") - fail(dev["ns_dev"] != 0, "Device perameters not zero on removed") - fail(dev["ns_inode"] != 0, "Device perameters not zero on removed") + + maps = bpftool_map_list(expected=2, ns=ns) + for m in maps: + fail("dev" not in m.keys(), "Device parameters not reported") + fail(dev != m["dev"], "Map's device different than program's") # Parse command line parser = argparse.ArgumentParser() @@ -464,7 +521,7 @@ 
def check_dev_info(other_ns, ns, pin_file=None, removed=False): cmd("mount -t debugfs none /sys/kernel/debug") # Check samples are compiled -samples = ["sample_ret0.o"] +samples = ["sample_ret0.o", "sample_map_ret0.o"] for s in samples: ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) skip(ret != 0, "sample %s/%s not found, please compile it" % @@ -739,8 +796,9 @@ def check_dev_info(other_ns, ns, pin_file=None, removed=False): bpftool_prog_list_wait(expected=0) sim = NetdevSim() - sim.set_ethtool_tc_offloads(True) - sim.set_xdp(obj, "offload") + map_obj = bpf_obj("sample_map_ret0.o") + start_test("Test loading program with maps...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON start_test("Test bpftool bound info reporting (own ns)...") check_dev_info(False, "") @@ -757,11 +815,111 @@ def check_dev_info(other_ns, ns, pin_file=None, removed=False): sim.set_ns("") check_dev_info(False, "") - pin_file, _ = pin_prog("/sys/fs/bpf/tmp") + prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog") + map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2) sim.remove() start_test("Test bpftool bound info reporting (removed dev)...") - check_dev_info(True, "", pin_file=pin_file, removed=True) + check_dev_info_removed(prog_file=prog_file, map_file=map_file) + + # Remove all pinned files and reinstantiate the netdev + clean_up() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + + start_test("Test map update (no flags)...") + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + maps = bpftool_map_list(expected=2) + array = maps[0] if maps[0]["type"] == "array" else maps[1] + htab = maps[0] if maps[0]["type"] == "hash" else maps[1] + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, _ = bpftool("map update id %d key %s value %s" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret 
== 0, "added too many entries") + + start_test("Test map update (exists)...") + for m in maps: + for i in range(2): + bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", i), int2str("Q", i * 3))) + + for m in maps: + ret, err = bpftool("map update id %d key %s value %s exist" % + (m["id"], int2str("I", 3), int2str("Q", 3 * 3)), + fail=False) + fail(ret == 0, "updated non-existing key") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map update (noexist)...") + for m in maps: + for i in range(2): + ret, err = bpftool("map update id %d key %s value %s noexist" % + (m["id"], int2str("I", i), int2str("Q", i * 3)), + fail=False) + fail(ret == 0, "updated existing key") + fail(err["error"].find("File exists") == -1, + "expected EEXIST, error is '%s'" % (err["error"])) + + start_test("Test map dump...") + for m in maps: + _, entries = bpftool("map dump id %d" % (m["id"])) + for i in range(2): + key = str2int(entries[i]["key"]) + fail(key != i, "expected key %d, got %d" % (key, i)) + val = str2int(entries[i]["value"]) + fail(val != i * 3, "expected value %d, got %d" % (val, i * 3)) + + start_test("Test map getnext...") + for m in maps: + _, entry = bpftool("map getnext id %d" % (m["id"])) + key = str2int(entry["next_key"]) + fail(key != 0, "next key %d, expected %d" % (key, 0)) + _, entry = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 0))) + key = str2int(entry["next_key"]) + fail(key != 1, "next key %d, expected %d" % (key, 1)) + ret, err = bpftool("map getnext id %d key %s" % + (m["id"], int2str("I", 1)), fail=False) + fail(ret == 0, "got next key past the end of map") + fail(err["error"].find("No such file or directory") == -1, + "expected ENOENT, error is '%s'" % (err["error"])) + + start_test("Test map delete (htab)...") + for i in range(2): + bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i))) + + start_test("Test map delete 
(array)...") + for i in range(2): + ret, err = bpftool("map delete id %d key %s" % + (array["id"], int2str("I", i)), fail=False) + fail(ret == 0, "removed entry from an array") + fail(err["error"].find("Invalid argument") == -1, + "expected EINVAL, error is '%s'" % (err["error"])) + + start_test("Test map remove...") + sim.unset_xdp("offload") + bpftool_map_list_wait(expected=0) + sim.remove() + + sim = NetdevSim() + sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON + sim.remove() + bpftool_map_list_wait(expected=0) + + start_test("Test map creation fail path...") + sim = NetdevSim() + sim.dfs["bpf_map_accept"] = "N" + ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False) + fail(ret == 0, + "netdevsim didn't refuse to create a map with offload disabled") print("%s: OK" % (os.path.basename(__file__))) From ca027a1c45e30d89c5cc6dcacbdcea74e1ff65fc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 17 Jan 2018 19:13:32 -0800 Subject: [PATCH 8/8] nfp: bpf: add short busy wait for FW replies Scheduling out and in for every FW message can slow us down unnecessarily. Our experiments show that even under heavy load the FW responds to 99.9% messages within 200 us. Add a short busy wait before entering the wait queue. 
Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/cmsg.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c index 71e6586acc367..80d3aa0fc9d3e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/cmsg.c @@ -157,7 +157,14 @@ nfp_bpf_cmsg_wait_reply(struct nfp_app_bpf *bpf, enum nfp_bpf_cmsg_type type, int tag) { struct sk_buff *skb; - int err; + int i, err; + + for (i = 0; i < 50; i++) { + udelay(4); + skb = nfp_bpf_reply(bpf, tag); + if (skb) + return skb; + } err = wait_event_interruptible_timeout(bpf->cmsg_wq, skb = nfp_bpf_reply(bpf, tag),