From 6dbff13ca8a2ad2fddd904c2e789dd5e59a8644c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 26 Jul 2019 18:06:52 +0200 Subject: [PATCH 1/6] include/bpf.h: Remove map_insert_ctx() stubs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we changed the device and CPU maps to use linked lists instead of bitmaps, we also removed the need for the map_insert_ctx() helpers to keep track of the bitmaps inside each map. However, it seems I forgot to remove the function definitions stubs, so remove those here. Signed-off-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 18f4cc2c6acd4..bfdb54dd2ad14 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -713,7 +713,6 @@ struct xdp_buff; struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); -void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -721,7 +720,6 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb, struct bpf_prog *xdp_prog); struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); -void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); void __cpu_map_flush(struct bpf_map *map); int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -801,10 +799,6 @@ static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, return NULL; } -static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index) -{ -} - static inline void __dev_map_flush(struct bpf_map *map) { } @@ -834,10 +828,6 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) return NULL; } -static inline void __cpu_map_insert_ctx(struct bpf_map *map, u32 index) -{ -} - static inline void __cpu_map_flush(struct bpf_map *map) { } From fca16e51078e8e5c0af839426b3d2dcd2bede135 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 26 Jul 2019 18:06:53 +0200 Subject: [PATCH 2/6] xdp: Refactor devmap allocation code for reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The subsequent patch to add a new devmap sub-type can re-use much of the initialisation and allocation code, so refactor it into separate functions. Signed-off-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- kernel/bpf/devmap.c | 136 +++++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 53 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index d83cf8ccc8729..a0501266bdb8f 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -60,9 +60,9 @@ struct xdp_bulk_queue { struct bpf_dtab_netdev { struct net_device *dev; /* must be first member, due to tracepoint */ struct bpf_dtab *dtab; - unsigned int bit; struct xdp_bulk_queue __percpu *bulkq; struct rcu_head rcu; + unsigned int idx; /* keep track of map index for tracepoint */ }; struct bpf_dtab { @@ -75,28 +75,21 @@ struct bpf_dtab { static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); -static struct bpf_map *dev_map_alloc(union bpf_attr *attr) +static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) { - struct bpf_dtab *dtab; int err, cpu; u64 cost; - if (!capable(CAP_NET_ADMIN)) - return ERR_PTR(-EPERM); - /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK) - return ERR_PTR(-EINVAL); + return -EINVAL; /* Lookup returns a pointer straight to dev->ifindex, so make sure the * verifier prevents writes from the BPF side */ attr->map_flags |= BPF_F_RDONLY_PROG; - dtab = kzalloc(sizeof(*dtab), GFP_USER); - if (!dtab) - return ERR_PTR(-ENOMEM); bpf_map_init_from_attr(&dtab->map, attr); @@ -107,9 +100,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) /* if map size is larger than memlock limit, reject it */ err = bpf_map_charge_init(&dtab->map.memory, cost); if (err) - goto free_dtab; - - err = -ENOMEM; + return -EINVAL; dtab->flush_list = alloc_percpu(struct list_head); if (!dtab->flush_list) @@ -124,19 +115,38 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) if (!dtab->netdev_map) goto free_percpu; - spin_lock(&dev_map_lock); - list_add_tail_rcu(&dtab->list, &dev_map_list); - spin_unlock(&dev_map_lock); - - return &dtab->map; + return 0; free_percpu: free_percpu(dtab->flush_list); free_charge: bpf_map_charge_finish(&dtab->map.memory); -free_dtab: - kfree(dtab); - return ERR_PTR(err); + return -ENOMEM; +} + +static struct bpf_map *dev_map_alloc(union bpf_attr *attr) +{ + struct bpf_dtab *dtab; + int err; + + if (!capable(CAP_NET_ADMIN)) + return ERR_PTR(-EPERM); + + dtab = kzalloc(sizeof(*dtab), GFP_USER); + if (!dtab) + return ERR_PTR(-ENOMEM); + + err = dev_map_init_map(dtab, attr); + if (err) { + kfree(dtab); + return ERR_PTR(err); + } + + spin_lock(&dev_map_lock); + list_add_tail_rcu(&dtab->list, &dev_map_list); + spin_unlock(&dev_map_lock); + + return &dtab->map; } static void dev_map_free(struct bpf_map *map) @@ -235,7 +245,7 @@ static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags, out: bq->count = 0; - trace_xdp_devmap_xmit(&obj->dtab->map, obj->bit, + trace_xdp_devmap_xmit(&obj->dtab->map, obj->idx, sent, drops, bq->dev_rx, dev, err); bq->dev_rx = NULL; __list_del_clearprev(&bq->flush_node); @@ -412,17 +422,52 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) return 0; } -static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, - u64 map_flags) +static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, + struct bpf_dtab *dtab, + u32 ifindex, + unsigned int idx) { - struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - struct net *net = current->nsproxy->net_ns; gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + struct bpf_dtab_netdev *dev; + struct xdp_bulk_queue *bq; + int cpu; + + dev = kmalloc_node(sizeof(*dev), gfp, dtab->map.numa_node); + if (!dev) + return ERR_PTR(-ENOMEM); + + dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq), + sizeof(void *), gfp); + if (!dev->bulkq) { + kfree(dev); + return ERR_PTR(-ENOMEM); + } + + for_each_possible_cpu(cpu) { + bq = per_cpu_ptr(dev->bulkq, cpu); + bq->obj = dev; + } + + dev->dev = dev_get_by_index(net, ifindex); + if (!dev->dev) { + free_percpu(dev->bulkq); + kfree(dev); + return ERR_PTR(-EINVAL); + } + + dev->idx = idx; + dev->dtab = dtab; + + return dev; +} + +static int __dev_map_update_elem(struct net *net, struct bpf_map *map, + void *key, void *value, u64 map_flags) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_dtab_netdev *dev, *old_dev; u32 ifindex = *(u32 *)value; - struct xdp_bulk_queue *bq; u32 i = *(u32 *)key; - int cpu; if (unlikely(map_flags > BPF_EXIST)) return -EINVAL; @@ -434,31 +479,9 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, if (!ifindex) { dev = NULL; } else { - dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node); - if (!dev) - return -ENOMEM; - - dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq), - sizeof(void *), gfp); - if (!dev->bulkq) { - kfree(dev); - return -ENOMEM; - } - - for_each_possible_cpu(cpu) { - bq = per_cpu_ptr(dev->bulkq, cpu); - bq->obj = dev; - } - - dev->dev = dev_get_by_index(net, ifindex); - if (!dev->dev) { - free_percpu(dev->bulkq); - kfree(dev); - return -EINVAL; - } - - dev->bit = i; - dev->dtab = dtab; + dev = __dev_map_alloc_node(net, dtab, ifindex, i); + if (IS_ERR(dev)) + return PTR_ERR(dev); } /* Use call_rcu() here to ensure rcu critical sections have completed @@ -472,6 +495,13 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, return 0; } +static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + return __dev_map_update_elem(current->nsproxy->net_ns, + map, key, value, map_flags); +} + const struct bpf_map_ops dev_map_ops = { .map_alloc = dev_map_alloc, .map_free = dev_map_free, From 6f9d451ab1a33728adb72d7ff66a7b374d665176 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 26 Jul 2019 18:06:55 +0200 Subject: [PATCH 3/6] xdp: Add devmap_hash map type for looking up devices by hashed index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A common pattern when using xdp_redirect_map() is to create a device map where the lookup key is simply ifindex. Because device maps are arrays, this leaves holes in the map, and the map has to be sized to fit the largest ifindex, regardless of how many devices actually are actually needed in the map. This patch adds a second type of device map where the key is looked up using a hashmap, instead of being used as an array index. This allows maps to be densely packed, so they can be smaller. Signed-off-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 7 ++ include/linux/bpf_types.h | 1 + include/trace/events/xdp.h | 3 +- include/uapi/linux/bpf.h | 1 + kernel/bpf/devmap.c | 200 +++++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 2 + net/core/filter.c | 9 +- 7 files changed, 220 insertions(+), 3 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bfdb54dd2ad14..f9a506147c8a4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -713,6 +713,7 @@ struct xdp_buff; struct sk_buff; struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key); +struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_flush(struct bpf_map *map); int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp, struct net_device *dev_rx); @@ -799,6 +800,12 @@ static inline struct net_device *__dev_map_lookup_elem(struct bpf_map *map, return NULL; } +static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map, + u32 key) +{ + return NULL; +} + static inline void __dev_map_flush(struct bpf_map *map) { } diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index eec5aeeeaf928..36a9c2325176b 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -62,6 +62,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) #ifdef CONFIG_NET BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) #if defined(CONFIG_BPF_STREAM_PARSER) BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 68899fdc985b9..8c8420230a10e 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -175,7 +175,8 @@ struct _bpf_dtab_netdev { #endif /* __DEVMAP_OBJ_TYPE */ #define devmap_ifindex(fwd, map) \ - ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ + ((map->map_type == BPF_MAP_TYPE_DEVMAP || \ + map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) ? \ ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0) #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index e985f07a98ed3..6bbef0c7f5853 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -134,6 +134,7 @@ enum bpf_map_type { BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, }; /* Note that tracing related programs such as diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index a0501266bdb8f..9af048a932b5f 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -37,6 +37,12 @@ * notifier hook walks the map we know that new dev references can not be * added by the user because core infrastructure ensures dev_get_by_index() * calls will fail at this point. + * + * The devmap_hash type is a map type which interprets keys as ifindexes and + * indexes these using a hashmap. This allows maps that use ifindex as key to be + * densely packed instead of having holes in the lookup array for unused + * ifindexes. The setup and packet enqueue/send code is shared between the two + * types of devmap; only the lookup and insertion is different. */ #include #include @@ -59,6 +65,7 @@ struct xdp_bulk_queue { struct bpf_dtab_netdev { struct net_device *dev; /* must be first member, due to tracepoint */ + struct hlist_node index_hlist; struct bpf_dtab *dtab; struct xdp_bulk_queue __percpu *bulkq; struct rcu_head rcu; @@ -70,11 +77,30 @@ struct bpf_dtab { struct bpf_dtab_netdev **netdev_map; struct list_head __percpu *flush_list; struct list_head list; + + /* these are only used for DEVMAP_HASH type maps */ + struct hlist_head *dev_index_head; + spinlock_t index_lock; + unsigned int items; + u32 n_buckets; }; static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); +static struct hlist_head *dev_map_create_hash(unsigned int entries) +{ + int i; + struct hlist_head *hash; + + hash = kmalloc_array(entries, sizeof(*hash), GFP_KERNEL); + if (hash != NULL) + for (i = 0; i < entries; i++) + INIT_HLIST_HEAD(&hash[i]); + + return hash; +} + static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) { int err, cpu; @@ -97,6 +123,14 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) cost = (u64) dtab->map.max_entries * sizeof(struct bpf_dtab_netdev *); cost += sizeof(struct list_head) * num_possible_cpus(); + if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); + + if (!dtab->n_buckets) /* Overflow check */ + return -EINVAL; + cost += sizeof(struct hlist_head) * dtab->n_buckets; + } + /* if map size is larger than memlock limit, reject it */ err = bpf_map_charge_init(&dtab->map.memory, cost); if (err) @@ -115,8 +149,18 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) if (!dtab->netdev_map) goto free_percpu; + if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { + dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets); + if (!dtab->dev_index_head) + goto free_map_area; + + spin_lock_init(&dtab->index_lock); + } + return 0; +free_map_area: + bpf_map_area_free(dtab->netdev_map); free_percpu: free_percpu(dtab->flush_list); free_charge: @@ -198,6 +242,7 @@ static void dev_map_free(struct bpf_map *map) free_percpu(dtab->flush_list); bpf_map_area_free(dtab->netdev_map); + kfree(dtab->dev_index_head); kfree(dtab); } @@ -218,6 +263,70 @@ static int dev_map_get_next_key(struct bpf_map *map, void *key, void *next_key) return 0; } +static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, + int idx) +{ + return &dtab->dev_index_head[idx & (dtab->n_buckets - 1)]; +} + +struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct hlist_head *head = dev_map_index_hash(dtab, key); + struct bpf_dtab_netdev *dev; + + hlist_for_each_entry_rcu(dev, head, index_hlist) + if (dev->idx == key) + return dev; + + return NULL; +} + +static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, + void *next_key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + u32 idx, *next = next_key; + struct bpf_dtab_netdev *dev, *next_dev; + struct hlist_head *head; + int i = 0; + + if (!key) + goto find_first; + + idx = *(u32 *)key; + + dev = __dev_map_hash_lookup_elem(map, idx); + if (!dev) + goto find_first; + + next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(&dev->index_hlist)), + struct bpf_dtab_netdev, index_hlist); + + if (next_dev) { + *next = next_dev->idx; + return 0; + } + + i = idx & (dtab->n_buckets - 1); + i++; + + find_first: + for (; i < dtab->n_buckets; i++) { + head = dev_map_index_hash(dtab, i); + + next_dev = hlist_entry_safe(rcu_dereference_raw(hlist_first_rcu(head)), + struct bpf_dtab_netdev, + index_hlist); + if (next_dev) { + *next = next_dev->idx; + return 0; + } + } + + return -ENOENT; +} + static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags, bool in_napi_ctx) { @@ -373,6 +482,15 @@ static void *dev_map_lookup_elem(struct bpf_map *map, void *key) return dev ? &dev->ifindex : NULL; } +static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key) +{ + struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map, + *(u32 *)key); + struct net_device *dev = obj ? obj->dev : NULL; + + return dev ? &dev->ifindex : NULL; +} + static void dev_map_flush_old(struct bpf_dtab_netdev *dev) { if (dev->dev->netdev_ops->ndo_xdp_xmit) { @@ -422,6 +540,28 @@ static int dev_map_delete_elem(struct bpf_map *map, void *key) return 0; } +static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *old_dev; + int k = *(u32 *)key; + unsigned long flags; + int ret = -ENOENT; + + spin_lock_irqsave(&dtab->index_lock, flags); + + old_dev = __dev_map_hash_lookup_elem(map, k); + if (old_dev) { + dtab->items--; + hlist_del_init_rcu(&old_dev->index_hlist); + call_rcu(&old_dev->rcu, __dev_map_entry_free); + ret = 0; + } + spin_unlock_irqrestore(&dtab->index_lock, flags); + + return ret; +} + static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, struct bpf_dtab *dtab, u32 ifindex, @@ -502,6 +642,56 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value, map, key, value, map_flags); } +static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, + void *key, void *value, u64 map_flags) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + struct bpf_dtab_netdev *dev, *old_dev; + u32 ifindex = *(u32 *)value; + u32 idx = *(u32 *)key; + unsigned long flags; + + if (unlikely(map_flags > BPF_EXIST || !ifindex)) + return -EINVAL; + + old_dev = __dev_map_hash_lookup_elem(map, idx); + if (old_dev && (map_flags & BPF_NOEXIST)) + return -EEXIST; + + dev = __dev_map_alloc_node(net, dtab, ifindex, idx); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + spin_lock_irqsave(&dtab->index_lock, flags); + + if (old_dev) { + hlist_del_rcu(&old_dev->index_hlist); + } else { + if (dtab->items >= dtab->map.max_entries) { + spin_unlock_irqrestore(&dtab->index_lock, flags); + call_rcu(&dev->rcu, __dev_map_entry_free); + return -E2BIG; + } + dtab->items++; + } + + hlist_add_head_rcu(&dev->index_hlist, + dev_map_index_hash(dtab, idx)); + spin_unlock_irqrestore(&dtab->index_lock, flags); + + if (old_dev) + call_rcu(&old_dev->rcu, __dev_map_entry_free); + + return 0; +} + +static int dev_map_hash_update_elem(struct bpf_map *map, void *key, void *value, + u64 map_flags) +{ + return __dev_map_hash_update_elem(current->nsproxy->net_ns, + map, key, value, map_flags); +} + const struct bpf_map_ops dev_map_ops = { .map_alloc = dev_map_alloc, .map_free = dev_map_free, @@ -512,6 +702,16 @@ const struct bpf_map_ops dev_map_ops = { .map_check_btf = map_check_no_btf, }; +const struct bpf_map_ops dev_map_hash_ops = { + .map_alloc = dev_map_alloc, + .map_free = dev_map_free, + .map_get_next_key = dev_map_hash_get_next_key, + .map_lookup_elem = dev_map_hash_lookup_elem, + .map_update_elem = dev_map_hash_update_elem, + .map_delete_elem = dev_map_hash_delete_elem, + .map_check_btf = map_check_no_btf, +}; + static int dev_map_notification(struct notifier_block *notifier, ulong event, void *ptr) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5900cbb966b17..cef851cd5c36a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3457,6 +3457,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, goto error; break; case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: if (func_id != BPF_FUNC_redirect_map && func_id != BPF_FUNC_map_lookup_elem) goto error; @@ -3539,6 +3540,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, break; case BPF_FUNC_redirect_map: if (map->map_type != BPF_MAP_TYPE_DEVMAP && + map->map_type != BPF_MAP_TYPE_DEVMAP_HASH && map->map_type != BPF_MAP_TYPE_CPUMAP && map->map_type != BPF_MAP_TYPE_XSKMAP) goto error; diff --git a/net/core/filter.c b/net/core/filter.c index 3961437ccc506..1eee70f80fbad 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, int err; switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: { + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: { struct bpf_dtab_netdev *dst = fwd; err = dev_map_enqueue(dst, xdp, dev_rx); @@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void) if (map) { switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: __dev_map_flush(map); break; case BPF_MAP_TYPE_CPUMAP: @@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: return __dev_map_lookup_elem(map, index); + case BPF_MAP_TYPE_DEVMAP_HASH: + return __dev_map_hash_lookup_elem(map, index); case BPF_MAP_TYPE_CPUMAP: return __cpu_map_lookup_elem(map, index); case BPF_MAP_TYPE_XSKMAP: @@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + if (map->map_type == BPF_MAP_TYPE_DEVMAP || + map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { struct bpf_dtab_netdev *dst = fwd; err = dev_map_generic_redirect(dst, skb, xdp_prog); From 10fbe21163fc6f05f5991d9cf9a2da9bdee4e834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 26 Jul 2019 18:06:56 +0200 Subject: [PATCH 4/6] tools/include/uapi: Add devmap_hash BPF map type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds the devmap_hash BPF map type to the uapi headers in tools/. Signed-off-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3d7fc67ec1b81..21d0f6863df3c 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -134,6 +134,7 @@ enum bpf_map_type { BPF_MAP_TYPE_QUEUE, BPF_MAP_TYPE_STACK, BPF_MAP_TYPE_SK_STORAGE, + BPF_MAP_TYPE_DEVMAP_HASH, }; /* Note that tracing related programs such as From e42346192c9f179a9a47845e4663ad2f453d2b7b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 26 Jul 2019 18:06:57 +0200 Subject: [PATCH 5/6] tools/libbpf_probes: Add new devmap_hash type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds the definition for BPF_MAP_TYPE_DEVMAP_HASH to libbpf_probes.c in tools/lib/bpf. Signed-off-by: Toke Høiland-Jørgensen Acked-by: Yonghong Song Acked-by: Jesper Dangaard Brouer --- tools/lib/bpf/libbpf_probes.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index ace1a0708d990..4b0b0364f5fc8 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -244,6 +244,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: case BPF_MAP_TYPE_SOCKMAP: case BPF_MAP_TYPE_CPUMAP: case BPF_MAP_TYPE_XSKMAP: From 1375dc4a4579d5e767dd8c2d2abcd929ff59d0a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Fri, 26 Jul 2019 18:06:58 +0200 Subject: [PATCH 6/6] tools: Add definitions for devmap_hash map type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds selftest and bpftool updates for the devmap_hash map type. Signed-off-by: Toke Høiland-Jørgensen Acked-by: Jesper Dangaard Brouer Signed-off-by: Alexei Starovoitov --- tools/bpf/bpftool/Documentation/bpftool-map.rst | 2 +- tools/bpf/bpftool/bash-completion/bpftool | 4 ++-- tools/bpf/bpftool/map.c | 3 ++- tools/testing/selftests/bpf/test_maps.c | 16 ++++++++++++++++ 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 490b4501cb6ee..61d1d270eb5eb 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -46,7 +46,7 @@ MAP COMMANDS | *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash** | | **percpu_array** | **stack_trace** | **cgroup_array** | **lru_hash** | | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps** -| | **devmap** | **sockmap** | **cpumap** | **xskmap** | **sockhash** +| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash** | | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage** | | **queue** | **stack** } diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index c8f42e1fcbc99..6b961a5ed1006 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -489,8 +489,8 @@ _bpftool() perf_event_array percpu_hash percpu_array \ stack_trace cgroup_array lru_hash \ lru_percpu_hash lpm_trie array_of_maps \ - hash_of_maps devmap sockmap cpumap xskmap \ - sockhash cgroup_storage reuseport_sockarray \ + hash_of_maps devmap devmap_hash sockmap cpumap \ + xskmap sockhash cgroup_storage reuseport_sockarray \ percpu_cgroup_storage queue stack' -- \ "$cur" ) ) return 0 diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 5da5a7311f130..bfbbc6b4cb83c 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -37,6 +37,7 @@ const char * const map_type_name[] = { [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps", [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps", [BPF_MAP_TYPE_DEVMAP] = "devmap", + [BPF_MAP_TYPE_DEVMAP_HASH] = "devmap_hash", [BPF_MAP_TYPE_SOCKMAP] = "sockmap", [BPF_MAP_TYPE_CPUMAP] = "cpumap", [BPF_MAP_TYPE_XSKMAP] = "xskmap", @@ -1271,7 +1272,7 @@ static int do_help(int argc, char **argv) " TYPE := { hash | array | prog_array | perf_event_array | percpu_hash |\n" " percpu_array | stack_trace | cgroup_array | lru_hash |\n" " lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n" - " devmap | sockmap | cpumap | xskmap | sockhash |\n" + " devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n" " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n" " " HELP_SPEC_OPTIONS "\n" "", diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 5443b9bd75ed7..e1f1becda5290 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -508,6 +508,21 @@ static void test_devmap(unsigned int task, void *data) close(fd); } +static void test_devmap_hash(unsigned int task, void *data) +{ + int fd; + __u32 key, value; + + fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value), + 2, 0); + if (fd < 0) { + printf("Failed to create devmap_hash '%s'!\n", strerror(errno)); + exit(1); + } + + close(fd); +} + static void test_queuemap(unsigned int task, void *data) { const int MAP_SIZE = 32; @@ -1684,6 +1699,7 @@ static void run_all_tests(void) test_arraymap_percpu_many_keys(); test_devmap(0, NULL); + test_devmap_hash(0, NULL); test_sockmap(0, NULL); test_map_large();