bpf: devmap prepare xdp frames for bulking
Like cpumap, create a queue for xdp frames that will be bulked.  For now,
this patch simply invokes ndo_xdp_xmit for each frame.  This happens
either when the map flush operation is invoked, or when the limit
DEV_MAP_BULK_SIZE is reached.

V5: Avoid memleak on error path in dev_map_update_elem()

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Jesper Dangaard Brouer authored and Alexei Starovoitov committed May 25, 2018
1 parent 67f29e0 commit 5d053f9
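
Before the diff, here is a minimal, self-contained C sketch of the pattern this patch sets up: frames accumulate in a small fixed-size queue and are only handed to the device once the queue fills up or an explicit flush runs. The demo_* names and the single-threaded, user-space framing are illustrative assumptions; the real per-CPU queue, error handling, and ndo_xdp_xmit/ndo_xdp_flush calls are in kernel/bpf/devmap.c below.

#define DEMO_BULK_SIZE 16

struct demo_frame;                          /* stand-in for struct xdp_frame */

struct demo_bulk_queue {
	struct demo_frame *q[DEMO_BULK_SIZE];
	unsigned int count;
};

/* stand-in for ndo_xdp_xmit(): transmit a single frame */
extern int demo_xmit_one(struct demo_frame *frame);

/* push everything queued so far to the device, then reset the queue */
static void demo_bq_flush(struct demo_bulk_queue *bq)
{
	unsigned int i;

	for (i = 0; i < bq->count; i++)
		demo_xmit_one(bq->q[i]);    /* the real code also frees frames on error */
	bq->count = 0;
}

/* queue one frame; if the queue is already full, flush the batch first */
static void demo_bq_enqueue(struct demo_bulk_queue *bq, struct demo_frame *frame)
{
	if (bq->count == DEMO_BULK_SIZE)
		demo_bq_flush(bq);
	bq->q[bq->count++] = frame;
}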
Showing 1 changed file with 70 additions and 4 deletions.
74 changes: 70 additions & 4 deletions kernel/bpf/devmap.c
@@ -55,10 +55,17 @@
#define DEV_CREATE_FLAG_MASK \
(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)

#define DEV_MAP_BULK_SIZE 16
struct xdp_bulk_queue {
struct xdp_frame *q[DEV_MAP_BULK_SIZE];
unsigned int count;
};

struct bpf_dtab_netdev {
struct net_device *dev; /* must be first member, due to tracepoint */
struct bpf_dtab *dtab;
unsigned int bit;
struct xdp_bulk_queue __percpu *bulkq;
struct rcu_head rcu;
};

@@ -208,6 +215,34 @@ void __dev_map_insert_ctx(struct bpf_map *map, u32 bit)
__set_bit(bit, bitmap);
}

static int bq_xmit_all(struct bpf_dtab_netdev *obj,
struct xdp_bulk_queue *bq)
{
struct net_device *dev = obj->dev;
int i;

if (unlikely(!bq->count))
return 0;

for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];

prefetch(xdpf);
}

for (i = 0; i < bq->count; i++) {
struct xdp_frame *xdpf = bq->q[i];
int err;

err = dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
if (err)
xdp_return_frame(xdpf);
}
bq->count = 0;

return 0;
}

/* __dev_map_flush is called from xdp_do_flush_map() which _must_ be signaled
* from the driver before returning from its napi->poll() routine. The poll()
* routine is called either from busy_poll context or net_rx_action signaled
@@ -223,6 +258,7 @@ void __dev_map_flush(struct bpf_map *map)

for_each_set_bit(bit, bitmap, map->max_entries) {
struct bpf_dtab_netdev *dev = READ_ONCE(dtab->netdev_map[bit]);
struct xdp_bulk_queue *bq;
struct net_device *netdev;

/* This is possible if the dev entry is removed by user space
@@ -232,6 +268,9 @@ void __dev_map_flush(struct bpf_map *map)
continue;

__clear_bit(bit, bitmap);

bq = this_cpu_ptr(dev->bulkq);
bq_xmit_all(dev, bq);
netdev = dev->dev;
if (likely(netdev->netdev_ops->ndo_xdp_flush))
netdev->netdev_ops->ndo_xdp_flush(netdev);
@@ -254,6 +293,20 @@ struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key)
return obj;
}

/* Runs under RCU-read-side, plus in softirq under NAPI protection.
* Thus, safe percpu variable access.
*/
static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf)
{
struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq);

if (unlikely(bq->count == DEV_MAP_BULK_SIZE))
bq_xmit_all(obj, bq);

bq->q[bq->count++] = xdpf;
return 0;
}

int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
{
struct net_device *dev = dst->dev;
@@ -266,8 +319,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp)
if (unlikely(!xdpf))
return -EOVERFLOW;

-	/* TODO: implement a bulking/enqueue step later */
-	return dev->netdev_ops->ndo_xdp_xmit(dev, xdpf);
+	return bq_enqueue(dst, xdpf);
}

static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
@@ -282,13 +334,18 @@ static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
{
if (dev->dev->netdev_ops->ndo_xdp_flush) {
struct net_device *fl = dev->dev;
struct xdp_bulk_queue *bq;
unsigned long *bitmap;

int cpu;

for_each_online_cpu(cpu) {
bitmap = per_cpu_ptr(dev->dtab->flush_needed, cpu);
__clear_bit(dev->bit, bitmap);

bq = per_cpu_ptr(dev->bulkq, cpu);
bq_xmit_all(dev, bq);

fl->netdev_ops->ndo_xdp_flush(dev->dev);
}
}
@@ -300,6 +357,7 @@ static void __dev_map_entry_free(struct rcu_head *rcu)

dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
dev_map_flush_old(dev);
free_percpu(dev->bulkq);
dev_put(dev->dev);
kfree(dev);
}
@@ -332,6 +390,7 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
struct net *net = current->nsproxy->net_ns;
gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN;
struct bpf_dtab_netdev *dev, *old_dev;
u32 i = *(u32 *)key;
u32 ifindex = *(u32 *)value;
@@ -346,13 +405,20 @@ static int dev_map_update_elem(struct bpf_map *map, void *key, void *value,
if (!ifindex) {
dev = NULL;
} else {
-	dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
-			   map->numa_node);
+	dev = kmalloc_node(sizeof(*dev), gfp, map->numa_node);
if (!dev)
return -ENOMEM;

dev->bulkq = __alloc_percpu_gfp(sizeof(*dev->bulkq),
sizeof(void *), gfp);
if (!dev->bulkq) {
kfree(dev);
return -ENOMEM;
}

dev->dev = dev_get_by_index(net, ifindex);
if (!dev->dev) {
free_percpu(dev->bulkq);
kfree(dev);
return -EINVAL;
}
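
For context on when the new queue drains, the flush comment in the diff ties __dev_map_flush() to the driver's napi->poll(); below is a rough outline of the resulting call path. The driver-loop framing is an assumption; the function names come from this file and the patch description.

/*
 * Rough outline (assumption: a typical XDP-capable driver):
 *
 *   napi->poll()
 *     for each received packet redirected into the devmap:
 *       dev_map_enqueue()
 *         -> bq_enqueue(): buffer the xdp_frame in the per-CPU queue,
 *            calling bq_xmit_all() once DEV_MAP_BULK_SIZE frames are queued
 *     before returning from poll():
 *       xdp_do_flush_map()
 *         -> __dev_map_flush(): bq_xmit_all() for any leftover frames,
 *            then ndo_xdp_flush() on the target netdev
 */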
