Merge branch 'xdp_devmap'
David Ahern says:

====================
Implementation of Daniel's proposal to allow DEVMAP entries to be a
(device index, program fd) pair.

Programs are run after XDP_REDIRECT and have access to both the Rx and
Tx devices.

v4
- moved struct bpf_devmap_val from uapi to devmap.c, named the union
  and dropped the prefix from the elements - Jesper
- fixed 2 bugs in selftests

v3
- renamed struct to bpf_devmap_val
- used offsetofend to check for expected map size, modification of
  Toke's comment
- check for explicit value sizes
- adjusted switch statement in dev_map_run_prog per Andrii's comment
- changed SEC shortcut to xdp_devmap
- changed selftests to use skeleton and new map declaration

v2
- moved dev_map_ext_val definition to uapi to formalize the API for devmap
  extensions; added bpf_ prefix to the prog_fd and prog_id entries
- changed devmap code to handle struct in a way that it can support future
  extensions
- fixed subject in libbpf patch

v1
- fixed prog put on invalid program - Toke
- changed write value from id to fd per Toke's comments about capabilities
- added test cases
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Alexei Starovoitov committed Jun 1, 2020
commit 906312c (2 parents: b36e62e + d39aec7)
Showing 11 changed files with 328 additions and 18 deletions.
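As a quick illustration of what the series enables, here is a minimal sketch of a program meant to live in a DEVMAP entry (hypothetical, not part of this commit; it assumes the xdp_devmap SEC() shortcut from v3 and the egress_ifindex field added in the diffs below):

	/* Hypothetical example: runs after XDP_REDIRECT has picked the Tx
	 * device, so both Rx and Tx ifindex are readable from the context.
	 */
	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp_devmap") /* loads with expected_attach_type == BPF_XDP_DEVMAP */
	int xdp_devmap_prog(struct xdp_md *ctx)
	{
		/* drop anything that would egress ifindex 42 (arbitrary example) */
		if (ctx->egress_ifindex == 42)
			return XDP_DROP;

		return XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";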
5 changes: 5 additions & 0 deletions include/linux/bpf.h
@@ -1250,6 +1250,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
struct net_device *dev_rx);
int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
struct bpf_prog *xdp_prog);
bool dev_map_can_have_prog(struct bpf_map *map);

struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
void __cpu_map_flush(void);
@@ -1363,6 +1364,10 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map
{
return NULL;
}
static inline bool dev_map_can_have_prog(struct bpf_map *map)
{
return false;
}

static inline void __dev_flush(void)
{
5 changes: 5 additions & 0 deletions include/net/xdp.h
@@ -61,12 +61,17 @@ struct xdp_rxq_info {
struct xdp_mem_info mem;
} ____cacheline_aligned; /* perf critical, avoid false-sharing */

struct xdp_txq_info {
struct net_device *dev;
};

struct xdp_buff {
void *data;
void *data_end;
void *data_meta;
void *data_hard_start;
struct xdp_rxq_info *rxq;
struct xdp_txq_info *txq;
u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
};

3 changes: 3 additions & 0 deletions include/uapi/linux/bpf.h
@@ -225,6 +225,7 @@ enum bpf_attach_type {
BPF_CGROUP_INET6_GETPEERNAME,
BPF_CGROUP_INET4_GETSOCKNAME,
BPF_CGROUP_INET6_GETSOCKNAME,
BPF_XDP_DEVMAP,
__MAX_BPF_ATTACH_TYPE
};

@@ -3706,6 +3707,8 @@ struct xdp_md {
/* Below access go through struct xdp_rxq_info */
__u32 ingress_ifindex; /* rxq->dev->ifindex */
__u32 rx_queue_index; /* rxq->queue_index */

__u32 egress_ifindex; /* txq->dev->ifindex */
};

enum sk_action {
130 changes: 112 additions & 18 deletions kernel/bpf/devmap.c
@@ -60,12 +60,23 @@ struct xdp_dev_bulk_queue {
unsigned int count;
};

/* DEVMAP values */
struct bpf_devmap_val {
u32 ifindex; /* device index */
union {
int fd; /* prog fd on map write */
u32 id; /* prog id on map read */
} bpf_prog;
};

struct bpf_dtab_netdev {
struct net_device *dev; /* must be first member, due to tracepoint */
struct hlist_node index_hlist;
struct bpf_dtab *dtab;
struct bpf_prog *xdp_prog;
struct rcu_head rcu;
unsigned int idx;
struct bpf_devmap_val val;
};

struct bpf_dtab {
@@ -105,12 +116,18 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,

static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
{
+	u32 valsize = attr->value_size;
 	u64 cost = 0;
 	int err;

-	/* check sanity of attributes */
+	/* check sanity of attributes. 2 value sizes supported:
+	 * 4 bytes: ifindex
+	 * 8 bytes: ifindex + prog fd
+	 */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
+	    (valsize != offsetofend(struct bpf_devmap_val, ifindex) &&
+	     valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) ||
+	    attr->map_flags & ~DEV_CREATE_FLAG_MASK)
 		return -EINVAL;

/* Lookup returns a pointer straight to dev->ifindex, so make sure the
@@ -217,6 +234,8 @@ static void dev_map_free(struct bpf_map *map)

hlist_for_each_entry_safe(dev, next, head, index_hlist) {
hlist_del_rcu(&dev->index_hlist);
if (dev->xdp_prog)
bpf_prog_put(dev->xdp_prog);
dev_put(dev->dev);
kfree(dev);
}
@@ -231,6 +250,8 @@ static void dev_map_free(struct bpf_map *map)
if (!dev)
continue;

if (dev->xdp_prog)
bpf_prog_put(dev->xdp_prog);
dev_put(dev->dev);
kfree(dev);
}
@@ -317,6 +338,16 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
return -ENOENT;
}

bool dev_map_can_have_prog(struct bpf_map *map)
{
if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
return true;

return false;
}

static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
{
struct net_device *dev = bq->dev;
@@ -441,6 +472,33 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
return bq_enqueue(dev, xdpf, dev_rx);
}

static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
struct xdp_txq_info txq = { .dev = dev };
u32 act;

xdp->txq = &txq;

act = bpf_prog_run_xdp(xdp_prog, xdp);
switch (act) {
case XDP_PASS:
return xdp;
case XDP_DROP:
break;
default:
bpf_warn_invalid_xdp_action(act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(dev, xdp_prog, act);
break;
}

xdp_return_buff(xdp);
return NULL;
}

int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
struct net_device *dev_rx)
{
@@ -452,6 +510,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
{
struct net_device *dev = dst->dev;

if (dst->xdp_prog) {
xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
if (!xdp)
return 0;
}
return __xdp_enqueue(dev, xdp, dev_rx);
}

@@ -472,25 +535,24 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
-	struct net_device *dev = obj ? obj->dev : NULL;
-
-	return dev ? &dev->ifindex : NULL;
+	return obj ? &obj->val : NULL;
}

static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
{
struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
*(u32 *)key);
-	struct net_device *dev = obj ? obj->dev : NULL;
-
-	return dev ? &dev->ifindex : NULL;
+	return obj ? &obj->val : NULL;
}

static void __dev_map_entry_free(struct rcu_head *rcu)
{
struct bpf_dtab_netdev *dev;

dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
if (dev->xdp_prog)
bpf_prog_put(dev->xdp_prog);
dev_put(dev->dev);
kfree(dev);
}
@@ -541,34 +603,57 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)

static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
struct bpf_dtab *dtab,
-						    u32 ifindex,
+						    struct bpf_devmap_val *val,
unsigned int idx)
{
struct bpf_prog *prog = NULL;
struct bpf_dtab_netdev *dev;

dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
dtab->map.numa_node);
if (!dev)
return ERR_PTR(-ENOMEM);

-	dev->dev = dev_get_by_index(net, ifindex);
-	if (!dev->dev) {
-		kfree(dev);
-		return ERR_PTR(-EINVAL);
-	}
+	dev->dev = dev_get_by_index(net, val->ifindex);
+	if (!dev->dev)
+		goto err_out;

if (val->bpf_prog.fd >= 0) {
prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
BPF_PROG_TYPE_XDP, false);
if (IS_ERR(prog))
goto err_put_dev;
if (prog->expected_attach_type != BPF_XDP_DEVMAP)
goto err_put_prog;
}

dev->idx = idx;
dev->dtab = dtab;
if (prog) {
dev->xdp_prog = prog;
dev->val.bpf_prog.id = prog->aux->id;
} else {
dev->xdp_prog = NULL;
dev->val.bpf_prog.id = 0;
}
dev->val.ifindex = val->ifindex;

return dev;
err_put_prog:
bpf_prog_put(prog);
err_put_dev:
dev_put(dev->dev);
err_out:
kfree(dev);
return ERR_PTR(-EINVAL);
}

static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 ifindex = *(u32 *)value;
 	u32 i = *(u32 *)key;

if (unlikely(map_flags > BPF_EXIST))
@@ -578,10 +663,16 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
if (unlikely(map_flags == BPF_NOEXIST))
return -EEXIST;

-	if (!ifindex) {
+	/* already verified value_size <= sizeof val */
+	memcpy(&val, value, map->value_size);
+
+	if (!val.ifindex) {
 		dev = NULL;
+		/* can not specify fd if ifindex is 0 */
+		if (val.bpf_prog.fd != -1)
+			return -EINVAL;
 	} else {
-		dev = __dev_map_alloc_node(net, dtab, ifindex, i);
+		dev = __dev_map_alloc_node(net, dtab, &val, i);
if (IS_ERR(dev))
return PTR_ERR(dev);
}
@@ -608,13 +699,16 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
void *key, void *value, u64 map_flags)
{
struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 ifindex = *(u32 *)value;
 	u32 idx = *(u32 *)key;
 	unsigned long flags;
 	int err = -EEXIST;

-	if (unlikely(map_flags > BPF_EXIST || !ifindex))
+	/* already verified value_size <= sizeof val */
+	memcpy(&val, value, map->value_size);
+
+	if (unlikely(map_flags > BPF_EXIST || !val.ifindex))
return -EINVAL;

spin_lock_irqsave(&dtab->index_lock, flags);
@@ -623,7 +717,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
if (old_dev && (map_flags & BPF_NOEXIST))
goto out_err;

-	dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
+	dev = __dev_map_alloc_node(net, dtab, &val, idx);
if (IS_ERR(dev)) {
err = PTR_ERR(dev);
goto out_err;
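Taken together, the devmap.c changes give the map a read/write asymmetry: user space writes an (ifindex, prog fd) pair and reads back an (ifindex, prog id) pair. A hedged user-space sketch of that flow (the value struct is redeclared locally here, since this commit keeps struct bpf_devmap_val private to devmap.c; bpf_map_update_elem() is the standard libbpf wrapper):

	#include <linux/types.h>
	#include <bpf/bpf.h>

	/* Local mirror of struct bpf_devmap_val above. */
	struct devmap_val {
		__u32 ifindex;
		union {
			int fd;   /* prog fd on map write */
			__u32 id; /* prog id on map read */
		} bpf_prog;
	};

	/* Hypothetical helper: install (ifindex, prog fd) into a DEVMAP created
	 * with value_size == 8. With the legacy value_size == 4, only the
	 * ifindex is written and no program runs on the entry.
	 */
	static int add_devmap_entry(int map_fd, __u32 key, __u32 ifindex, int prog_fd)
	{
		struct devmap_val val = { .ifindex = ifindex, .bpf_prog.fd = prog_fd };

		return bpf_map_update_elem(map_fd, &key, &val, 0);
	}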
18 changes: 18 additions & 0 deletions net/core/dev.c
@@ -5420,6 +5420,18 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
struct bpf_prog *new = xdp->prog;
int ret = 0;

if (new) {
u32 i;

/* generic XDP does not work with DEVMAPs that can
* have a bpf_prog installed on an entry
*/
for (i = 0; i < new->aux->used_map_cnt; i++) {
if (dev_map_can_have_prog(new->aux->used_maps[i]))
return -EINVAL;
}
}

switch (xdp->command) {
case XDP_SETUP_PROG:
rcu_assign_pointer(dev->xdp_prog, new);
@@ -8835,6 +8847,12 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
return -EINVAL;
}

if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
bpf_prog_put(prog);
return -EINVAL;
}

/* prog->aux->id may be 0 for orphaned device-bound progs */
if (prog->aux->id && prog->aux->id == prog_id) {
bpf_prog_put(prog);
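The net effect of the two dev.c hunks: generic XDP refuses to install a program whose maps could carry devmap-attached programs, and a BPF_XDP_DEVMAP program can never be attached to a device directly. A sketch of the latter from user space (hypothetical helper; bpf_set_link_xdp_fd() is libbpf's netlink attach call, which returns a negative errno on failure):

	#include <bpf/libbpf.h>

	/* Hypothetical: devmap_prog_fd was loaded with
	 * expected_attach_type = BPF_XDP_DEVMAP. Direct attach must now fail;
	 * such programs may only be referenced from DEVMAP entries.
	 */
	static bool devmap_prog_attach_is_rejected(int ifindex, int devmap_prog_fd)
	{
		int err = bpf_set_link_xdp_fd(ifindex, devmap_prog_fd, 0);

		/* extack: "BPF_XDP_DEVMAP programs can not be attached to a device" */
		return err == -EINVAL;
	}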
17 changes: 17 additions & 0 deletions net/core/filter.c
@@ -7015,6 +7015,13 @@ static bool xdp_is_valid_access(int off, int size,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
{
if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
switch (off) {
case offsetof(struct xdp_md, egress_ifindex):
return false;
}
}

if (type == BPF_WRITE) {
if (bpf_prog_is_dev_bound(prog->aux)) {
switch (off) {
@@ -7985,6 +7992,16 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
offsetof(struct xdp_rxq_info,
queue_index));
break;
case offsetof(struct xdp_md, egress_ifindex):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
si->dst_reg, si->src_reg,
offsetof(struct xdp_buff, txq));
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
si->dst_reg, si->dst_reg,
offsetof(struct xdp_txq_info, dev));
*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
offsetof(struct net_device, ifindex));
break;
}

return insn - insn_buf;
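The three emitted loads amount to chasing xdp_buff->txq->dev->ifindex. A plain-C sketch of what the rewritten ctx access computes (illustrative only, not kernel code):

	/* Illustrative only: what reading ctx->egress_ifindex compiles down to. */
	static u32 read_egress_ifindex(const struct xdp_buff *xdp)
	{
		return xdp->txq->dev->ifindex; /* xdp_buff -> xdp_txq_info -> net_device */
	}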
(Diffs for the remaining changed files are not rendered on this page.)
