Skip to content

Commit

Permalink
bpf: offload: add map offload infrastructure
Browse files Browse the repository at this point in the history
BPF map offload follows a similar path to program offload.  At creation
time users may specify the ifindex of the device on which they want to
create the map.  The map will be validated by the kernel's
.map_alloc_check callback and the device driver will be called for the
actual allocation.  The map will have an empty set of operations
associated with it (save for alloc and free callbacks).  The real
device callbacks are kept in the dev_ops member of struct
bpf_offloaded_map because they have slightly different signatures.
Map operations are called in process context, so the driver may
communicate with HW freely, call msleep(), wait() etc.

Map alloc and free callbacks are muxed via the existing .ndo_bpf, and
are always called with the rtnl lock held.  Maps and programs are
guaranteed to be destroyed before .ndo_uninit (i.e. before
unregister_netdev() returns).  Map callbacks are invoked with
bpf_devs_lock *read* locked; drivers must take care of exclusive
locking if necessary.

All offload-specific branches are marked with unlikely() (through
bpf_map_is_dev_bound()), given that branch penalty will be
negligible compared to IO anyway, and we don't want to penalize
SW path unnecessarily.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
  • Loading branch information
Jakub Kicinski authored and Daniel Borkmann committed Jan 14, 2018
1 parent 5bc2d55 commit a388457
Show file tree
Hide file tree
Showing 7 changed files with 293 additions and 13 deletions.
59 changes: 59 additions & 0 deletions include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,33 @@ struct bpf_map {
char name[BPF_OBJ_NAME_LEN];
};

struct bpf_offloaded_map;

/* Device-side element operations for an offloaded map.
 *
 * These mirror the generic map element operations but take the
 * struct bpf_offloaded_map directly.  The offload core invokes them with
 * bpf_devs_lock held for reading, so a driver that needs exclusive access
 * has to provide its own locking.
 */
struct bpf_map_dev_ops {
/* Iterate keys: given @key, store the following key in @next_key */
int (*map_get_next_key)(struct bpf_offloaded_map *map,
void *key, void *next_key);
/* Look up @key and store the result in @value */
int (*map_lookup_elem)(struct bpf_offloaded_map *map,
void *key, void *value);
/* Insert or update @key with @value; @flags is passed through from the caller */
int (*map_update_elem)(struct bpf_offloaded_map *map,
void *key, void *value, u64 flags);
/* Remove @key from the map */
int (*map_delete_elem)(struct bpf_offloaded_map *map, void *key);
};

/* A BPF map whose storage lives on a network device. */
struct bpf_offloaded_map {
/* Embedded generic map; map_to_offmap() converts back from a pointer to it */
struct bpf_map map;
/* Device the map is bound to; cleared to NULL once the map is orphaned */
struct net_device *netdev;
/* Device callbacks used for element access.
 * NOTE(review): presumably filled in by the driver while handling
 * BPF_OFFLOAD_MAP_ALLOC - confirm against the driver.
 */
const struct bpf_map_dev_ops *dev_ops;
/* NOTE(review): looks like driver-private state - confirm against the driver */
void *dev_priv;
/* Node on the offload core's list of all offloaded maps */
struct list_head offloads;
};

/* Convert a pointer to the generic map embedded in a struct
 * bpf_offloaded_map back into a pointer to the containing structure.
 * Only meaningful for maps that really are offloaded maps.
 */
static inline struct bpf_offloaded_map *map_to_offmap(struct bpf_map *map)
{
	struct bpf_offloaded_map *offmap;

	offmap = container_of(map, struct bpf_offloaded_map, map);

	return offmap;
}

extern const struct bpf_map_ops bpf_map_offload_ops;

/* function argument constraints */
enum bpf_arg_type {
ARG_DONTCARE = 0, /* unused argument in helper function */
Expand Down Expand Up @@ -369,6 +396,7 @@ int __bpf_prog_charge(struct user_struct *user, u32 pages);
void __bpf_prog_uncharge(struct user_struct *user, u32 pages);

void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock);
void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock);

struct bpf_map *bpf_map_get_with_uref(u32 ufd);
struct bpf_map *__bpf_map_get(struct fd f);
Expand Down Expand Up @@ -556,13 +584,30 @@ void bpf_prog_offload_destroy(struct bpf_prog *prog);
int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
struct bpf_prog *prog);

/* Element operations for device-bound maps.  Each one forwards to the
 * matching callback in the offloaded map's dev_ops and returns -ENODEV
 * when the map is no longer bound to a device.
 */
int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value);
int bpf_map_offload_update_elem(struct bpf_map *map,
void *key, void *value, u64 flags);
int bpf_map_offload_delete_elem(struct bpf_map *map, void *key);
int bpf_map_offload_get_next_key(struct bpf_map *map,
void *key, void *next_key);

/* Returns true when @prog and @map are both not device-bound, or are both
 * bound to the same netdev; false otherwise.
 */
bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map);

#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);

/* Report whether offload was requested for the program owning @aux. */
static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
{
return aux->offload_requested;
}

/* Report whether @map is an offloaded (device-bound) map, i.e. whether it
 * uses bpf_map_offload_ops.  The test is annotated unlikely() so that the
 * software-only path is not penalized.
 */
static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
{
	if (unlikely(map->ops == &bpf_map_offload_ops))
		return true;

	return false;
}

/* Create a map on the device selected by attr->map_ifindex.  Returns the
 * new map or an ERR_PTR() on failure.
 */
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr);
/* Release an offloaded map, freeing the device side first if the map is
 * still bound to a netdev.
 */
void bpf_map_offload_map_free(struct bpf_map *map);
#else
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
union bpf_attr *attr)
Expand All @@ -574,6 +619,20 @@ static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
{
return false;
}

/* Stub for builds without CONFIG_NET && CONFIG_BPF_SYSCALL: no map can be
 * device-bound.
 */
static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
{
return false;
}

/* Stub for builds without CONFIG_NET && CONFIG_BPF_SYSCALL: map offload is
 * unavailable, so allocation always fails with -EOPNOTSUPP.
 */
static inline struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
{
return ERR_PTR(-EOPNOTSUPP);
}

/* Stub for builds without CONFIG_NET && CONFIG_BPF_SYSCALL: nothing to free,
 * since the matching alloc stub never creates a map.
 */
static inline void bpf_map_offload_map_free(struct bpf_map *map)
{
}
#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */

#if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_INET)
Expand Down
6 changes: 6 additions & 0 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,8 @@ enum bpf_netdev_command {
BPF_OFFLOAD_VERIFIER_PREP,
BPF_OFFLOAD_TRANSLATE,
BPF_OFFLOAD_DESTROY,
BPF_OFFLOAD_MAP_ALLOC,
BPF_OFFLOAD_MAP_FREE,
};

struct bpf_prog_offload_ops;
Expand Down Expand Up @@ -834,6 +836,10 @@ struct netdev_bpf {
struct {
struct bpf_prog *prog;
} offload;
/* BPF_OFFLOAD_MAP_ALLOC, BPF_OFFLOAD_MAP_FREE */
struct {
struct bpf_offloaded_map *offmap;
};
};
};

Expand Down
1 change: 1 addition & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ union bpf_attr {
* BPF_F_NUMA_NODE is set).
*/
char map_name[BPF_OBJ_NAME_LEN];
__u32 map_ifindex; /* ifindex of netdev to create on */
};

struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
Expand Down
188 changes: 181 additions & 7 deletions kernel/bpf/offload.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@
#include <linux/rtnetlink.h>
#include <linux/rwsem.h>

/* Protects bpf_prog_offload_devs and offload members of all progs.
/* Protects bpf_prog_offload_devs, bpf_map_offload_devs and offload members
* of all progs.
* RTNL lock cannot be taken when holding this lock.
*/
static DECLARE_RWSEM(bpf_devs_lock);
static LIST_HEAD(bpf_prog_offload_devs);
static LIST_HEAD(bpf_map_offload_devs);

static int bpf_dev_offload_check(struct net_device *netdev)
{
Expand Down Expand Up @@ -250,11 +252,186 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
const struct bpf_prog_ops bpf_offload_prog_ops = {
};

static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
enum bpf_netdev_command cmd)
{
struct netdev_bpf data = {};
struct net_device *netdev;

ASSERT_RTNL();

data.command = cmd;
data.offmap = offmap;
/* Caller must make sure netdev is valid */
netdev = offmap->netdev;

return netdev->netdev_ops->ndo_bpf(netdev, &data);
}

/* Allocate a map on the device identified by attr->map_ifindex.
 *
 * The caller needs CAP_SYS_ADMIN (-EPERM otherwise) and only
 * BPF_MAP_TYPE_HASH is accepted (-EINVAL otherwise).  The netdev is looked
 * up in the calling task's network namespace.  The device check and the
 * driver's BPF_OFFLOAD_MAP_ALLOC handling both run with RTNL held and
 * bpf_devs_lock held for writing; on success the map is added to
 * bpf_map_offload_devs before the locks are dropped.
 *
 * Returns the embedded struct bpf_map on success, or an ERR_PTR() carrying
 * the failure code.
 */
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
{
	struct net *net = current->nsproxy->net_ns;
	struct bpf_offloaded_map *offmap;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return ERR_PTR(-EPERM);
	if (attr->map_type != BPF_MAP_TYPE_HASH)
		return ERR_PTR(-EINVAL);

	offmap = kzalloc(sizeof(*offmap), GFP_USER);
	if (!offmap)
		return ERR_PTR(-ENOMEM);

	bpf_map_init_from_attr(&offmap->map, attr);

	rtnl_lock();
	down_write(&bpf_devs_lock);

	offmap->netdev = __dev_get_by_index(net, attr->map_ifindex);
	err = bpf_dev_offload_check(offmap->netdev);
	if (!err)
		err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_ALLOC);
	/* Only publish the map once the driver has accepted it */
	if (!err)
		list_add_tail(&offmap->offloads, &bpf_map_offload_devs);

	up_write(&bpf_devs_lock);
	rtnl_unlock();

	if (err) {
		kfree(offmap);
		return ERR_PTR(err);
	}

	return &offmap->map;
}

/* Tear down the device side of @offmap and detach it from its netdev.
 *
 * Asks the driver to free the map (warning if that fails), removes the
 * map's ID, unlinks the map from the offloaded maps list and clears
 * offmap->netdev so later operations see the map as orphaned.  Callers in
 * this file hold RTNL and bpf_devs_lock for writing.
 */
static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
{
	int err;

	err = bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE);
	WARN_ON(err);

	/* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */
	bpf_map_free_id(&offmap->map, true);

	list_del_init(&offmap->offloads);
	offmap->netdev = NULL;
}

/* Free an offloaded map.
 *
 * If the map is still bound to a netdev, the device side is destroyed
 * first, under RTNL and with bpf_devs_lock held for writing.  A map whose
 * netdev is already NULL has nothing left on the device, so only the host
 * memory is released.
 */
void bpf_map_offload_map_free(struct bpf_map *map)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);

	rtnl_lock();
	down_write(&bpf_devs_lock);

	if (!offmap->netdev)
		goto out_unlock;
	__bpf_map_offload_destroy(offmap);

out_unlock:
	up_write(&bpf_devs_lock);
	rtnl_unlock();

	kfree(offmap);
}

/* Look up @key in a device-bound map by calling the driver's
 * map_lookup_elem callback with bpf_devs_lock held for reading.
 *
 * Returns the driver's result, or -ENODEV if the map is no longer bound
 * to a netdev.
 */
int bpf_map_offload_lookup_elem(struct bpf_map *map, void *key, void *value)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);
	int err;

	down_read(&bpf_devs_lock);
	if (!offmap->netdev)
		err = -ENODEV;
	else
		err = offmap->dev_ops->map_lookup_elem(offmap, key, value);
	up_read(&bpf_devs_lock);

	return err;
}

/* Update @key in a device-bound map by calling the driver's
 * map_update_elem callback with bpf_devs_lock held for reading.
 *
 * @flags values above BPF_EXIST are rejected with -EINVAL before the lock
 * is taken.  Returns the driver's result, or -ENODEV if the map is no
 * longer bound to a netdev.
 */
int bpf_map_offload_update_elem(struct bpf_map *map,
				void *key, void *value, u64 flags)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);
	int err;

	if (unlikely(flags > BPF_EXIST))
		return -EINVAL;

	down_read(&bpf_devs_lock);
	if (!offmap->netdev) {
		err = -ENODEV;
		goto out_unlock;
	}
	err = offmap->dev_ops->map_update_elem(offmap, key, value, flags);

out_unlock:
	up_read(&bpf_devs_lock);

	return err;
}

/* Delete @key from a device-bound map by calling the driver's
 * map_delete_elem callback with bpf_devs_lock held for reading.
 *
 * Returns the driver's result, or -ENODEV if the map is no longer bound
 * to a netdev.
 */
int bpf_map_offload_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);
	int err;

	down_read(&bpf_devs_lock);
	err = offmap->netdev ?
	      offmap->dev_ops->map_delete_elem(offmap, key) : -ENODEV;
	up_read(&bpf_devs_lock);

	return err;
}

/* Fetch the key following @key from a device-bound map by calling the
 * driver's map_get_next_key callback with bpf_devs_lock held for reading.
 *
 * Returns the driver's result, or -ENODEV if the map is no longer bound
 * to a netdev.
 */
int bpf_map_offload_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	struct bpf_offloaded_map *offmap = map_to_offmap(map);
	int err;

	down_read(&bpf_devs_lock);
	if (!offmap->netdev) {
		err = -ENODEV;
	} else {
		err = offmap->dev_ops->map_get_next_key(offmap, key,
							next_key);
	}
	up_read(&bpf_devs_lock);

	return err;
}

/* Check that @prog and @map may be used together with respect to offload.
 *
 * A device-bound object never matches one that is not device-bound.  Two
 * objects that are both not device-bound always match.  When both are
 * device-bound they match only if the program still has offload state and
 * its netdev is the same as the map's; that comparison is made with
 * bpf_devs_lock held for reading.
 */
bool bpf_offload_dev_match(struct bpf_prog *prog, struct bpf_map *map)
{
	bool prog_bound = bpf_prog_is_dev_bound(prog->aux);
	bool map_bound = bpf_map_is_dev_bound(map);
	struct bpf_offloaded_map *offmap;
	struct bpf_prog_offload *offload;
	bool match = false;

	if (prog_bound != map_bound)
		return false;
	if (!prog_bound)
		return true;

	down_read(&bpf_devs_lock);
	offload = prog->aux->offload;
	offmap = map_to_offmap(map);
	if (offload)
		match = offload->netdev == offmap->netdev;
	up_read(&bpf_devs_lock);

	return match;
}

static void bpf_offload_orphan_all_progs(struct net_device *netdev)
{
struct bpf_prog_offload *offload, *tmp;

list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs, offloads)
if (offload->netdev == netdev)
__bpf_prog_offload_destroy(offload->prog);
}

static void bpf_offload_orphan_all_maps(struct net_device *netdev)
{
struct bpf_offloaded_map *offmap, *tmp;

list_for_each_entry_safe(offmap, tmp, &bpf_map_offload_devs, offloads)
if (offmap->netdev == netdev)
__bpf_map_offload_destroy(offmap);
}

static int bpf_offload_notification(struct notifier_block *notifier,
ulong event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
struct bpf_prog_offload *offload, *tmp;

ASSERT_RTNL();

Expand All @@ -265,11 +442,8 @@ static int bpf_offload_notification(struct notifier_block *notifier,
break;

down_write(&bpf_devs_lock);
list_for_each_entry_safe(offload, tmp, &bpf_prog_offload_devs,
offloads) {
if (offload->netdev == netdev)
__bpf_prog_offload_destroy(offload->prog);
}
bpf_offload_orphan_all_progs(netdev);
bpf_offload_orphan_all_maps(netdev);
up_write(&bpf_devs_lock);
break;
default:
Expand Down
Loading

0 comments on commit a388457

Please sign in to comment.