Skip to content

Commit

Permalink
Merge branch 'cgroup-bpf_link'
Browse files Browse the repository at this point in the history
Andrii Nakryiko says:

====================
bpf_link abstraction itself was formalized in [0] with justifications for why
its semantics is a good fit for attaching BPF programs of various types. This
patch set adds bpf_link-based BPF program attachment mechanism for cgroup BPF
programs.

Cgroup BPF link is semantically compatible with current BPF_F_ALLOW_MULTI
semantics of attaching cgroup BPF programs directly. Thus cgroup bpf_link can
co-exist with legacy BPF program multi-attachment.

bpf_link is destroyed and automatically detached when the last open FD holding
the reference to bpf_link is closed. This means that by default, when the
process that created bpf_link exits, attached BPF program will be
automatically detached due to bpf_link's clean up code. Cgroup bpf_link, like
any other bpf_link, can be pinned in BPF FS and by those means survive the
exit of process that created the link. This is useful in many scenarios to
provide long-living BPF program attachments. Pinning also means that there
could be many owners of bpf_link through independent FDs.

Additionally, auto-detachmet of cgroup bpf_link is implemented. When cgroup is
dying it will automatically detach all active bpf_links. This ensures that
cgroup clean up is not delayed due to active bpf_link even despite no chance
for any BPF program to be run for a given cgroup. In that sense it's similar
to existing behavior of dropping refcnt of attached bpf_prog. But in the case
of bpf_link, bpf_link is not destroyed and is still available to user as long
as at least one active FD is still open (or if it's pinned in BPF FS).

There are two main cgroup-specific differences between bpf_link-based and
direct bpf_prog-based attachment.

First, as opposed to direct bpf_prog attachment, cgroup itself doesn't "own"
bpf_link, which makes it possible to auto-clean up attached bpf_link when user
process abruptly exits without explicitly detaching BPF program. This makes
for a safe default behavior proven in BPF tracing program types. But bpf_link
doesn't bump cgroup->bpf.refcnt as well and because of that doesn't prevent
cgroup from cleaning up its BPF state.

Second, only owners of bpf_link (those who created bpf_link in the first place
or obtained a new FD by opening bpf_link from BPF FS) can detach and/or update
it. This makes sure that no other process can accidentally remove/replace BPF
program.

This patch set also implements LINK_UPDATE sub-command, which allows to
replace bpf_link's underlying bpf_prog, similarly to BPF_F_REPLACE flag
behavior for direct bpf_prog cgroup attachment. Similarly to LINK_CREATE, it
is supposed to be generic command for different types of bpf_links.

  [0] https://lore.kernel.org/bpf/20200228223948.360936-1-andriin@fb.com/

v2->v3:
  - revert back to just MULTI mode (Alexei);
  - fix tinyconfig compilation warning (kbuild test robot);

v1->v2:
  - implement exclusive and overridable exclusive modes (Andrey Ignatov);
  - fix build for !CONFIG_CGROUP_BPF build;
  - add more selftests for non-multi mode and inter-operability;
====================

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
  • Loading branch information
Alexei Starovoitov committed Mar 31, 2020
2 parents e5ffcc9 + 7cccee4 commit 8596a75
Show file tree
Hide file tree
Showing 14 changed files with 930 additions and 99 deletions.
41 changes: 36 additions & 5 deletions include/linux/bpf-cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,18 @@ struct bpf_cgroup_storage {
struct rcu_head rcu;
};

struct bpf_cgroup_link {
struct bpf_link link;
struct cgroup *cgroup;
enum bpf_attach_type type;
};

extern const struct bpf_link_ops bpf_cgroup_link_lops;

struct bpf_prog_list {
struct list_head node;
struct bpf_prog *prog;
struct bpf_cgroup_link *link;
struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE];
};

Expand Down Expand Up @@ -84,20 +93,27 @@ struct cgroup_bpf {
int cgroup_bpf_inherit(struct cgroup *cgrp);
void cgroup_bpf_offline(struct cgroup *cgrp);

int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog *replace_prog,
int __cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type, u32 flags);
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_cgroup_link *link,
enum bpf_attach_type type);
int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
struct bpf_prog *new_prog);
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr);

/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
struct bpf_prog *replace_prog, enum bpf_attach_type type,
int cgroup_bpf_attach(struct cgroup *cgrp,
struct bpf_prog *prog, struct bpf_prog *replace_prog,
struct bpf_cgroup_link *link, enum bpf_attach_type type,
u32 flags);
int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
enum bpf_attach_type type, u32 flags);
enum bpf_attach_type type);
int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
struct bpf_prog *new_prog);
int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
union bpf_attr __user *uattr);

Expand Down Expand Up @@ -332,11 +348,13 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
enum bpf_prog_type ptype, struct bpf_prog *prog);
int cgroup_bpf_prog_detach(const union bpf_attr *attr,
enum bpf_prog_type ptype);
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr);
#else

struct bpf_prog;
struct bpf_link;
struct cgroup_bpf {};
static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
Expand All @@ -354,6 +372,19 @@ static inline int cgroup_bpf_prog_detach(const union bpf_attr *attr,
return -EINVAL;
}

static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
struct bpf_prog *prog)
{
return -EINVAL;
}

static inline int cgroup_bpf_replace(struct bpf_link *link,
struct bpf_prog *old_prog,
struct bpf_prog *new_prog)
{
return -EINVAL;
}

static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
Expand Down
10 changes: 9 additions & 1 deletion include/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -1082,15 +1082,23 @@ extern int sysctl_unprivileged_bpf_disabled;
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);

struct bpf_link;
struct bpf_link {
atomic64_t refcnt;
const struct bpf_link_ops *ops;
struct bpf_prog *prog;
struct work_struct work;
};

struct bpf_link_ops {
void (*release)(struct bpf_link *link);
void (*dealloc)(struct bpf_link *link);

};

void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
struct bpf_prog *prog);
void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
int link_fd);
void bpf_link_inc(struct bpf_link *link);
void bpf_link_put(struct bpf_link *link);
int bpf_link_new_fd(struct bpf_link *link);
Expand Down
22 changes: 21 additions & 1 deletion include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ enum bpf_cmd {
BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH,
BPF_LINK_CREATE,
BPF_LINK_UPDATE,
};

enum bpf_map_type {
Expand Down Expand Up @@ -541,7 +543,7 @@ union bpf_attr {
__u32 prog_cnt;
} query;

struct {
struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
Expand Down Expand Up @@ -569,6 +571,24 @@ union bpf_attr {
__u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */
} task_fd_query;

struct { /* struct used by BPF_LINK_CREATE command */
__u32 prog_fd; /* eBPF program to attach */
__u32 target_fd; /* object to attach to */
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
} link_create;

struct { /* struct used by BPF_LINK_UPDATE command */
__u32 link_fd; /* link fd */
/* new program fd to update link with */
__u32 new_prog_fd;
__u32 flags; /* extra flags */
/* expected link's program fd; is specified only if
* BPF_F_REPLACE flag is set in flags */
__u32 old_prog_fd;
} link_update;

} __attribute__((aligned(8)));

/* The description below is an attempt at providing documentation to eBPF
Expand Down
Loading

0 comments on commit 8596a75

Please sign in to comment.