Skip to content

Commit

Permalink
Merge branch 'bpf-under-cgroup'
Browse files Browse the repository at this point in the history
Sargun Dhillon says:

====================
Add test_current_task_under_cgroup bpf helper and test

This patchset includes a helper and an example to determine whether the probe is
currently executing in the context of a specific cgroup based on a cgroup bpf
map / array. The helper checks the cgroupsv2 hierarchy based on the handle in
the map and if the current cgroup is equal to it, or a descendant of it. The
helper was tested with the example program, and it was verified that the correct
behaviour occurs in the interrupt context.

In an earlier version of this patchset I had added an "opensnoop"-like tool, and
I realized I was basically reimplementing a lot of the code that already exists
in the bcc repo. So, instead I decided to write a test that creates a new mount
namespace, mounts up the cgroupv2 hierarchy, and does some basic tests.  I used
the sync syscall as a canary for these tests because it's a simple, 0-arg
syscall. Once this patch is accepted, adding support to opensnoop will be easy.

I also added a task_under_cgroup_hierarchy function in cgroups.h, as this
pattern is used in a couple places. Converting those can be done in a later
patchset.

Thanks to Alexei, Tejun, and Daniel for providing review.

v1->v2: Clean up
v2->v3: Move around ifdefs out of *.c files, add an "integration" test
v3->v4: De-genercize arraymap fetching function;
	rename helper from in_cgroup to under_cgroup (makes much more sense)
	Split adding cgroups task_under_cgroup_hierarchy function
v4->v5: Fix formatting
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 13, 2016
2 parents f9f9ab1 + 9e6e60e commit 7cac530
Show file tree
Hide file tree
Showing 9 changed files with 263 additions and 2 deletions.
23 changes: 23 additions & 0 deletions include/linux/cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp,
return cgrp->ancestor_ids[ancestor->level] == ancestor->id;
}

/**
* task_under_cgroup_hierarchy - test task's membership of cgroup ancestry
* @task: the task to be tested
* @ancestor: possible ancestor of @task's cgroup
*
* Tests whether @task's default cgroup hierarchy is a descendant of @ancestor.
* It follows all the same rules as cgroup_is_descendant, and only applies
* to the default hierarchy.
*/
static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
struct cgroup *ancestor)
{
struct css_set *cset = task_css_set(task);

return cgroup_is_descendant(cset->dfl_cgrp, ancestor);
}

/* no synchronization, the result can only be used as a hint */
static inline bool cgroup_is_populated(struct cgroup *cgrp)
{
Expand Down Expand Up @@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp)
#else /* !CONFIG_CGROUPS */

struct cgroup_subsys_state;
struct cgroup;

static inline void css_put(struct cgroup_subsys_state *css) {}
static inline int cgroup_attach_task_all(struct task_struct *from,
Expand All @@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {}
static inline int cgroup_init_early(void) { return 0; }
static inline int cgroup_init(void) { return 0; }

static inline bool task_under_cgroup_hierarchy(struct task_struct *task,
struct cgroup *ancestor)
{
return true;
}
#endif /* !CONFIG_CGROUPS */

/*
Expand Down
11 changes: 11 additions & 0 deletions include/uapi/linux/bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,17 @@ enum bpf_func_id {
*/
BPF_FUNC_probe_write_user,

/**
* bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task
* @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type
* @index: index of the cgroup in the bpf_map
* Return:
* == 0 current failed the cgroup2 descendant test
* == 1 current succeeded the cgroup2 descendant test
* < 0 error
*/
BPF_FUNC_current_task_under_cgroup,

__BPF_FUNC_MAX_ID,
};

Expand Down
2 changes: 1 addition & 1 deletion kernel/bpf/arraymap.c
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void)
}
late_initcall(register_perf_event_array_map);

#ifdef CONFIG_SOCK_CGROUP_DATA
#ifdef CONFIG_CGROUPS
static void *cgroup_fd_array_get_ptr(struct bpf_map *map,
struct file *map_file /* not used */,
int fd)
Expand Down
4 changes: 3 additions & 1 deletion kernel/bpf/verifier.c
Original file line number Diff line number Diff line change
Expand Up @@ -1053,7 +1053,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
goto error;
break;
case BPF_MAP_TYPE_CGROUP_ARRAY:
if (func_id != BPF_FUNC_skb_in_cgroup)
if (func_id != BPF_FUNC_skb_in_cgroup &&
func_id != BPF_FUNC_current_task_under_cgroup)
goto error;
break;
default:
Expand All @@ -1075,6 +1076,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id)
if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
goto error;
break;
case BPF_FUNC_current_task_under_cgroup:
case BPF_FUNC_skb_in_cgroup:
if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
goto error;
Expand Down
30 changes: 30 additions & 0 deletions kernel/trace/bpf_trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,34 @@ static const struct bpf_func_proto bpf_get_current_task_proto = {
.ret_type = RET_INTEGER,
};

static u64 bpf_current_task_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
struct bpf_map *map = (struct bpf_map *)(long)r1;
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct cgroup *cgrp;
u32 idx = (u32)r2;

if (unlikely(in_interrupt()))
return -EINVAL;

if (unlikely(idx >= array->map.max_entries))
return -E2BIG;

cgrp = READ_ONCE(array->ptrs[idx]);
if (unlikely(!cgrp))
return -EAGAIN;

return task_under_cgroup_hierarchy(current, cgrp);
}

static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
.func = bpf_current_task_under_cgroup,
.gpl_only = false,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_ANYTHING,
};

static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
{
switch (func_id) {
Expand Down Expand Up @@ -407,6 +435,8 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id)
return &bpf_perf_event_read_proto;
case BPF_FUNC_probe_write_user:
return bpf_get_probe_write_proto();
case BPF_FUNC_current_task_under_cgroup:
return &bpf_current_task_under_cgroup_proto;
default:
return NULL;
}
Expand Down
5 changes: 5 additions & 0 deletions samples/bpf/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ hostprogs-y += test_overhead
hostprogs-y += test_cgrp2_array_pin
hostprogs-y += xdp1
hostprogs-y += xdp2
hostprogs-y += test_current_task_under_cgroup

test_verifier-objs := test_verifier.o libbpf.o
test_maps-objs := test_maps.o libbpf.o
Expand All @@ -49,6 +50,8 @@ test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o
xdp1-objs := bpf_load.o libbpf.o xdp1_user.o
# reuse xdp1 source intentionally
xdp2-objs := bpf_load.o libbpf.o xdp1_user.o
test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \
test_current_task_under_cgroup_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
Expand All @@ -74,6 +77,7 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o
always += test_cgrp2_tc_kern.o
always += xdp1_kern.o
always += xdp2_kern.o
always += test_current_task_under_cgroup_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include

Expand All @@ -97,6 +101,7 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt
HOSTLOADLIBES_test_overhead += -lelf -lrt
HOSTLOADLIBES_xdp1 += -lelf
HOSTLOADLIBES_xdp2 += -lelf
HOSTLOADLIBES_test_current_task_under_cgroup += -lelf

# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
Expand Down
2 changes: 2 additions & 0 deletions samples/bpf/bpf_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
(void *) BPF_FUNC_get_stackid;
static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
(void *) BPF_FUNC_probe_write_user;
static int (*bpf_current_task_under_cgroup)(void *map, int index) =
(void *) BPF_FUNC_current_task_under_cgroup;

/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
Expand Down
43 changes: 43 additions & 0 deletions samples/bpf/test_current_task_under_cgroup_kern.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/

#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>
#include <linux/version.h>
#include "bpf_helpers.h"
#include <uapi/linux/utsname.h>

struct bpf_map_def SEC("maps") cgroup_map = {
.type = BPF_MAP_TYPE_CGROUP_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(u32),
.max_entries = 1,
};

struct bpf_map_def SEC("maps") perf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(u32),
.value_size = sizeof(u64),
.max_entries = 1,
};

/* Writes the last PID that called sync to a map at index 0 */
SEC("kprobe/sys_sync")
int bpf_prog1(struct pt_regs *ctx)
{
u64 pid = bpf_get_current_pid_tgid();
int idx = 0;

if (!bpf_current_task_under_cgroup(&cgroup_map, 0))
return 0;

bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY);
return 0;
}

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;
145 changes: 145 additions & 0 deletions samples/bpf/test_current_task_under_cgroup_user.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/* Copyright (c) 2016 Sargun Dhillon <sargun@sargun.me>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/

#define _GNU_SOURCE
#include <stdio.h>
#include <linux/bpf.h>
#include <unistd.h>
#include "libbpf.h"
#include "bpf_load.h"
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <linux/bpf.h>
#include <sched.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <linux/limits.h>

#define CGROUP_MOUNT_PATH "/mnt"
#define CGROUP_PATH "/mnt/my-cgroup"

#define clean_errno() (errno == 0 ? "None" : strerror(errno))
#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)

static int join_cgroup(char *path)
{
int fd, rc = 0;
pid_t pid = getpid();
char cgroup_path[PATH_MAX + 1];

snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path);

fd = open(cgroup_path, O_WRONLY);
if (fd < 0) {
log_err("Opening Cgroup");
return 1;
}

if (dprintf(fd, "%d\n", pid) < 0) {
log_err("Joining Cgroup");
rc = 1;
}
close(fd);
return rc;
}

int main(int argc, char **argv)
{
char filename[256];
int cg2, idx = 0;
pid_t remote_pid, local_pid = getpid();

snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
}

/*
* This is to avoid interfering with existing cgroups. Unfortunately,
* most people don't have cgroupv2 enabled at this point in time.
* It's easier to create our own mount namespace and manage it
* ourselves.
*/
if (unshare(CLONE_NEWNS)) {
log_err("unshare");
return 1;
}

if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
log_err("mount fakeroot");
return 1;
}

if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) {
log_err("mount cgroup2");
return 1;
}

if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) {
log_err("mkdir cgroup");
return 1;
}

cg2 = open(CGROUP_PATH, O_RDONLY);
if (cg2 < 0) {
log_err("opening target cgroup");
goto cleanup_cgroup_err;
}

if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) {
log_err("Adding target cgroup to map");
goto cleanup_cgroup_err;
}
if (join_cgroup("/mnt/my-cgroup")) {
log_err("Leaving target cgroup");
goto cleanup_cgroup_err;
}

/*
* The installed helper program catched the sync call, and should
* write it to the map.
*/

sync();
bpf_lookup_elem(map_fd[1], &idx, &remote_pid);

if (local_pid != remote_pid) {
fprintf(stderr,
"BPF Helper didn't write correct PID to map, but: %d\n",
remote_pid);
goto leave_cgroup_err;
}

/* Verify the negative scenario; leave the cgroup */
if (join_cgroup(CGROUP_MOUNT_PATH))
goto leave_cgroup_err;

remote_pid = 0;
bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY);

sync();
bpf_lookup_elem(map_fd[1], &idx, &remote_pid);

if (local_pid == remote_pid) {
fprintf(stderr, "BPF cgroup negative test did not work\n");
goto cleanup_cgroup_err;
}

rmdir(CGROUP_PATH);
return 0;

/* Error condition, cleanup */
leave_cgroup_err:
join_cgroup(CGROUP_MOUNT_PATH);
cleanup_cgroup_err:
rmdir(CGROUP_PATH);
return 1;
}

0 comments on commit 7cac530

Please sign in to comment.