Merge branch 'bpf-add-support-for-sys-enter-exit-tracepoints'
Yonghong Song says:

====================
bpf: add support for sys_{enter|exit}_* tracepoints

Currently, bpf programs cannot be attached to sys_enter_* and sys_exit_*
style tracepoints. The main reason is that syscalls/sys_enter_* and
syscalls/sys_exit_* tracepoints are treated differently from other
tracepoints and there is no bpf hook for them.

This patch set adds bpf support for these syscall tracepoints and also
adds a test case for it.

Changelogs:
v3 -> v4:
 - Check the legality of ctx offset access for syscall tracepoints as well.
   trace_event_get_offsets will return the correct max offset for each
   specific syscall tracepoint.
 - Use a variable length array to avoid hardcoding 6 as the maximum
   number of arguments beyond syscall_nr.
v2 -> v3:
 - Fix a build issue
v1 -> v2:
 - Do not use TRACE_EVENT_FL_CAP_ANY to identify syscall tracepoints.
   Instead use trace_event_call->class.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Aug 7, 2017
2 parents d226a2b + 1da236b commit 1c14dc4
Showing 6 changed files with 206 additions and 6 deletions.
12 changes: 12 additions & 0 deletions include/linux/syscalls.h
@@ -172,8 +172,20 @@ extern struct trace_event_functions exit_syscall_print_funcs;
	static struct syscall_metadata __used			\
	  __attribute__((section("__syscalls_metadata")))	\
	 *__p_syscall_meta_##sname = &__syscall_meta_##sname;

static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
{
	return tp_event->class == &event_class_syscall_enter ||
	       tp_event->class == &event_class_syscall_exit;
}

#else
#define SYSCALL_METADATA(sname, nb, ...)

static inline int is_syscall_trace_event(struct trace_event_call *tp_event)
{
	return 0;
}
#endif

#define SYSCALL_DEFINE0(sname)					\
10 changes: 6 additions & 4 deletions kernel/events/core.c
@@ -8050,7 +8050,7 @@ static void perf_event_free_bpf_handler(struct perf_event *event)

static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
{
	bool is_kprobe, is_tracepoint;
	bool is_kprobe, is_tracepoint, is_syscall_tp;
	struct bpf_prog *prog;

	if (event->attr.type != PERF_TYPE_TRACEPOINT)
@@ -8061,7 +8061,8 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)

	is_kprobe = event->tp_event->flags & TRACE_EVENT_FL_UKPROBE;
	is_tracepoint = event->tp_event->flags & TRACE_EVENT_FL_TRACEPOINT;
	if (!is_kprobe && !is_tracepoint)
	is_syscall_tp = is_syscall_trace_event(event->tp_event);
	if (!is_kprobe && !is_tracepoint && !is_syscall_tp)
		/* bpf programs can only be attached to u/kprobe or tracepoint */
		return -EINVAL;

@@ -8070,13 +8071,14 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
		return PTR_ERR(prog);

	if ((is_kprobe && prog->type != BPF_PROG_TYPE_KPROBE) ||
	    (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
	    (is_tracepoint && prog->type != BPF_PROG_TYPE_TRACEPOINT) ||
	    (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT)) {
		/* valid fd, but invalid bpf program type */
		bpf_prog_put(prog);
		return -EINVAL;
	}

	if (is_tracepoint) {
	if (is_tracepoint || is_syscall_tp) {
		int off = trace_event_get_offsets(event->tp_event);

		if (prog->aux->max_ctx_offset > off) {
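
perf_event_set_bpf_prog() above is reached from the PERF_EVENT_IOC_SET_BPF ioctl on a perf event opened for the tracepoint, so with this change a perf event for syscalls/sys_enter_* or syscalls/sys_exit_* now accepts a BPF_PROG_TYPE_TRACEPOINT program. A minimal userspace sketch of that attach path follows; it is not part of this commit, error handling is trimmed, the helper name and debugfs path are illustrative assumptions, and prog_fd is assumed to come from an earlier BPF_PROG_LOAD (for example via load_bpf_file() in the sample further down).

#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* Hypothetical helper: attach an already-loaded BPF_PROG_TYPE_TRACEPOINT
 * program (prog_fd) to the syscalls/sys_enter_open tracepoint.
 * Returns the perf event fd, or -1 on failure.
 */
static int attach_syscall_tp(int prog_fd)
{
	const char *id_path =
		"/sys/kernel/debug/tracing/events/syscalls/sys_enter_open/id";
	struct perf_event_attr attr;
	char buf[32] = {0};
	int id_fd, efd;

	/* the tracepoint id is what perf_event_open() expects in attr.config */
	id_fd = open(id_path, O_RDONLY);
	if (id_fd < 0)
		return -1;
	if (read(id_fd, buf, sizeof(buf) - 1) <= 0) {
		close(id_fd);
		return -1;
	}
	close(id_fd);

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_TRACEPOINT;
	attr.size = sizeof(attr);
	attr.config = strtoull(buf, NULL, 0);
	attr.sample_type = PERF_SAMPLE_RAW;
	attr.sample_period = 1;
	attr.wakeup_events = 1;

	/* pid = -1, cpu = 0, as bpf_load.c does for the sample below; the
	 * attached bpf program runs regardless of this cpu choice.
	 */
	efd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (efd < 0)
		return -1;

	/* this ioctl lands in perf_event_set_bpf_prog() shown above */
	if (ioctl(efd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0 ||
	    ioctl(efd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
		close(efd);
		return -1;
	}
	return efd;
}

A program of the wrong type is rejected with -EINVAL by the hunk above, and the trace_event_get_offsets() check right after it refuses programs whose ctx accesses run past the fields this particular tracepoint exposes.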
53 changes: 51 additions & 2 deletions kernel/trace/trace_syscalls.c
@@ -559,11 +559,29 @@ static DECLARE_BITMAP(enabled_perf_exit_syscalls, NR_syscalls);
static int sys_perf_refcount_enter;
static int sys_perf_refcount_exit;

static int perf_call_bpf_enter(struct bpf_prog *prog, struct pt_regs *regs,
			       struct syscall_metadata *sys_data,
			       struct syscall_trace_enter *rec) {
	struct syscall_tp_t {
		unsigned long long regs;
		unsigned long syscall_nr;
		unsigned long args[sys_data->nb_args];
	} param;
	int i;

	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	for (i = 0; i < sys_data->nb_args; i++)
		param.args[i] = rec->args[i];
	return trace_call_bpf(prog, &param);
}

static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_enter *rec;
	struct hlist_head *head;
	struct bpf_prog *prog;
	int syscall_nr;
	int rctx;
	int size;
@@ -578,8 +596,9 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
	if (!sys_data)
		return;

	prog = READ_ONCE(sys_data->enter_event->prog);
	head = this_cpu_ptr(sys_data->enter_event->perf_events);
	if (hlist_empty(head))
	if (!prog && hlist_empty(head))
		return;

	/* get the size after alignment with the u32 buffer size field */
@@ -594,6 +613,13 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
	rec->nr = syscall_nr;
	syscall_get_arguments(current, regs, 0, sys_data->nb_args,
			      (unsigned long *)&rec->args);

	if ((prog && !perf_call_bpf_enter(prog, regs, sys_data, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(rec, size, rctx,
			      sys_data->enter_event->event.type, 1, regs,
			      head, NULL);
@@ -633,11 +659,26 @@ static void perf_sysenter_disable(struct trace_event_call *call)
	mutex_unlock(&syscall_trace_lock);
}

static int perf_call_bpf_exit(struct bpf_prog *prog, struct pt_regs *regs,
			      struct syscall_trace_exit *rec) {
	struct syscall_tp_t {
		unsigned long long regs;
		unsigned long syscall_nr;
		unsigned long ret;
	} param;

	*(struct pt_regs **)&param = regs;
	param.syscall_nr = rec->nr;
	param.ret = rec->ret;
	return trace_call_bpf(prog, &param);
}

static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
{
	struct syscall_metadata *sys_data;
	struct syscall_trace_exit *rec;
	struct hlist_head *head;
	struct bpf_prog *prog;
	int syscall_nr;
	int rctx;
	int size;
@@ -652,8 +693,9 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
	if (!sys_data)
		return;

	prog = READ_ONCE(sys_data->exit_event->prog);
	head = this_cpu_ptr(sys_data->exit_event->perf_events);
	if (hlist_empty(head))
	if (!prog && hlist_empty(head))
		return;

	/* We can probably do that at build time */
@@ -666,6 +708,13 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)

	rec->nr = syscall_nr;
	rec->ret = syscall_get_return_value(current, regs);

	if ((prog && !perf_call_bpf_exit(prog, regs, rec)) ||
	    hlist_empty(head)) {
		perf_swevent_put_recursion_context(rctx);
		return;
	}

	perf_trace_buf_submit(rec, size, rctx, sys_data->exit_event->event.type,
			      1, regs, head, NULL);
}
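
The struct syscall_tp_t built in perf_call_bpf_enter()/perf_call_bpf_exit() above is what trace_call_bpf() hands to the program, so a bpf program's context struct has to mirror that layout: a pointer-sized slot holding the pt_regs pointer, then the syscall number, then either the decoded arguments (enter) or the return value (exit). Combined with the perf_event_set_bpf_prog() check, a program whose max_ctx_offset runs past trace_event_get_offsets() for the specific tracepoint is refused. As an illustration only (field offsets assume a 64-bit kernel), the context for open(2) would look like the sketch below; the sample program later in this commit declares the same structs.

/* Illustrative ctx layout for syscalls/sys_enter_open and
 * syscalls/sys_exit_open on a 64-bit kernel, mirroring syscall_tp_t above.
 */
struct sys_enter_open_ctx {
	unsigned long long unused;	/* offset  0: slot holding the pt_regs pointer */
	long syscall_nr;		/* offset  8: rec->nr      */
	long filename_ptr;		/* offset 16: rec->args[0] */
	long flags;			/* offset 24: rec->args[1] */
	long mode;			/* offset 32: rec->args[2] */
};

struct sys_exit_open_ctx {
	unsigned long long unused;	/* offset  0: slot holding the pt_regs pointer */
	long syscall_nr;		/* offset  8: rec->nr  */
	long ret;			/* offset 16: rec->ret */
};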
4 changes: 4 additions & 0 deletions samples/bpf/Makefile
@@ -39,6 +39,7 @@ hostprogs-y += per_socket_stats_example
hostprogs-y += load_sock_ops
hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
hostprogs-y += syscall_tp

# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -82,6 +83,7 @@ test_map_in_map-objs := bpf_load.o $(LIBBPF) test_map_in_map_user.o
per_socket_stats_example-objs := $(LIBBPF) cookie_uid_helper_example.o
xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o

# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -125,6 +127,7 @@ always += tcp_iw_kern.o
always += tcp_clamp_kern.o
always += xdp_redirect_kern.o
always += xdp_redirect_map_kern.o
always += syscall_tp_kern.o

HOSTCFLAGS += -I$(objtree)/usr/include
HOSTCFLAGS += -I$(srctree)/tools/lib/
@@ -163,6 +166,7 @@ HOSTLOADLIBES_xdp_tx_iptunnel += -lelf
HOSTLOADLIBES_test_map_in_map += -lelf
HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
HOSTLOADLIBES_syscall_tp += -lelf

# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
62 changes: 62 additions & 0 deletions samples/bpf/syscall_tp_kern.c
@@ -0,0 +1,62 @@
/* Copyright (c) 2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

struct syscalls_enter_open_args {
	unsigned long long unused;
	long syscall_nr;
	long filename_ptr;
	long flags;
	long mode;
};

struct syscalls_exit_open_args {
	unsigned long long unused;
	long syscall_nr;
	long ret;
};

struct bpf_map_def SEC("maps") enter_open_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = 1,
};

struct bpf_map_def SEC("maps") exit_open_map = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = 1,
};

static __always_inline void count(void *map)
{
	u32 key = 0;
	u32 *value, init_val = 1;

	value = bpf_map_lookup_elem(map, &key);
	if (value)
		*value += 1;
	else
		bpf_map_update_elem(map, &key, &init_val, BPF_NOEXIST);
}

SEC("tracepoint/syscalls/sys_enter_open")
int trace_enter_open(struct syscalls_enter_open_args *ctx)
{
	count((void *)&enter_open_map);
	return 0;
}

SEC("tracepoint/syscalls/sys_exit_open")
int trace_enter_exit(struct syscalls_exit_open_args *ctx)
{
	count((void *)&exit_open_map);
	return 0;
}
71 changes: 71 additions & 0 deletions samples/bpf/syscall_tp_user.c
@@ -0,0 +1,71 @@
/* Copyright (c) 2017 Facebook
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <signal.h>
#include <linux/bpf.h>
#include <string.h>
#include <linux/perf_event.h>
#include <errno.h>
#include <assert.h>
#include <stdbool.h>
#include <sys/resource.h>
#include "libbpf.h"
#include "bpf_load.h"

/* This program verifies bpf attachment to tracepoint sys_enter_* and sys_exit_*.
 * This requires kernel CONFIG_FTRACE_SYSCALLS to be set.
 */

static void verify_map(int map_id)
{
	__u32 key = 0;
	__u32 val;

	if (bpf_map_lookup_elem(map_id, &key, &val) != 0) {
		fprintf(stderr, "map_lookup failed: %s\n", strerror(errno));
		return;
	}
	if (val == 0)
		fprintf(stderr, "failed: map #%d returns value 0\n", map_id);
}

int main(int argc, char **argv)
{
	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
	char filename[256];
	int fd;

	setrlimit(RLIMIT_MEMLOCK, &r);
	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);

	if (load_bpf_file(filename)) {
		fprintf(stderr, "%s", bpf_log_buf);
		return 1;
	}

	/* current load_bpf_file has perf_event_open default pid = -1
	 * and cpu = 0, which permits attached bpf execution on
	 * all cpus for all pid's. bpf program execution ignores
	 * cpu affinity.
	 */
	/* trigger some "open" operations */
	fd = open(filename, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "open failed: %s\n", strerror(errno));
		return 1;
	}
	close(fd);

	/* verify the map */
	verify_map(map_fd[0]);
	verify_map(map_fd[1]);

	return 0;
}
