From d46edd671a147032e22cfeb271a5734703093649 Mon Sep 17 00:00:00 2001
From: Song Liu
Date: Thu, 30 Apr 2020 00:15:04 -0700
Subject: [PATCH 1/3] bpf: Sharing bpf runtime stats with BPF_ENABLE_STATS

Currently, sysctl kernel.bpf_stats_enabled controls BPF runtime stats.
Typical userspace tools use kernel.bpf_stats_enabled as follows:

  1. Enable kernel.bpf_stats_enabled;
  2. Check program run_time_ns;
  3. Sleep for the monitoring period;
  4. Check program run_time_ns again, calculate the difference;
  5. Disable kernel.bpf_stats_enabled.

The problem with this approach is that only one userspace tool can toggle
this sysctl. If multiple tools toggle the sysctl at the same time, the
measurement may be inaccurate.

To fix this problem while keeping backward compatibility, introduce a new
bpf command BPF_ENABLE_STATS. On success, this command enables stats and
returns a valid fd. BPF_ENABLE_STATS takes argument "type". Currently,
only one type, BPF_STATS_RUN_TIME, is supported. We can extend the command
to support other types of stats in the future.

With BPF_ENABLE_STATS, a userspace tool would use the following flow:

  1. Get an fd with BPF_ENABLE_STATS, and make sure it is valid;
  2. Check program run_time_ns;
  3. Sleep for the monitoring period;
  4. Check program run_time_ns again, calculate the difference;
  5. Close the fd.

Signed-off-by: Song Liu
Signed-off-by: Alexei Starovoitov
Link: https://lore.kernel.org/bpf/20200430071506.1408910-2-songliubraving@fb.com
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 11 +++++++
 kernel/bpf/syscall.c           | 57 ++++++++++++++++++++++++++++++++++
 kernel/sysctl.c                | 36 ++++++++++++++++++++-
 tools/include/uapi/linux/bpf.h | 11 +++++++
 5 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c07b1d2f38245..1262ec460ab35 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -987,6 +987,7 @@ _out: \
 
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
+extern struct mutex bpf_stats_enabled_mutex;
 
 /*
  * Block execution of BPF programs attached to instrumentation (perf,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0eccafae55bbc..705e4822f9978 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -115,6 +115,7 @@ enum bpf_cmd {
 	BPF_LINK_UPDATE,
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
+	BPF_ENABLE_STATS,
 };
 
 enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
+/* type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+	/* enabled run_time_ns and run_cnt */
+	BPF_STATS_RUN_TIME = 0,
+};
+
 enum bpf_stack_build_id_status {
 	/* user space need an empty entry to identify end of a trace */
 	BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
 		__u32		old_prog_fd;
 	} link_update;
 
+	struct { /* struct used by BPF_ENABLE_STATS command */
+		__u32		type;
+	} enable_stats;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c75b2dd2459c8..4f34eecec9cef 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3872,6 +3872,60 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
 	return fd;
 }
 
+DEFINE_MUTEX(bpf_stats_enabled_mutex);
+
+static int bpf_stats_release(struct inode *inode, struct file *file)
+{
+	mutex_lock(&bpf_stats_enabled_mutex);
+	static_key_slow_dec(&bpf_stats_enabled_key.key);
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return 0;
+}
+
+static const struct file_operations bpf_stats_fops = {
+	.release = bpf_stats_release,
+};
+
+static int bpf_enable_runtime_stats(void)
+{
+	int fd;
+
+	mutex_lock(&bpf_stats_enabled_mutex);
+
+	/* Set a very high limit to avoid overflow */
+	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
+		mutex_unlock(&bpf_stats_enabled_mutex);
+		return -EBUSY;
+	}
+
+	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
+	if (fd >= 0)
+		static_key_slow_inc(&bpf_stats_enabled_key.key);
+
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return fd;
+}
+
+#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
+
+static int bpf_enable_stats(union bpf_attr *attr)
+{
+
+	if (CHECK_ATTR(BPF_ENABLE_STATS))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (attr->enable_stats.type) {
+	case BPF_STATS_RUN_TIME:
+		return bpf_enable_runtime_stats();
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr;
@@ -3996,6 +4050,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = bpf_obj_get_next_id(&attr, uattr,
 					  &link_idr, &link_idr_lock);
 		break;
+	case BPF_ENABLE_STATS:
+		err = bpf_enable_stats(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e961286d0e145..7adfe5dbce9d3 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -201,6 +201,40 @@ static int max_extfrag_threshold = 1000;
 
 #endif /* CONFIG_SYSCTL */
 
+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_stats_handler(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp,
+			     loff_t *ppos)
+{
+	struct static_key *key = (struct static_key *)table->data;
+	static int saved_val;
+	int val, ret;
+	struct ctl_table tmp = {
+		.data	= &val,
+		.maxlen	= sizeof(val),
+		.mode	= table->mode,
+		.extra1	= SYSCTL_ZERO,
+		.extra2	= SYSCTL_ONE,
+	};
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&bpf_stats_enabled_mutex);
+	val = saved_val;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret && val != saved_val) {
+		if (val)
+			static_key_slow_inc(key);
+		else
+			static_key_slow_dec(key);
+		saved_val = val;
+	}
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return ret;
+}
+#endif
+
 /*
  * /proc/sys support
  */
@@ -2549,7 +2583,7 @@ static struct ctl_table kern_table[] = {
 		.data		= &bpf_stats_enabled_key.key,
 		.maxlen		= sizeof(bpf_stats_enabled_key),
 		.mode		= 0644,
-		.proc_handler	= proc_do_static_key,
+		.proc_handler	= bpf_stats_handler,
 	},
 #endif
 #if defined(CONFIG_TREE_RCU)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0eccafae55bbc..705e4822f9978 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -115,6 +115,7 @@ enum bpf_cmd {
 	BPF_LINK_UPDATE,
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
+	BPF_ENABLE_STATS,
 };
 
 enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
+/* type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+	/* enabled run_time_ns and run_cnt */
+	BPF_STATS_RUN_TIME = 0,
+};
+
 enum bpf_stack_build_id_status {
 	/* user space need an empty entry to identify end of a trace */
 	BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
 		__u32		old_prog_fd;
 	} link_update;
 
+	struct { /* struct used by BPF_ENABLE_STATS command */
+		__u32		type;
+	} enable_stats;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
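
For illustration, the new command can be exercised directly through the
bpf(2) syscall. The sketch below assumes a uapi <linux/bpf.h> that already
carries the BPF_ENABLE_STATS additions from this patch; the wrapper name
enable_run_time_stats is illustrative only and not part of the patch.

  #include <string.h>
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <linux/bpf.h>

  /* Illustrative helper, not part of the patch: enable run_time_ns stats
   * by issuing the new BPF_ENABLE_STATS command. On success the returned
   * fd keeps stats enabled until it is closed.
   */
  static int enable_run_time_stats(void)
  {
  	union bpf_attr attr;

  	memset(&attr, 0, sizeof(attr));
  	attr.enable_stats.type = BPF_STATS_RUN_TIME;

  	return syscall(__NR_bpf, BPF_ENABLE_STATS, &attr, sizeof(attr));
  }

Holding the returned fd is what keeps the stats enabled, so a tool should
keep it open for the whole measurement window and close it when done.
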
From 0bee106716cfb2c6da81916b968395db22bd7755 Mon Sep 17 00:00:00 2001
From: Song Liu
Date: Thu, 30 Apr 2020 00:15:05 -0700
Subject: [PATCH 2/3] libbpf: Add support for command BPF_ENABLE_STATS

bpf_enable_stats() is added to enable a given type of stats.

Signed-off-by: Song Liu
Signed-off-by: Alexei Starovoitov
Link: https://lore.kernel.org/bpf/20200430071506.1408910-3-songliubraving@fb.com
---
 tools/lib/bpf/bpf.c      | 10 ++++++++++
 tools/lib/bpf/bpf.h      |  1 +
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 12 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 8f2f0958d446a..43322f0d6c7f1 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -841,3 +841,13 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
 
 	return err;
 }
+
+int bpf_enable_stats(enum bpf_stats_type type)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.enable_stats.type = type;
+
+	return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 335b457b3a25d..1901b2777854a 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -231,6 +231,7 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
 				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
 				 __u64 *probe_offset, __u64 *probe_addr);
+LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
 
 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 7cd49aa380057..e03bd4db827e5 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -257,6 +257,7 @@ LIBBPF_0.0.8 {
 
 LIBBPF_0.0.9 {
 	global:
+		bpf_enable_stats;
 		bpf_link_get_fd_by_id;
 		bpf_link_get_next_id;
 } LIBBPF_0.0.8;
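
Putting the kernel command and the libbpf helper together, a monitoring
tool could implement the five-step flow from the first patch roughly as
sketched below. This is only a sketch: prog_fd is assumed to refer to an
already loaded program, the helper names read_run_time_ns and
measure_run_time_ns are illustrative, and run_time_ns is read with the
existing bpf_obj_get_info_by_fd() API.

  #include <unistd.h>
  #include <bpf/bpf.h>

  /* Illustrative helper: read the current run_time_ns counter of a program. */
  static int read_run_time_ns(int prog_fd, __u64 *run_time_ns)
  {
  	struct bpf_prog_info info = {};
  	__u32 info_len = sizeof(info);
  	int err;

  	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
  	if (!err)
  		*run_time_ns = info.run_time_ns;
  	return err;
  }

  /* Illustrative helper: measure run_time_ns spent by prog_fd over a window. */
  static int measure_run_time_ns(int prog_fd, unsigned int seconds, __u64 *delta)
  {
  	__u64 before = 0, after = 0;
  	int stats_fd, err;

  	/* 1. Enable run_time_ns stats; the fd keeps them enabled. */
  	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
  	if (stats_fd < 0)
  		return stats_fd;

  	/* 2-4. Sample, sleep for the monitoring period, sample again. */
  	err = read_run_time_ns(prog_fd, &before);
  	if (!err) {
  		sleep(seconds);
  		err = read_run_time_ns(prog_fd, &after);
  	}
  	if (!err)
  		*delta = after - before;

  	/* 5. Close the fd; the kernel drops the stats refcount. */
  	close(stats_fd);
  	return err;
  }
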
From 31a9f7fe93378ab587d758d5b2e96a237caa7b8c Mon Sep 17 00:00:00 2001
From: Song Liu
Date: Thu, 30 Apr 2020 00:15:06 -0700
Subject: [PATCH 3/3] bpf: Add selftest for BPF_ENABLE_STATS

Add test for BPF_ENABLE_STATS, which should enable run_time_ns stats.

  ~/selftests/bpf# ./test_progs -t enable_stats -v
  test_enable_stats:PASS:skel_open_and_load 0 nsec
  test_enable_stats:PASS:get_stats_fd 0 nsec
  test_enable_stats:PASS:attach_raw_tp 0 nsec
  test_enable_stats:PASS:get_prog_info 0 nsec
  test_enable_stats:PASS:check_stats_enabled 0 nsec
  test_enable_stats:PASS:check_run_cnt_valid 0 nsec
  Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Song Liu
Signed-off-by: Alexei Starovoitov
Acked-by: Andrii Nakryiko
Link: https://lore.kernel.org/bpf/20200430071506.1408910-4-songliubraving@fb.com
---
 .../selftests/bpf/prog_tests/enable_stats.c | 45 +++++++++++++++++++
 .../selftests/bpf/progs/test_enable_stats.c | 18 ++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/enable_stats.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_enable_stats.c

diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
new file mode 100644
index 0000000000000..2cb2085917e7d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_enable_stats.skel.h"
+
+void test_enable_stats(void)
+{
+	struct test_enable_stats *skel;
+	int stats_fd, err, prog_fd;
+	struct bpf_prog_info info;
+	__u32 info_len = sizeof(info);
+	int duration = 0;
+
+	skel = test_enable_stats__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+		return;
+
+	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+	if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno)) {
+		test_enable_stats__destroy(skel);
+		return;
+	}
+
+	err = test_enable_stats__attach(skel);
+	if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+		goto cleanup;
+
+	test_enable_stats__detach(skel);
+
+	prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
+	memset(&info, 0, info_len);
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (CHECK(err, "get_prog_info",
+		  "failed to get bpf_prog_info for fd %d\n", prog_fd))
+		goto cleanup;
+	if (CHECK(info.run_time_ns == 0, "check_stats_enabled",
+		  "failed to enable run_time_ns stats\n"))
+		goto cleanup;
+
+	CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid",
+	      "invalid run_cnt stats\n");
+
+cleanup:
+	test_enable_stats__destroy(skel);
+	close(stats_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c
new file mode 100644
index 0000000000000..01a002ade5292
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 count = 0;
+
+SEC("raw_tracepoint/sys_enter")
+int test_enable_stats(void *ctx)
+{
+	count += 1;
+	return 0;
+}