From b1d18a7574d0df5eb4117c14742baf8bc2b9bb74 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 9 Feb 2022 15:19:57 -0800 Subject: [PATCH 01/29] bpf: Extend sys_bpf commands for bpf_syscall programs. bpf_sycall programs can be used directly by the kernel modules to load programs and create maps via kernel skeleton. . Export bpf_sys_bpf syscall wrapper to be used in kernel skeleton. . Export bpf_map_get to be used in kernel skeleton. . Allow prog_run cmd for bpf_syscall programs with recursion check. . Enable link_create and raw_tp_open cmds. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220209232001.27490-2-alexei.starovoitov@gmail.com --- kernel/bpf/syscall.c | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 72ce1edde950d..49f88b30662a4 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -985,6 +985,7 @@ struct bpf_map *bpf_map_get(u32 ufd) return map; } +EXPORT_SYMBOL(bpf_map_get); struct bpf_map *bpf_map_get_with_uref(u32 ufd) { @@ -4756,23 +4757,52 @@ static bool syscall_prog_is_valid_access(int off, int size, return true; } -BPF_CALL_3(bpf_sys_bpf, int, cmd, void *, attr, u32, attr_size) +BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size) { + struct bpf_prog * __maybe_unused prog; + switch (cmd) { case BPF_MAP_CREATE: case BPF_MAP_UPDATE_ELEM: case BPF_MAP_FREEZE: case BPF_PROG_LOAD: case BPF_BTF_LOAD: + case BPF_LINK_CREATE: + case BPF_RAW_TRACEPOINT_OPEN: break; - /* case BPF_PROG_TEST_RUN: - * is not part of this list to prevent recursive test_run - */ +#ifdef CONFIG_BPF_JIT /* __bpf_prog_enter_sleepable used by trampoline and JIT */ + case BPF_PROG_TEST_RUN: + if (attr->test.data_in || attr->test.data_out || + attr->test.ctx_out || attr->test.duration || + attr->test.repeat || attr->test.flags) + return -EINVAL; + + prog = bpf_prog_get_type(attr->test.prog_fd, BPF_PROG_TYPE_SYSCALL); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (attr->test.ctx_size_in < prog->aux->max_ctx_offset || + attr->test.ctx_size_in > U16_MAX) { + bpf_prog_put(prog); + return -EINVAL; + } + + if (!__bpf_prog_enter_sleepable(prog)) { + /* recursion detected */ + bpf_prog_put(prog); + return -EBUSY; + } + attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in); + __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */); + bpf_prog_put(prog); + return 0; +#endif default: return -EINVAL; } return __sys_bpf(cmd, KERNEL_BPFPTR(attr), attr_size); } +EXPORT_SYMBOL(bpf_sys_bpf); static const struct bpf_func_proto bpf_sys_bpf_proto = { .func = bpf_sys_bpf, From 6fe65f1b4db3fff305896e997c2804b7b42236ce Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 9 Feb 2022 15:19:58 -0800 Subject: [PATCH 02/29] libbpf: Prepare light skeleton for the kernel. Prepare light skeleton to be used in the kernel module and in the user space. The look and feel of lskel.h is mostly the same with the difference that for user space the skel->rodata is the same pointer before and after skel_load operation, while in the kernel the skel->rodata after skel_open and the skel->rodata after skel_load are different pointers. Typical usage of skeleton remains the same for kernel and user space: skel = my_bpf__open(); skel->rodata->my_global_var = init_val; err = my_bpf__load(skel); err = my_bpf__attach(skel); // access skel->rodata->my_global_var; // access skel->bss->another_var; Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220209232001.27490-3-alexei.starovoitov@gmail.com --- tools/lib/bpf/gen_loader.c | 15 ++- tools/lib/bpf/skel_internal.h | 185 ++++++++++++++++++++++++++++++---- 2 files changed, 179 insertions(+), 21 deletions(-) diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 8ecef1088ba2a..927745b080141 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -1043,18 +1043,27 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, value = add_data(gen, pvalue, value_size); key = add_data(gen, &zero, sizeof(zero)); - /* if (map_desc[map_idx].initial_value) - * copy_from_user(value, initial_value, value_size); + /* if (map_desc[map_idx].initial_value) { + * if (ctx->flags & BPF_SKEL_KERNEL) + * bpf_probe_read_kernel(value, value_size, initial_value); + * else + * bpf_copy_from_user(value, value_size, initial_value); + * } */ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, sizeof(struct bpf_loader_ctx) + sizeof(struct bpf_map_desc) * map_idx + offsetof(struct bpf_map_desc, initial_value))); - emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 4)); + emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 8)); emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE, 0, 0, 0, value)); emit(gen, BPF_MOV64_IMM(BPF_REG_2, value_size)); + emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, + offsetof(struct bpf_loader_ctx, flags))); + emit(gen, BPF_JMP_IMM(BPF_JSET, BPF_REG_0, BPF_SKEL_KERNEL, 2)); emit(gen, BPF_EMIT_CALL(BPF_FUNC_copy_from_user)); + emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); + emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); map_update_attr = add_data(gen, &attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index dcd3336512d42..bd6f4505e7b1e 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -3,9 +3,19 @@ #ifndef __SKEL_INTERNAL_H #define __SKEL_INTERNAL_H +#ifdef __KERNEL__ +#include +#include +#include +#include +#include +#else #include #include #include +#include +#include "bpf.h" +#endif #ifndef __NR_bpf # if defined(__mips__) && defined(_ABIO32) @@ -25,24 +35,23 @@ * requested during loader program generation. */ struct bpf_map_desc { - union { - /* input for the loader prog */ - struct { - __aligned_u64 initial_value; - __u32 max_entries; - }; - /* output of the loader prog */ - struct { - int map_fd; - }; - }; + /* output of the loader prog */ + int map_fd; + /* input for the loader prog */ + __u32 max_entries; + __aligned_u64 initial_value; }; struct bpf_prog_desc { int prog_fd; }; +enum { + BPF_SKEL_KERNEL = (1ULL << 0), +}; + struct bpf_loader_ctx { - size_t sz; + __u32 sz; + __u32 flags; __u32 log_level; __u32 log_size; __u64 log_buf; @@ -57,12 +66,144 @@ struct bpf_load_and_run_opts { const char *errstr; }; +long bpf_sys_bpf(__u32 cmd, void *attr, __u32 attr_size); + static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, unsigned int size) { +#ifdef __KERNEL__ + return bpf_sys_bpf(cmd, attr, size); +#else return syscall(__NR_bpf, cmd, attr, size); +#endif +} + +#ifdef __KERNEL__ +static inline int close(int fd) +{ + return close_fd(fd); +} + +static inline void *skel_alloc(size_t size) +{ + struct bpf_loader_ctx *ctx = kzalloc(size, GFP_KERNEL); + + if (!ctx) + return NULL; + ctx->flags |= BPF_SKEL_KERNEL; + return ctx; +} + +static inline void skel_free(const void *p) +{ + kfree(p); +} + +/* skel->bss/rodata maps are populated the following way: + * + * For kernel use: + * skel_prep_map_data() allocates kernel memory that kernel module can directly access. + * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value. + * The loader program will perform probe_read_kernel() from maps.rodata.initial_value. + * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and + * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree + * is not nessary. + * + * For user space: + * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly. + * Generated lskel stores the pointer in skel->rodata and in skel->maps.rodata.initial_value. + * The loader program will perform copy_from_user() from maps.rodata.initial_value. + * skel_finalize_map_data() remaps bpf array map value from the kernel memory into + * skel->rodata address. + * + * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for + * both kernel and user space. The generated loader program does + * either bpf_probe_read_kernel() or bpf_copy_from_user() from initial_value + * depending on bpf_loader_ctx->flags. + */ +static inline void skel_free_map_data(void *p, __u64 addr, size_t sz) +{ + if (addr != ~0ULL) + kvfree(p); + /* When addr == ~0ULL the 'p' points to + * ((struct bpf_array *)map)->value. See skel_finalize_map_data. + */ } +static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz) +{ + void *addr; + + addr = kvmalloc(val_sz, GFP_KERNEL); + if (!addr) + return NULL; + memcpy(addr, val, val_sz); + return addr; +} + +static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd) +{ + struct bpf_map *map; + void *addr = NULL; + + kvfree((void *) (long) *init_val); + *init_val = ~0ULL; + + /* At this point bpf_load_and_run() finished without error and + * 'fd' is a valid bpf map FD. All sanity checks below should succeed. + */ + map = bpf_map_get(fd); + if (IS_ERR(map)) + return NULL; + if (map->map_type != BPF_MAP_TYPE_ARRAY) + goto out; + addr = ((struct bpf_array *)map)->value; + /* the addr stays valid, since FD is not closed */ +out: + bpf_map_put(map); + return addr; +} + +#else + +static inline void *skel_alloc(size_t size) +{ + return calloc(1, size); +} + +static inline void skel_free(void *p) +{ + free(p); +} + +static inline void skel_free_map_data(void *p, __u64 addr, size_t sz) +{ + munmap(p, sz); +} + +static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz) +{ + void *addr; + + addr = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (addr == (void *) -1) + return NULL; + memcpy(addr, val, val_sz); + return addr; +} + +static inline void *skel_finalize_map_data(__u64 *init_val, size_t mmap_sz, int flags, int fd) +{ + void *addr; + + addr = mmap((void *) (long) *init_val, mmap_sz, flags, MAP_SHARED | MAP_FIXED, fd, 0); + if (addr == (void *) -1) + return NULL; + return addr; +} +#endif + static inline int skel_closenz(int fd) { if (fd > 0) @@ -136,22 +277,28 @@ static inline int skel_link_create(int prog_fd, int target_fd, return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz); } +#ifdef __KERNEL__ +#define set_err +#else +#define set_err err = -errno +#endif + static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) { int map_fd = -1, prog_fd = -1, key = 0, err; union bpf_attr attr; - map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1); + err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1); if (map_fd < 0) { opts->errstr = "failed to create loader map"; - err = -errno; + set_err; goto out; } err = skel_map_update_elem(map_fd, &key, opts->data, 0); if (err < 0) { opts->errstr = "failed to update loader map"; - err = -errno; + set_err; goto out; } @@ -166,10 +313,10 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) attr.log_size = opts->ctx->log_size; attr.log_buf = opts->ctx->log_buf; attr.prog_flags = BPF_F_SLEEPABLE; - prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); if (prog_fd < 0) { opts->errstr = "failed to load loader prog"; - err = -errno; + set_err; goto out; } @@ -181,10 +328,12 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) if (err < 0 || (int)attr.test.retval < 0) { opts->errstr = "failed to execute loader prog"; if (err < 0) { - err = -errno; + set_err; } else { err = (int)attr.test.retval; +#ifndef __KERNEL__ errno = -err; +#endif } goto out; } From 28d743f671272d7a5f676669c84438b0f9600936 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 9 Feb 2022 15:19:59 -0800 Subject: [PATCH 03/29] bpftool: Generalize light skeleton generation. Generealize light skeleton by hiding mmap details in skel_internal.h In this form generated lskel.h is usable both by user space and by the kernel. Note that previously #include was in *.lskel.h file. To avoid #ifdef-s in a generated lskel.h the include of bpf.h is moved to skel_internal.h, but skel_internal.h is also used by gen_loader.c which is part of libbpf. Therefore skel_internal.h does #include "bpf.h" in case of user space, so gen_loader.c and lskel.h have necessary definitions. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220209232001.27490-4-alexei.starovoitov@gmail.com --- tools/bpf/bpftool/gen.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index eacfc6a2060d6..6f2e20be0c62f 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -472,7 +472,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) continue; if (bpf_map__is_internal(map) && (bpf_map__map_flags(map) & BPF_F_MMAPABLE)) - printf("\tmunmap(skel->%1$s, %2$zd);\n", + printf("\tskel_free_map_data(skel->%1$s, skel->maps.%1$s.initial_value, %2$zd);\n", ident, bpf_map_mmap_sz(map)); codegen("\ \n\ @@ -481,7 +481,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name) } codegen("\ \n\ - free(skel); \n\ + skel_free(skel); \n\ } \n\ ", obj_name); @@ -525,7 +525,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h { \n\ struct %1$s *skel; \n\ \n\ - skel = calloc(sizeof(*skel), 1); \n\ + skel = skel_alloc(sizeof(*skel)); \n\ if (!skel) \n\ goto cleanup; \n\ skel->ctx.sz = (void *)&skel->links - (void *)skel; \n\ @@ -543,19 +543,18 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h continue; codegen("\ - \n\ - skel->%1$s = \n\ - mmap(NULL, %2$zd, PROT_READ | PROT_WRITE,\n\ - MAP_SHARED | MAP_ANONYMOUS, -1, 0); \n\ - if (skel->%1$s == (void *) -1) \n\ - goto cleanup; \n\ - memcpy(skel->%1$s, (void *)\"\\ \n\ - ", ident, bpf_map_mmap_sz(map)); + \n\ + skel->%1$s = skel_prep_map_data((void *)\"\\ \n\ + ", ident); mmap_data = bpf_map__initial_value(map, &mmap_size); print_hex(mmap_data, mmap_size); - printf("\", %2$zd);\n" - "\tskel->maps.%1$s.initial_value = (__u64)(long)skel->%1$s;\n", - ident, mmap_size); + codegen("\ + \n\ + \", %1$zd, %2$zd); \n\ + if (!skel->%3$s) \n\ + goto cleanup; \n\ + skel->maps.%3$s.initial_value = (__u64) (long) skel->%3$s;\n\ + ", bpf_map_mmap_sz(map), mmap_size, ident); } codegen("\ \n\ @@ -611,9 +610,13 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h else mmap_flags = "PROT_READ | PROT_WRITE"; - printf("\tskel->%1$s =\n" - "\t\tmmap(skel->%1$s, %2$zd, %3$s, MAP_SHARED | MAP_FIXED,\n" - "\t\t\tskel->maps.%1$s.map_fd, 0);\n", + codegen("\ + \n\ + skel->%1$s = skel_finalize_map_data(&skel->maps.%1$s.initial_value, \n\ + %2$zd, %3$s, skel->maps.%1$s.map_fd);\n\ + if (!skel->%1$s) \n\ + return -ENOMEM; \n\ + ", ident, bpf_map_mmap_sz(map), mmap_flags); } codegen("\ @@ -751,8 +754,6 @@ static int do_skeleton(int argc, char **argv) #ifndef %2$s \n\ #define %2$s \n\ \n\ - #include \n\ - #include \n\ #include \n\ \n\ struct %1$s { \n\ From d7beb3d6aba39480590b30c502fbaa2cc1e5e30b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 9 Feb 2022 15:20:00 -0800 Subject: [PATCH 04/29] bpf: Update iterators.lskel.h. Light skeleton and skel_internal.h have changed. Update iterators.lskel.h. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220209232001.27490-5-alexei.starovoitov@gmail.com --- .../bpf/preload/iterators/iterators.lskel.h | 141 +++++++++--------- 1 file changed, 69 insertions(+), 72 deletions(-) diff --git a/kernel/bpf/preload/iterators/iterators.lskel.h b/kernel/bpf/preload/iterators/iterators.lskel.h index d90562d672d2c..70f236a82fe19 100644 --- a/kernel/bpf/preload/iterators/iterators.lskel.h +++ b/kernel/bpf/preload/iterators/iterators.lskel.h @@ -3,8 +3,6 @@ #ifndef __ITERATORS_BPF_SKEL_H__ #define __ITERATORS_BPF_SKEL_H__ -#include -#include #include struct iterators_bpf { @@ -70,31 +68,28 @@ iterators_bpf__destroy(struct iterators_bpf *skel) iterators_bpf__detach(skel); skel_closenz(skel->progs.dump_bpf_map.prog_fd); skel_closenz(skel->progs.dump_bpf_prog.prog_fd); - munmap(skel->rodata, 4096); + skel_free_map_data(skel->rodata, skel->maps.rodata.initial_value, 4096); skel_closenz(skel->maps.rodata.map_fd); - free(skel); + skel_free(skel); } static inline struct iterators_bpf * iterators_bpf__open(void) { struct iterators_bpf *skel; - skel = calloc(sizeof(*skel), 1); + skel = skel_alloc(sizeof(*skel)); if (!skel) goto cleanup; skel->ctx.sz = (void *)&skel->links - (void *)skel; - skel->rodata = - mmap(NULL, 4096, PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); - if (skel->rodata == (void *) -1) - goto cleanup; - memcpy(skel->rodata, (void *)"\ + skel->rodata = skel_prep_map_data((void *)"\ \x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\ \x20\x20\x20\x6d\x61\x78\x5f\x65\x6e\x74\x72\x69\x65\x73\x0a\0\x25\x34\x75\x20\ \x25\x2d\x31\x36\x73\x25\x36\x64\x0a\0\x20\x20\x69\x64\x20\x6e\x61\x6d\x65\x20\ \x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x20\x61\x74\x74\x61\x63\x68\x65\ -\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0", 98); - skel->maps.rodata.initial_value = (__u64)(long)skel->rodata; +\x64\x0a\0\x25\x34\x75\x20\x25\x2d\x31\x36\x73\x20\x25\x73\x20\x25\x73\x0a\0", 4096, 98); + if (!skel->rodata) + goto cleanup; + skel->maps.rodata.initial_value = (__u64) (long) skel->rodata; return skel; cleanup: iterators_bpf__destroy(skel); @@ -326,7 +321,7 @@ iterators_bpf__load(struct iterators_bpf *skel) \0\0\x01\0\0\0\0\0\0\0\x13\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x10\0\0\0\0\0\ \0\0\x62\x70\x66\x5f\x69\x74\x65\x72\x5f\x62\x70\x66\x5f\x70\x72\x6f\x67\0\0\0\ \0\0\0\0"; - opts.insns_sz = 2184; + opts.insns_sz = 2216; opts.insns = (void *)"\ \xbf\x16\0\0\0\0\0\0\xbf\xa1\0\0\0\0\0\0\x07\x01\0\0\x78\xff\xff\xff\xb7\x02\0\ \0\x88\0\0\0\xb7\x03\0\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x05\0\x14\0\0\0\0\0\x61\ @@ -343,70 +338,72 @@ iterators_bpf__load(struct iterators_bpf *skel) \0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x30\x0e\0\0\xb7\x03\0\0\x1c\0\0\0\x85\0\0\0\ \xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xd4\xff\0\0\0\0\x63\x7a\x78\xff\0\0\0\0\ \x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x0e\0\0\x63\x01\0\0\0\ -\0\0\0\x61\x60\x20\0\0\0\0\0\x15\0\x03\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\0\0\0\x61\x60\x1c\0\0\0\0\0\x15\0\x03\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ \x5c\x0e\0\0\x63\x01\0\0\0\0\0\0\xb7\x01\0\0\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\ \0\x50\x0e\0\0\xb7\x03\0\0\x48\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\ \xc5\x07\xc3\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x63\x71\0\0\0\0\0\ -\0\x79\x63\x18\0\0\0\0\0\x15\x03\x04\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x98\ -\x0e\0\0\xb7\x02\0\0\x62\0\0\0\x85\0\0\0\x94\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\x63\x01\0\ -\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ -\x10\x0f\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x98\x0e\0\0\x18\ -\x61\0\0\0\0\0\0\0\0\0\0\x18\x0f\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x02\0\0\0\ -\x18\x62\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\xb7\x03\0\0\x20\0\0\0\x85\0\0\0\xa6\0\ -\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\xa3\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\0\ -\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\x63\x01\0\0\ -\0\0\0\0\xb7\x01\0\0\x16\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\xb7\x03\ -\0\0\x04\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x96\xff\0\0\0\0\ -\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x11\0\ -\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x38\x0f\0\0\x18\x61\0\0\0\0\ -\0\0\0\0\0\0\x70\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x40\ -\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\ -\0\0\0\0\0\0\0\0\0\x48\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xc8\x11\0\0\x7b\x01\ -\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xe8\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\ -\0\xe8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x18\x61\ -\0\0\0\0\0\0\0\0\0\0\xe0\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\x60\x08\0\0\0\0\0\x18\ -\x61\0\0\0\0\0\0\0\0\0\0\x80\x11\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\0\0\0\0\0\ -\x18\x61\0\0\0\0\0\0\0\0\0\0\x84\x11\0\0\x63\x01\0\0\0\0\0\0\x79\x60\x10\0\0\0\ -\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x88\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\xa0\x78\ -\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb0\x11\0\0\x63\x01\0\0\0\0\0\0\x18\ -\x61\0\0\0\0\0\0\0\0\0\0\xf8\x11\0\0\xb7\x02\0\0\x11\0\0\0\xb7\x03\0\0\x0c\0\0\ -\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x60\xff\ -\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x68\x11\0\0\x63\x70\x6c\0\0\0\0\0\x77\x07\ -\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\x18\x62\0\0\0\0\0\0\ -\0\0\0\0\x68\x11\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\ -\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd8\x11\0\0\x61\x01\0\0\0\0\0\0\xd5\x01\x02\0\ -\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x4e\xff\0\0\0\0\x63\ -\x7a\x80\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x10\x12\0\0\x18\x61\0\0\0\0\0\ -\0\0\0\0\0\x10\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x18\x12\ -\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\ -\0\0\0\0\0\0\0\x28\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x50\x17\0\0\x7b\x01\0\0\ -\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ -\x60\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd0\x15\0\0\x18\ -\x61\0\0\0\0\0\0\0\0\0\0\x80\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\ -\0\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x17\0\0\x7b\x01\0\0\0\0\0\0\x61\ -\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x17\0\0\x63\x01\0\0\0\0\0\0\ -\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x1c\x17\0\0\x63\x01\0\0\0\0\ -\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\x7b\x01\0\0\ -\0\0\0\0\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x48\x17\0\0\x63\ -\x01\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x90\x17\0\0\xb7\x02\0\0\x12\0\0\0\ -\xb7\x03\0\0\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\ -\0\0\xc5\x07\x17\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\x63\x70\x6c\ -\0\0\0\0\0\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\ -\x18\x62\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\ -\0\xbf\x07\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x70\x17\0\0\x61\x01\0\0\0\0\ -\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x05\ -\xff\0\0\0\0\x63\x7a\x84\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\xd5\x01\x02\0\0\0\ -\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa0\x80\xff\0\0\0\0\x63\x06\ -\x28\0\0\0\0\0\x61\xa0\x84\xff\0\0\0\0\x63\x06\x2c\0\0\0\0\0\x18\x61\0\0\0\0\0\ -\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\0\0\x63\x06\x18\0\0\0\0\0\xb7\0\0\0\0\0\0\0\ -\x95\0\0\0\0\0\0\0"; +\0\x79\x63\x20\0\0\0\0\0\x15\x03\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x98\ +\x0e\0\0\xb7\x02\0\0\x62\0\0\0\x61\x60\x04\0\0\0\0\0\x45\0\x02\0\x01\0\0\0\x85\ +\0\0\0\x94\0\0\0\x05\0\x01\0\0\0\0\0\x85\0\0\0\x71\0\0\0\x18\x62\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\x63\ +\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\ +\0\0\x10\x0f\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x98\x0e\0\0\ +\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x0f\0\0\x7b\x01\0\0\0\0\0\0\xb7\x01\0\0\x02\0\ +\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x08\x0f\0\0\xb7\x03\0\0\x20\0\0\0\x85\0\0\0\ +\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x9f\xff\0\0\0\0\x18\x62\0\0\0\0\0\0\0\0\ +\0\0\0\0\0\0\x61\x20\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\x63\ +\x01\0\0\0\0\0\0\xb7\x01\0\0\x16\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\x28\x0f\0\0\ +\xb7\x03\0\0\x04\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\x92\xff\ +\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x0f\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\ +\x78\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x38\x0f\0\0\x18\ +\x61\0\0\0\0\0\0\0\0\0\0\x70\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\ +\0\0\0\x40\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb8\x11\0\0\x7b\x01\0\0\0\0\0\0\ +\x18\x60\0\0\0\0\0\0\0\0\0\0\x48\x10\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xc8\x11\0\ +\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xe8\x10\0\0\x18\x61\0\0\0\0\ +\0\0\0\0\0\0\xe8\x11\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\0\0\ +\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xe0\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\x60\x08\0\0\ +\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x11\0\0\x63\x01\0\0\0\0\0\0\x61\x60\x0c\ +\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x84\x11\0\0\x63\x01\0\0\0\0\0\0\x79\x60\ +\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x88\x11\0\0\x7b\x01\0\0\0\0\0\0\x61\ +\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xb0\x11\0\0\x63\x01\0\0\0\0\0\ +\0\x18\x61\0\0\0\0\0\0\0\0\0\0\xf8\x11\0\0\xb7\x02\0\0\x11\0\0\0\xb7\x03\0\0\ +\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\0\0\0\0\0\xc5\x07\ +\x5c\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x68\x11\0\0\x63\x70\x6c\0\0\0\0\0\ +\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\0\0\0\x18\x62\0\0\ +\0\0\0\0\0\0\0\0\x68\x11\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\xa6\0\0\0\xbf\x07\ +\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd8\x11\0\0\x61\x01\0\0\0\0\0\0\xd5\ +\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\x07\x4a\xff\0\0\ +\0\0\x63\x7a\x80\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x10\x12\0\0\x18\x61\0\ +\0\0\0\0\0\0\0\0\0\x10\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\ +\x18\x12\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x08\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\ +\x60\0\0\0\0\0\0\0\0\0\0\x28\x14\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x50\x17\0\0\ +\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x30\x14\0\0\x18\x61\0\0\0\0\0\ +\0\0\0\0\0\x60\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\xd0\x15\ +\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x80\x17\0\0\x7b\x01\0\0\0\0\0\0\x18\x60\0\0\0\ +\0\0\0\0\0\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x78\x17\0\0\x7b\x01\0\0\0\0\ +\0\0\x61\x60\x08\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x18\x17\0\0\x63\x01\0\0\ +\0\0\0\0\x61\x60\x0c\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x1c\x17\0\0\x63\x01\ +\0\0\0\0\0\0\x79\x60\x10\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x20\x17\0\0\x7b\ +\x01\0\0\0\0\0\0\x61\xa0\x78\xff\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x48\x17\0\ +\0\x63\x01\0\0\0\0\0\0\x18\x61\0\0\0\0\0\0\0\0\0\0\x90\x17\0\0\xb7\x02\0\0\x12\ +\0\0\0\xb7\x03\0\0\x0c\0\0\0\xb7\x04\0\0\0\0\0\0\x85\0\0\0\xa7\0\0\0\xbf\x07\0\ +\0\0\0\0\0\xc5\x07\x13\xff\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\x63\ +\x70\x6c\0\0\0\0\0\x77\x07\0\0\x20\0\0\0\x63\x70\x70\0\0\0\0\0\xb7\x01\0\0\x05\ +\0\0\0\x18\x62\0\0\0\0\0\0\0\0\0\0\0\x17\0\0\xb7\x03\0\0\x8c\0\0\0\x85\0\0\0\ +\xa6\0\0\0\xbf\x07\0\0\0\0\0\0\x18\x60\0\0\0\0\0\0\0\0\0\0\x70\x17\0\0\x61\x01\ +\0\0\0\0\0\0\xd5\x01\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\xc5\ +\x07\x01\xff\0\0\0\0\x63\x7a\x84\xff\0\0\0\0\x61\xa1\x78\xff\0\0\0\0\xd5\x01\ +\x02\0\0\0\0\0\xbf\x19\0\0\0\0\0\0\x85\0\0\0\xa8\0\0\0\x61\xa0\x80\xff\0\0\0\0\ +\x63\x06\x28\0\0\0\0\0\x61\xa0\x84\xff\0\0\0\0\x63\x06\x2c\0\0\0\0\0\x18\x61\0\ +\0\0\0\0\0\0\0\0\0\0\0\0\0\x61\x10\0\0\0\0\0\0\x63\x06\x18\0\0\0\0\0\xb7\0\0\0\ +\0\0\0\0\x95\0\0\0\0\0\0\0"; err = bpf_load_and_run(&opts); if (err < 0) return err; - skel->rodata = - mmap(skel->rodata, 4096, PROT_READ, MAP_SHARED | MAP_FIXED, - skel->maps.rodata.map_fd, 0); + skel->rodata = skel_finalize_map_data(&skel->maps.rodata.initial_value, + 4096, PROT_READ, skel->maps.rodata.map_fd); + if (!skel->rodata) + return -ENOMEM; return 0; } From cb80ddc67152e72f28ff6ea8517acdf875d7381d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 9 Feb 2022 15:20:01 -0800 Subject: [PATCH 05/29] bpf: Convert bpf_preload.ko to use light skeleton. The main change is a move of the single line #include "iterators.lskel.h" from iterators/iterators.c to bpf_preload_kern.c. Which means that generated light skeleton can be used from user space or user mode driver like iterators.c or from the kernel module or the kernel itself. The direct use of light skeleton from the kernel module simplifies the code, since UMD is no longer necessary. The libbpf.a required user space and UMD. The CO-RE in the kernel and generated "loader bpf program" used by the light skeleton are capable to perform complex loading operations traditionally provided by libbpf. In addition UMD approach was launching UMD process every time bpffs has to be mounted. With light skeleton in the kernel the bpf_preload kernel module loads bpf iterators once and pins them multiple times into different bpffs mounts. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220209232001.27490-6-alexei.starovoitov@gmail.com --- kernel/bpf/inode.c | 39 ++---- kernel/bpf/preload/Kconfig | 7 +- kernel/bpf/preload/Makefile | 14 +-- kernel/bpf/preload/bpf_preload.h | 8 +- kernel/bpf/preload/bpf_preload_kern.c | 119 ++++++++---------- kernel/bpf/preload/bpf_preload_umd_blob.S | 7 -- .../preload/iterators/bpf_preload_common.h | 13 -- kernel/bpf/preload/iterators/iterators.c | 108 ---------------- kernel/bpf/syscall.c | 2 + 9 files changed, 70 insertions(+), 247 deletions(-) delete mode 100644 kernel/bpf/preload/bpf_preload_umd_blob.S delete mode 100644 kernel/bpf/preload/iterators/bpf_preload_common.h delete mode 100644 kernel/bpf/preload/iterators/iterators.c diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 5a8d9f7467bf4..4f841e16779e7 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -710,11 +710,10 @@ static DEFINE_MUTEX(bpf_preload_lock); static int populate_bpffs(struct dentry *parent) { struct bpf_preload_info objs[BPF_PRELOAD_LINKS] = {}; - struct bpf_link *links[BPF_PRELOAD_LINKS] = {}; int err = 0, i; /* grab the mutex to make sure the kernel interactions with bpf_preload - * UMD are serialized + * are serialized */ mutex_lock(&bpf_preload_lock); @@ -722,40 +721,22 @@ static int populate_bpffs(struct dentry *parent) if (!bpf_preload_mod_get()) goto out; - if (!bpf_preload_ops->info.tgid) { - /* preload() will start UMD that will load BPF iterator programs */ - err = bpf_preload_ops->preload(objs); - if (err) + err = bpf_preload_ops->preload(objs); + if (err) + goto out_put; + for (i = 0; i < BPF_PRELOAD_LINKS; i++) { + bpf_link_inc(objs[i].link); + err = bpf_iter_link_pin_kernel(parent, + objs[i].link_name, objs[i].link); + if (err) { + bpf_link_put(objs[i].link); goto out_put; - for (i = 0; i < BPF_PRELOAD_LINKS; i++) { - links[i] = bpf_link_by_id(objs[i].link_id); - if (IS_ERR(links[i])) { - err = PTR_ERR(links[i]); - goto out_put; - } } - for (i = 0; i < BPF_PRELOAD_LINKS; i++) { - err = bpf_iter_link_pin_kernel(parent, - objs[i].link_name, links[i]); - if (err) - goto out_put; - /* do not unlink successfully pinned links even - * if later link fails to pin - */ - links[i] = NULL; - } - /* finish() will tell UMD process to exit */ - err = bpf_preload_ops->finish(); - if (err) - goto out_put; } out_put: bpf_preload_mod_put(); out: mutex_unlock(&bpf_preload_lock); - for (i = 0; i < BPF_PRELOAD_LINKS && err; i++) - if (!IS_ERR_OR_NULL(links[i])) - bpf_link_put(links[i]); return err; } diff --git a/kernel/bpf/preload/Kconfig b/kernel/bpf/preload/Kconfig index 26bced2624738..c9d45c9d6918d 100644 --- a/kernel/bpf/preload/Kconfig +++ b/kernel/bpf/preload/Kconfig @@ -18,10 +18,9 @@ menuconfig BPF_PRELOAD if BPF_PRELOAD config BPF_PRELOAD_UMD - tristate "bpf_preload kernel module with user mode driver" - depends on CC_CAN_LINK - depends on m || CC_CAN_LINK_STATIC + tristate "bpf_preload kernel module" default m help - This builds bpf_preload kernel module with embedded user mode driver. + This builds bpf_preload kernel module with embedded BPF programs for + introspection in bpffs. endif diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile index baf47d9c7557e..167534e3b0b4d 100644 --- a/kernel/bpf/preload/Makefile +++ b/kernel/bpf/preload/Makefile @@ -3,16 +3,6 @@ LIBBPF_SRCS = $(srctree)/tools/lib/bpf/ LIBBPF_INCLUDE = $(LIBBPF_SRCS)/.. -userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \ - -I $(LIBBPF_INCLUDE) -Wno-unused-result - -userprogs := bpf_preload_umd - -bpf_preload_umd-objs := iterators/iterators.o - -$(obj)/bpf_preload_umd: - -$(obj)/bpf_preload_umd_blob.o: $(obj)/bpf_preload_umd - obj-$(CONFIG_BPF_PRELOAD_UMD) += bpf_preload.o -bpf_preload-objs += bpf_preload_kern.o bpf_preload_umd_blob.o +CFLAGS_bpf_preload_kern.o += -I $(LIBBPF_INCLUDE) +bpf_preload-objs += bpf_preload_kern.o diff --git a/kernel/bpf/preload/bpf_preload.h b/kernel/bpf/preload/bpf_preload.h index 2f9932276f2eb..f065c91213a0a 100644 --- a/kernel/bpf/preload/bpf_preload.h +++ b/kernel/bpf/preload/bpf_preload.h @@ -2,13 +2,13 @@ #ifndef _BPF_PRELOAD_H #define _BPF_PRELOAD_H -#include -#include "iterators/bpf_preload_common.h" +struct bpf_preload_info { + char link_name[16]; + struct bpf_link *link; +}; struct bpf_preload_ops { - struct umd_info info; int (*preload)(struct bpf_preload_info *); - int (*finish)(void); struct module *owner; }; extern struct bpf_preload_ops *bpf_preload_ops; diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c index 53736e52c1dfa..30207c048d360 100644 --- a/kernel/bpf/preload/bpf_preload_kern.c +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -2,101 +2,80 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include -#include -#include -#include #include "bpf_preload.h" +#include "iterators/iterators.lskel.h" -extern char bpf_preload_umd_start; -extern char bpf_preload_umd_end; +static struct bpf_link *maps_link, *progs_link; +static struct iterators_bpf *skel; -static int preload(struct bpf_preload_info *obj); -static int finish(void); +static void free_links_and_skel(void) +{ + if (!IS_ERR_OR_NULL(maps_link)) + bpf_link_put(maps_link); + if (!IS_ERR_OR_NULL(progs_link)) + bpf_link_put(progs_link); + iterators_bpf__destroy(skel); +} + +static int preload(struct bpf_preload_info *obj) +{ + strlcpy(obj[0].link_name, "maps.debug", sizeof(obj[0].link_name)); + obj[0].link = maps_link; + strlcpy(obj[1].link_name, "progs.debug", sizeof(obj[1].link_name)); + obj[1].link = progs_link; + return 0; +} -static struct bpf_preload_ops umd_ops = { - .info.driver_name = "bpf_preload", +static struct bpf_preload_ops ops = { .preload = preload, - .finish = finish, .owner = THIS_MODULE, }; -static int preload(struct bpf_preload_info *obj) +static int load_skel(void) { - int magic = BPF_PRELOAD_START; - loff_t pos = 0; - int i, err; - ssize_t n; + int err; - err = fork_usermode_driver(&umd_ops.info); + skel = iterators_bpf__open(); + if (!skel) + return -ENOMEM; + err = iterators_bpf__load(skel); if (err) - return err; - - /* send the start magic to let UMD proceed with loading BPF progs */ - n = kernel_write(umd_ops.info.pipe_to_umh, - &magic, sizeof(magic), &pos); - if (n != sizeof(magic)) - return -EPIPE; - - /* receive bpf_link IDs and names from UMD */ - pos = 0; - for (i = 0; i < BPF_PRELOAD_LINKS; i++) { - n = kernel_read(umd_ops.info.pipe_from_umh, - &obj[i], sizeof(*obj), &pos); - if (n != sizeof(*obj)) - return -EPIPE; + goto out; + err = iterators_bpf__attach(skel); + if (err) + goto out; + maps_link = bpf_link_get_from_fd(skel->links.dump_bpf_map_fd); + if (IS_ERR(maps_link)) { + err = PTR_ERR(maps_link); + goto out; } - return 0; -} - -static int finish(void) -{ - int magic = BPF_PRELOAD_END; - struct pid *tgid; - loff_t pos = 0; - ssize_t n; - - /* send the last magic to UMD. It will do a normal exit. */ - n = kernel_write(umd_ops.info.pipe_to_umh, - &magic, sizeof(magic), &pos); - if (n != sizeof(magic)) - return -EPIPE; - - tgid = umd_ops.info.tgid; - if (tgid) { - wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); - umd_cleanup_helper(&umd_ops.info); + progs_link = bpf_link_get_from_fd(skel->links.dump_bpf_prog_fd); + if (IS_ERR(progs_link)) { + err = PTR_ERR(progs_link); + goto out; } return 0; +out: + free_links_and_skel(); + return err; } -static int __init load_umd(void) +static int __init load(void) { int err; - err = umd_load_blob(&umd_ops.info, &bpf_preload_umd_start, - &bpf_preload_umd_end - &bpf_preload_umd_start); + err = load_skel(); if (err) return err; - bpf_preload_ops = &umd_ops; + bpf_preload_ops = &ops; return err; } -static void __exit fini_umd(void) +static void __exit fini(void) { - struct pid *tgid; - bpf_preload_ops = NULL; - - /* kill UMD in case it's still there due to earlier error */ - tgid = umd_ops.info.tgid; - if (tgid) { - kill_pid(tgid, SIGKILL, 1); - - wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); - umd_cleanup_helper(&umd_ops.info); - } - umd_unload_blob(&umd_ops.info); + free_links_and_skel(); } -late_initcall(load_umd); -module_exit(fini_umd); +late_initcall(load); +module_exit(fini); MODULE_LICENSE("GPL"); diff --git a/kernel/bpf/preload/bpf_preload_umd_blob.S b/kernel/bpf/preload/bpf_preload_umd_blob.S deleted file mode 100644 index f1f40223b5c39..0000000000000 --- a/kernel/bpf/preload/bpf_preload_umd_blob.S +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - .section .init.rodata, "a" - .global bpf_preload_umd_start -bpf_preload_umd_start: - .incbin "kernel/bpf/preload/bpf_preload_umd" - .global bpf_preload_umd_end -bpf_preload_umd_end: diff --git a/kernel/bpf/preload/iterators/bpf_preload_common.h b/kernel/bpf/preload/iterators/bpf_preload_common.h deleted file mode 100644 index 8464d1a48c052..0000000000000 --- a/kernel/bpf/preload/iterators/bpf_preload_common.h +++ /dev/null @@ -1,13 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _BPF_PRELOAD_COMMON_H -#define _BPF_PRELOAD_COMMON_H - -#define BPF_PRELOAD_START 0x5555 -#define BPF_PRELOAD_END 0xAAAA - -struct bpf_preload_info { - char link_name[16]; - int link_id; -}; - -#endif diff --git a/kernel/bpf/preload/iterators/iterators.c b/kernel/bpf/preload/iterators/iterators.c deleted file mode 100644 index 4dafe0f4f2b2f..0000000000000 --- a/kernel/bpf/preload/iterators/iterators.c +++ /dev/null @@ -1,108 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2020 Facebook */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "iterators.lskel.h" -#include "bpf_preload_common.h" - -int to_kernel = -1; -int from_kernel = 0; - -static int __bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len) -{ - union bpf_attr attr; - int err; - - memset(&attr, 0, sizeof(attr)); - attr.info.bpf_fd = bpf_fd; - attr.info.info_len = *info_len; - attr.info.info = (long) info; - - err = skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr)); - if (!err) - *info_len = attr.info.info_len; - return err; -} - -static int send_link_to_kernel(int link_fd, const char *link_name) -{ - struct bpf_preload_info obj = {}; - struct bpf_link_info info = {}; - __u32 info_len = sizeof(info); - int err; - - err = __bpf_obj_get_info_by_fd(link_fd, &info, &info_len); - if (err) - return err; - obj.link_id = info.id; - if (strlen(link_name) >= sizeof(obj.link_name)) - return -E2BIG; - strcpy(obj.link_name, link_name); - if (write(to_kernel, &obj, sizeof(obj)) != sizeof(obj)) - return -EPIPE; - return 0; -} - -int main(int argc, char **argv) -{ - struct iterators_bpf *skel; - int err, magic; - int debug_fd; - - debug_fd = open("/dev/console", O_WRONLY | O_NOCTTY | O_CLOEXEC); - if (debug_fd < 0) - return 1; - to_kernel = dup(1); - close(1); - dup(debug_fd); - /* now stdin and stderr point to /dev/console */ - - read(from_kernel, &magic, sizeof(magic)); - if (magic != BPF_PRELOAD_START) { - printf("bad start magic %d\n", magic); - return 1; - } - /* libbpf opens BPF object and loads it into the kernel */ - skel = iterators_bpf__open_and_load(); - if (!skel) { - /* iterators.skel.h is little endian. - * libbpf doesn't support automatic little->big conversion - * of BPF bytecode yet. - * The program load will fail in such case. - */ - printf("Failed load could be due to wrong endianness\n"); - return 1; - } - err = iterators_bpf__attach(skel); - if (err) - goto cleanup; - - /* send two bpf_link IDs with names to the kernel */ - err = send_link_to_kernel(skel->links.dump_bpf_map_fd, "maps.debug"); - if (err) - goto cleanup; - err = send_link_to_kernel(skel->links.dump_bpf_prog_fd, "progs.debug"); - if (err) - goto cleanup; - - /* The kernel will proceed with pinnging the links in bpffs. - * UMD will wait on read from pipe. - */ - read(from_kernel, &magic, sizeof(magic)); - if (magic != BPF_PRELOAD_END) { - printf("bad final magic %d\n", magic); - err = -EINVAL; - } -cleanup: - iterators_bpf__destroy(skel); - - return err != 0; -} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 49f88b30662a4..35646db3d950f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2490,6 +2490,7 @@ void bpf_link_put(struct bpf_link *link) bpf_link_free(link); } } +EXPORT_SYMBOL(bpf_link_put); static int bpf_link_release(struct inode *inode, struct file *filp) { @@ -2632,6 +2633,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) return link; } +EXPORT_SYMBOL(bpf_link_get_from_fd); struct bpf_tracing_link { struct bpf_link link; From a5a358abbc392c7ad7b12a23c99e8602460b5804 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 9 Feb 2022 01:35:12 +0100 Subject: [PATCH 06/29] selftest/bpf: Check invalid length in test_xdp_update_frags Update test_xdp_update_frags adding a test for a buffer size set to (MAX_SKB_FRAGS + 2) * PAGE_SIZE. The kernel is supposed to return -ENOMEM. Signed-off-by: Lorenzo Bianconi Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/3e4afa0ee4976854b2f0296998fe6754a80b62e5.1644366736.git.lorenzo@kernel.org --- .../bpf/prog_tests/xdp_adjust_frags.c | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c index d18e6f343c48d..2f033da4cd457 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c @@ -5,11 +5,12 @@ static void test_xdp_update_frags(void) { const char *file = "./test_xdp_update_frags.o"; + int err, prog_fd, max_skb_frags, buf_size, num; struct bpf_program *prog; struct bpf_object *obj; - int err, prog_fd; __u32 *offset; __u8 *buf; + FILE *f; LIBBPF_OPTS(bpf_test_run_opts, topts); obj = bpf_object__open(file); @@ -99,6 +100,41 @@ static void test_xdp_update_frags(void) ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]"); free(buf); + + /* test_xdp_update_frags: unsupported buffer size */ + f = fopen("/proc/sys/net/core/max_skb_frags", "r"); + if (!ASSERT_OK_PTR(f, "max_skb_frag file pointer")) + goto out; + + num = fscanf(f, "%d", &max_skb_frags); + fclose(f); + + if (!ASSERT_EQ(num, 1, "max_skb_frags read failed")) + goto out; + + /* xdp_buff linear area size is always set to 4096 in the + * bpf_prog_test_run_xdp routine. + */ + buf_size = 4096 + (max_skb_frags + 1) * sysconf(_SC_PAGE_SIZE); + buf = malloc(buf_size); + if (!ASSERT_OK_PTR(buf, "alloc buf")) + goto out; + + memset(buf, 0, buf_size); + offset = (__u32 *)buf; + *offset = 16; + buf[*offset] = 0xaa; + buf[*offset + 15] = 0xaa; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = buf_size; + topts.data_size_out = buf_size; + + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_EQ(err, -ENOMEM, + "unsupported buf size, possible non-default /proc/sys/net/core/max_skb_flags?"); + free(buf); out: bpf_object__close(obj); } From 4cc0991abd3954609a6929234bbb8c0fe7a0298d Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 10 Feb 2022 18:49:39 -0800 Subject: [PATCH 07/29] bpf: Fix bpf_prog_pack build for ppc64_defconfig bpf_prog_pack causes build error with powerpc ppc64_defconfig: kernel/bpf/core.c:830:23: error: variably modified 'bitmap' at file scope 830 | unsigned long bitmap[BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)]; | ^~~~~~ This is because the marco expands as: unsigned long bitmap[((((((1UL) << (16 + __pte_index_size)) / (1 << 6))) \ + ((sizeof(long) * 8)) - 1) / ((sizeof(long) * 8)))]; where __pte_index_size is a global variable. Fix it by turning bitmap into a 0-length array. Fixes: 57631054fae6 ("bpf: Introduce bpf_prog_pack allocator") Reported-by: Stephen Rothwell Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220211024939.2962537-1-song@kernel.org --- kernel/bpf/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 42d96549a8044..44623c9b5bb18 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -827,7 +827,7 @@ int bpf_jit_add_poke_descriptor(struct bpf_prog *prog, struct bpf_prog_pack { struct list_head list; void *ptr; - unsigned long bitmap[BITS_TO_LONGS(BPF_PROG_CHUNK_COUNT)]; + unsigned long bitmap[]; }; #define BPF_PROG_MAX_PACK_PROG_SIZE BPF_PROG_PACK_SIZE @@ -840,7 +840,7 @@ static struct bpf_prog_pack *alloc_new_pack(void) { struct bpf_prog_pack *pack; - pack = kzalloc(sizeof(*pack), GFP_KERNEL); + pack = kzalloc(sizeof(*pack) + BITS_TO_BYTES(BPF_PROG_CHUNK_COUNT), GFP_KERNEL); if (!pack) return NULL; pack->ptr = module_alloc(BPF_PROG_PACK_SIZE); From 61fce9693f0311949c59cc62a8fdee6e36243553 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 10 Feb 2022 10:42:36 +0000 Subject: [PATCH 08/29] bpftool: Add libbpf's version number to "bpftool version" output To help users check what version of libbpf is being used with bpftool, print the number along with bpftool's own version number. Output: $ ./bpftool version ./bpftool v5.16.0 using libbpf v0.7 features: libbfd, libbpf_strict, skeletons $ ./bpftool version --json --pretty { "version": "5.16.0", "libbpf_version": "0.7", "features": { "libbfd": true, "libbpf_strict": true, "skeletons": true } } Note that libbpf does not expose its patch number. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220210104237.11649-2-quentin@isovalent.com --- tools/bpf/bpftool/Documentation/common_options.rst | 13 +++++++------ tools/bpf/bpftool/main.c | 4 ++++ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/Documentation/common_options.rst b/tools/bpf/bpftool/Documentation/common_options.rst index 908487b9c2adf..4107a586b68b9 100644 --- a/tools/bpf/bpftool/Documentation/common_options.rst +++ b/tools/bpf/bpftool/Documentation/common_options.rst @@ -4,12 +4,13 @@ Print short help message (similar to **bpftool help**). -V, --version - Print version number (similar to **bpftool version**), and optional - features that were included when bpftool was compiled. Optional - features include linking against libbfd to provide the disassembler - for JIT-ted programs (**bpftool prog dump jited**) and usage of BPF - skeletons (some features like **bpftool prog profile** or showing - pids associated to BPF objects may rely on it). + Print bpftool's version number (similar to **bpftool version**), the + number of the libbpf version in use, and optional features that were + included when bpftool was compiled. Optional features include linking + against libbfd to provide the disassembler for JIT-ted programs + (**bpftool prog dump jited**) and usage of BPF skeletons (some + features like **bpftool prog profile** or showing pids associated to + BPF objects may rely on it). -j, --json Generate JSON output. For commands that cannot produce JSON, this diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 490f7bd54e4c2..0f2f8de514a49 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -89,6 +89,9 @@ static int do_version(int argc, char **argv) jsonw_name(json_wtr, "version"); jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION); + jsonw_name(json_wtr, "libbpf_version"); + jsonw_printf(json_wtr, "\"%d.%d\"", + libbpf_major_version(), libbpf_minor_version()); jsonw_name(json_wtr, "features"); jsonw_start_object(json_wtr); /* features */ @@ -102,6 +105,7 @@ static int do_version(int argc, char **argv) unsigned int nb_features = 0; printf("%s v%s\n", bin_name, BPFTOOL_VERSION); + printf("using libbpf %s\n", libbpf_version_string()); printf("features:"); if (has_libbfd) { printf(" libbfd"); From 9910a74d6ebf6e35d7adfae665022674fb90ea78 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 10 Feb 2022 10:42:37 +0000 Subject: [PATCH 09/29] bpftool: Update versioning scheme, align on libbpf's version number Since the notion of versions was introduced for bpftool, it has been following the version number of the kernel (using the version number corresponding to the tree in which bpftool's sources are located). The rationale was that bpftool's features are loosely tied to BPF features in the kernel, and that we could defer versioning to the kernel repository itself. But this versioning scheme is confusing today, because a bpftool binary should be able to work with both older and newer kernels, even if some of its recent features won't be available on older systems. Furthermore, if bpftool is ported to other systems in the future, keeping a Linux-based version number is not a good option. Looking at other options, we could either have a totally independent scheme for bpftool, or we could align it on libbpf's version number (with an offset on the major version number, to avoid going backwards). The latter comes with a few drawbacks: - We may want bpftool releases in-between two libbpf versions. We can always append pre-release numbers to distinguish versions, although those won't look as "official" as something with a proper release number. But at the same time, having bpftool with version numbers that look "official" hasn't really been an issue so far. - If no new feature lands in bpftool for some time, we may move from e.g. 6.7.0 to 6.8.0 when libbpf levels up and have two different versions which are in fact the same. - Following libbpf's versioning scheme sounds better than kernel's, but ultimately it doesn't make too much sense either, because even though bpftool uses the lib a lot, its behaviour is not that much conditioned by the internal evolution of the library (or by new APIs that it may not use). Having an independent versioning scheme solves the above, but at the cost of heavier maintenance. Developers will likely forget to increase the numbers when adding features or bug fixes, and we would take the risk of having to send occasional "catch-up" patches just to update the version number. Based on these considerations, this patch aligns bpftool's version number on libbpf's. This is not a perfect solution, but 1) it's certainly an improvement over the current scheme, 2) the issues raised above are all minor at the moment, and 3) we can still move to an independent scheme in the future if we realise we need it. Given that libbpf is currently at version 0.7.0, and bpftool, before this patch, was at 5.16, we use an offset of 6 for the major version, bumping bpftool to 6.7.0. Libbpf does not export its patch number; leave bpftool's patch number at 0 for now. It remains possible to manually override the version number by setting BPFTOOL_VERSION when calling make. Signed-off-by: Quentin Monnet Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220210104237.11649-3-quentin@isovalent.com --- tools/bpf/bpftool/Makefile | 6 ++---- tools/bpf/bpftool/main.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 83369f55df615..94b2c2f4ad43a 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -39,10 +39,6 @@ LIBBPF_BOOTSTRAP := $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h) LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h) -ifeq ($(BPFTOOL_VERSION),) -BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion) -endif - $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT) $(LIBBPF_HDRS_DIR) $(LIBBPF_BOOTSTRAP_HDRS_DIR): $(QUIET_MKDIR)mkdir -p $@ @@ -83,7 +79,9 @@ CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi +ifneq ($(BPFTOOL_VERSION),) CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"' +endif ifneq ($(EXTRA_CFLAGS),) CFLAGS += $(EXTRA_CFLAGS) endif diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 0f2f8de514a49..e81227761f5d1 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -71,6 +71,17 @@ static int do_help(int argc, char **argv) return 0; } +#ifndef BPFTOOL_VERSION +/* bpftool's major and minor version numbers are aligned on libbpf's. There is + * an offset of 6 for the version number, because bpftool's version was higher + * than libbpf's when we adopted this scheme. The patch number remains at 0 + * for now. Set BPFTOOL_VERSION to override. + */ +#define BPFTOOL_MAJOR_VERSION (LIBBPF_MAJOR_VERSION + 6) +#define BPFTOOL_MINOR_VERSION LIBBPF_MINOR_VERSION +#define BPFTOOL_PATCH_VERSION 0 +#endif + static int do_version(int argc, char **argv) { #ifdef HAVE_LIBBFD_SUPPORT @@ -88,7 +99,12 @@ static int do_version(int argc, char **argv) jsonw_start_object(json_wtr); /* root object */ jsonw_name(json_wtr, "version"); +#ifdef BPFTOOL_VERSION jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION); +#else + jsonw_printf(json_wtr, "\"%d.%d.%d\"", BPFTOOL_MAJOR_VERSION, + BPFTOOL_MINOR_VERSION, BPFTOOL_PATCH_VERSION); +#endif jsonw_name(json_wtr, "libbpf_version"); jsonw_printf(json_wtr, "\"%d.%d\"", libbpf_major_version(), libbpf_minor_version()); @@ -104,7 +120,12 @@ static int do_version(int argc, char **argv) } else { unsigned int nb_features = 0; +#ifdef BPFTOOL_VERSION printf("%s v%s\n", bin_name, BPFTOOL_VERSION); +#else + printf("%s v%d.%d.%d\n", bin_name, BPFTOOL_MAJOR_VERSION, + BPFTOOL_MINOR_VERSION, BPFTOOL_PATCH_VERSION); +#endif printf("using libbpf %s\n", libbpf_version_string()); printf("features:"); if (has_libbfd) { From d130e954a002b901391037c33b9ae11bae5aaa91 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Feb 2022 12:52:35 -0800 Subject: [PATCH 10/29] libbpf: Fix libbpf.map inheritance chain for LIBBPF_0.7.0 Ensure that LIBBPF_0.7.0 inherits everything from LIBBPF_0.6.0. Fixes: dbdd2c7f8cec ("libbpf: Add API to get/set log_level at per-program level") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220211205235.2089104-1-andrii@kernel.org --- tools/lib/bpf/libbpf.map | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index aef6253a90c89..47e70c9058d95 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -438,4 +438,4 @@ LIBBPF_0.7.0 { libbpf_probe_bpf_map_type; libbpf_probe_bpf_prog_type; libbpf_set_memlock_rlim_max; -}; +} LIBBPF_0.6.0; From 9c3de619e13ee6693ec5ac74f50b7aa89056a70e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= Date: Sat, 12 Feb 2022 00:48:19 +0100 Subject: [PATCH 11/29] libbpf: Use dynamically allocated buffer when receiving netlink messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When receiving netlink messages, libbpf was using a statically allocated stack buffer of 4k bytes. This happened to work fine on systems with a 4k page size, but on systems with larger page sizes it can lead to truncated messages. The user-visible impact of this was that libbpf would insist no XDP program was attached to some interfaces because that bit of the netlink message got chopped off. Fix this by switching to a dynamically allocated buffer; we borrow the approach from iproute2 of using recvmsg() with MSG_PEEK|MSG_TRUNC to get the actual size of the pending message before receiving it, adjusting the buffer as necessary. While we're at it, also add retries on interrupted system calls around the recvmsg() call. v2: - Move peek logic to libbpf_netlink_recv(), don't double free on ENOMEM. Fixes: 8bbb77b7c7a2 ("libbpf: Add various netlink helpers") Reported-by: Zhiqian Guan Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Andrii Nakryiko Acked-by: Kumar Kartikeya Dwivedi Link: https://lore.kernel.org/bpf/20220211234819.612288-1-toke@redhat.com --- tools/lib/bpf/netlink.c | 55 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index c39c37f99d5c4..a598061f6fea5 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -87,29 +87,75 @@ enum { NL_DONE, }; +static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags) +{ + int len; + + do { + len = recvmsg(sock, mhdr, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + + if (len < 0) + return -errno; + return len; +} + +static int alloc_iov(struct iovec *iov, int len) +{ + void *nbuf; + + nbuf = realloc(iov->iov_base, len); + if (!nbuf) + return -ENOMEM; + + iov->iov_base = nbuf; + iov->iov_len = len; + return 0; +} + static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, __dump_nlmsg_t _fn, libbpf_dump_nlmsg_t fn, void *cookie) { + struct iovec iov = {}; + struct msghdr mhdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; bool multipart = true; struct nlmsgerr *err; struct nlmsghdr *nh; - char buf[4096]; int len, ret; + ret = alloc_iov(&iov, 4096); + if (ret) + goto done; + while (multipart) { start: multipart = false; - len = recv(sock, buf, sizeof(buf), 0); + len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC); + if (len < 0) { + ret = len; + goto done; + } + + if (len > iov.iov_len) { + ret = alloc_iov(&iov, len); + if (ret) + goto done; + } + + len = netlink_recvmsg(sock, &mhdr, 0); if (len < 0) { - ret = -errno; + ret = len; goto done; } if (len == 0) break; - for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len); nh = NLMSG_NEXT(nh, len)) { if (nh->nlmsg_pid != nl_pid) { ret = -LIBBPF_ERRNO__WRNGPID; @@ -151,6 +197,7 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, } ret = 0; done: + free(iov.iov_base); return ret; } From edc21dc909c6c133a2727f063eadd7907af51f94 Mon Sep 17 00:00:00 2001 From: Yinjun Zhang Date: Tue, 8 Feb 2022 00:00:25 +0800 Subject: [PATCH 12/29] bpftool: Fix the error when lookup in no-btf maps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reworking btf__get_from_id() in commit a19f93cfafdf the error handling when calling bpf_btf_get_fd_by_id() changed. Before the rework if bpf_btf_get_fd_by_id() failed the error would not be propagated to callers of btf__get_from_id(), after the rework it is. This lead to a change in behavior in print_key_value() that now prints an error when trying to lookup keys in maps with no btf available. Fix this by following the way used in dumping maps to allow to look up keys in no-btf maps, by which it decides whether and where to get the btf info according to the btf value type. Fixes: a19f93cfafdf ("libbpf: Add internal helper to load BTF data by FD") Signed-off-by: Yinjun Zhang Signed-off-by: Simon Horman Signed-off-by: Andrii Nakryiko Reviewed-by: Niklas Söderlund Acked-by: Jiri Olsa Link: https://lore.kernel.org/bpf/1644249625-22479-1-git-send-email-yinjun.zhang@corigine.com --- tools/bpf/bpftool/map.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index c66a3c979b7a7..7a341a472ea41 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -1054,11 +1054,9 @@ static void print_key_value(struct bpf_map_info *info, void *key, json_writer_t *btf_wtr; struct btf *btf; - btf = btf__load_from_kernel_by_id(info->btf_id); - if (libbpf_get_error(btf)) { - p_err("failed to get btf"); + btf = get_map_kv_btf(info); + if (libbpf_get_error(btf)) return; - } if (json_output) { print_entry_json(info, key, value, btf); From d3b0b80064e0416850f818184b8f7bba9fdf8c40 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Feb 2022 11:09:27 -0800 Subject: [PATCH 13/29] selftests/bpf: Fix GCC11 compiler warnings in -O2 mode When compiling selftests in -O2 mode with GCC1, we get three new compilations warnings about potentially uninitialized variables. Compiler is wrong 2 out of 3 times, but this patch makes GCC11 happy anyways, as it doesn't cost us anything and makes optimized selftests build less annoying. The amazing one is tc_redirect case of token that is malloc()'ed before ASSERT_OK_PTR() check is done on it. Seems like GCC pessimistically assumes that libbpf_get_error() will dereference the contents of the pointer (no it won't), so the only way I found to shut GCC up was to do zero-initializaing calloc(). This one was new to me. For linfo case, GCC didn't realize that linfo_size will be initialized by the function that is returning linfo_size as out parameter. core_reloc.c case was a real bug, we can goto cleanup before initializing obj. But we don't need to do any clean up, so just continue iteration intstead. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220211190927.1434329-1-andrii@kernel.org --- tools/testing/selftests/bpf/prog_tests/btf.c | 2 +- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 2 +- tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8b652f5ce4239..ec823561b912f 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -6556,7 +6556,7 @@ static int test_get_linfo(const struct prog_info_raw_test *test, static void do_test_info_raw(unsigned int test_num) { const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; - unsigned int raw_btf_size, linfo_str_off, linfo_size; + unsigned int raw_btf_size, linfo_str_off, linfo_size = 0; int btf_fd = -1, prog_fd = -1, err = 0; void *raw_btf, *patched_linfo = NULL; const char *ret_next_str; diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index b8bdd1c3efcaa..68e4c8dafa001 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -872,7 +872,7 @@ void test_core_reloc(void) if (test_case->btf_src_file) { err = access(test_case->btf_src_file, R_OK); if (!ASSERT_OK(err, "btf_src_file")) - goto cleanup; + continue; } open_opts.btf_custom_path = test_case->btf_src_file; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index c2426df58e172..647b0a8336289 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -140,7 +140,7 @@ static struct nstoken *open_netns(const char *name) int err; struct nstoken *token; - token = malloc(sizeof(struct nstoken)); + token = calloc(1, sizeof(struct nstoken)); if (!ASSERT_OK_PTR(token, "malloc token")) return NULL; From bb8ffe61ea454a565e4fb1f450ef71237c9f032c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Feb 2022 21:57:32 -0800 Subject: [PATCH 14/29] bpftool: Add C++-specific open/load/etc skeleton wrappers Add C++-specific static methods for code-generated BPF skeleton for each skeleton operation: open, open_opts, open_and_load, load, attach, detach, destroy, and elf_bytes. This is to facilitate easier C++ templating on top of pure C BPF skeleton. In C, open/load/destroy/etc "methods" are of the form __() to avoid name collision with similar "methods" of other skeletons withint the same application. This works well, but is very inconvenient for C++ applications that would like to write generic (templated) wrappers around BPF skeleton to fit in with C++ code base and take advantage of destructors and other convenient C++ constructs. This patch makes it easier to build such generic templated wrappers by additionally defining C++ static methods for skeleton's struct with fixed names. This allows to refer to, say, open method as `T::open()` instead of having to somehow generate `T__open()` function call. Next patch adds an example template to test_cpp selftest to demonstrate how it's possible to have all the operations wrapped in a generic Skeleton type without explicitly passing function references. An example of generated declaration section without %1$s placeholders: #ifdef __cplusplus static struct test_attach_probe *open(const struct bpf_object_open_opts *opts = nullptr); static struct test_attach_probe *open_and_load(); static int load(struct test_attach_probe *skel); static int attach(struct test_attach_probe *skel); static void detach(struct test_attach_probe *skel); static void destroy(struct test_attach_probe *skel); static const void *elf_bytes(size_t *sz); #endif /* __cplusplus */ Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220212055733.539056-2-andrii@kernel.org --- tools/bpf/bpftool/gen.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 6f2e20be0c62f..022f30490567d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -831,6 +831,16 @@ static int do_skeleton(int argc, char **argv) codegen("\ \n\ + \n\ + #ifdef __cplusplus \n\ + static struct %1$s *open(const struct bpf_object_open_opts *opts = nullptr);\n\ + static struct %1$s *open_and_load(); \n\ + static int load(struct %1$s *skel); \n\ + static int attach(struct %1$s *skel); \n\ + static void detach(struct %1$s *skel); \n\ + static void destroy(struct %1$s *skel); \n\ + static const void *elf_bytes(size_t *sz); \n\ + #endif /* __cplusplus */ \n\ }; \n\ \n\ static void \n\ @@ -1025,9 +1035,19 @@ static int do_skeleton(int argc, char **argv) \"; \n\ } \n\ \n\ - #endif /* %s */ \n\ + #ifdef __cplusplus \n\ + struct %1$s *%1$s::open(const struct bpf_object_open_opts *opts) { return %1$s__open_opts(opts); }\n\ + struct %1$s *%1$s::open_and_load() { return %1$s__open_and_load(); } \n\ + int %1$s::load(struct %1$s *skel) { return %1$s__load(skel); } \n\ + int %1$s::attach(struct %1$s *skel) { return %1$s__attach(skel); } \n\ + void %1$s::detach(struct %1$s *skel) { %1$s__detach(skel); } \n\ + void %1$s::destroy(struct %1$s *skel) { %1$s__destroy(skel); } \n\ + const void *%1$s::elf_bytes(size_t *sz) { return %1$s__elf_bytes(sz); } \n\ + #endif /* __cplusplus */ \n\ + \n\ + #endif /* %2$s */ \n\ ", - header_guard); + obj_name, header_guard); err = 0; out: bpf_object__close(obj); From 189e0ecabc1717db62deab751afa755f07bdbcf8 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 11 Feb 2022 21:57:33 -0800 Subject: [PATCH 15/29] selftests/bpf: Add Skeleton templated wrapper as an example Add an example of how to build C++ template-based BPF skeleton wrapper. It's an actually runnable valid use of skeleton through more C++-like interface. Note that skeleton destuction happens implicitly through Skeleton's destructor. Also make test_cpp runnable as it would have crashed on invalid btf passed into btf_dump__new(). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220212055733.539056-3-andrii@kernel.org --- tools/testing/selftests/bpf/test_cpp.cpp | 87 +++++++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp index e00201de28901..773f165c4898e 100644 --- a/tools/testing/selftests/bpf/test_cpp.cpp +++ b/tools/testing/selftests/bpf/test_cpp.cpp @@ -5,18 +5,100 @@ #include #include "test_core_extern.skel.h" -/* do nothing, just make sure we can link successfully */ +template +class Skeleton { +private: + T *skel; +public: + Skeleton(): skel(nullptr) { } + + ~Skeleton() { if (skel) T::destroy(skel); } + + int open(const struct bpf_object_open_opts *opts = nullptr) + { + int err; + + if (skel) + return -EBUSY; + + skel = T::open(opts); + err = libbpf_get_error(skel); + if (err) { + skel = nullptr; + return err; + } + + return 0; + } + + int load() { return T::load(skel); } + + int attach() { return T::attach(skel); } + + void detach() { return T::detach(skel); } + + const T* operator->() const { return skel; } + + T* operator->() { return skel; } + + const T *get() const { return skel; } +}; static void dump_printf(void *ctx, const char *fmt, va_list args) { } +static void try_skeleton_template() +{ + Skeleton skel; + std::string prog_name; + int err; + LIBBPF_OPTS(bpf_object_open_opts, opts); + + err = skel.open(&opts); + if (err) { + fprintf(stderr, "Skeleton open failed: %d\n", err); + return; + } + + skel->data->kern_ver = 123; + skel->data->int_val = skel->data->ushort_val; + + err = skel.load(); + if (err) { + fprintf(stderr, "Skeleton load failed: %d\n", err); + return; + } + + if (!skel->kconfig->CONFIG_BPF_SYSCALL) + fprintf(stderr, "Seems like CONFIG_BPF_SYSCALL isn't set?!\n"); + + err = skel.attach(); + if (err) { + fprintf(stderr, "Skeleton attach failed: %d\n", err); + return; + } + + prog_name = bpf_program__name(skel->progs.handle_sys_enter); + if (prog_name != "handle_sys_enter") + fprintf(stderr, "Unexpected program name: %s\n", prog_name.c_str()); + + bpf_link__destroy(skel->links.handle_sys_enter); + skel->links.handle_sys_enter = bpf_program__attach(skel->progs.handle_sys_enter); + + skel.detach(); + + /* destructor will destory underlying skeleton */ +} + int main(int argc, char *argv[]) { struct btf_dump_opts opts = { }; struct test_core_extern *skel; struct btf *btf; + try_skeleton_template(); + /* libbpf.h */ libbpf_set_print(NULL); @@ -25,7 +107,8 @@ int main(int argc, char *argv[]) /* btf.h */ btf = btf__new(NULL, 0); - btf_dump__new(btf, dump_printf, nullptr, &opts); + if (!libbpf_get_error(btf)) + btf_dump__new(btf, dump_printf, nullptr, &opts); /* BPF skeleton */ skel = test_core_extern__open_and_load(); From 8cbf062a250ed52148badf6f3ffd03657dd4a3f0 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Tue, 15 Feb 2022 14:57:32 +0800 Subject: [PATCH 16/29] bpf: Reject kfunc calls that overflow insn->imm Now kfunc call uses s32 to represent the offset between the address of kfunc and __bpf_call_base, but it doesn't check whether or not s32 will be overflowed. The overflow is possible when kfunc is in module and the offset between module and kernel is greater than 2GB. Take arm64 as an example, before commit b2eed9b58811 ("arm64/kernel: kaslr: reduce module randomization range to 2 GB"), the offset between module symbol and __bpf_call_base will in 4GB range due to KASLR and may overflow s32. So add an extra checking to reject these invalid kfunc calls. Signed-off-by: Hou Tao Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20220215065732.3179408-1-houtao1@huawei.com --- kernel/bpf/verifier.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bbef86cb4e722..d7473fee247c5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1842,6 +1842,7 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) struct bpf_kfunc_desc *desc; const char *func_name; struct btf *desc_btf; + unsigned long call_imm; unsigned long addr; int err; @@ -1926,9 +1927,17 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset) return -EINVAL; } + call_imm = BPF_CALL_IMM(addr); + /* Check whether or not the relative offset overflows desc->imm */ + if ((unsigned long)(s32)call_imm != call_imm) { + verbose(env, "address of kernel function %s is out of range\n", + func_name); + return -EINVAL; + } + desc = &tab->descs[tab->nr_descs++]; desc->func_id = func_id; - desc->imm = BPF_CALL_IMM(addr); + desc->imm = call_imm; desc->offset = offset; err = btf_distill_func_proto(&env->log, desc_btf, func_proto, func_name, From adb8fa195efdfaac5852aaac24810b456ce43b04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mauricio=20V=C3=A1squez?= Date: Tue, 15 Feb 2022 17:58:50 -0500 Subject: [PATCH 17/29] libbpf: Split bpf_core_apply_relo() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BTFGen needs to run the core relocation logic in order to understand what are the types involved in a given relocation. Currently bpf_core_apply_relo() calculates and **applies** a relocation to an instruction. Having both operations in the same function makes it difficult to only calculate the relocation without patching the instruction. This commit splits that logic in two different phases: (1) calculate the relocation and (2) patch the instruction. For the first phase bpf_core_apply_relo() is renamed to bpf_core_calc_relo_insn() who is now only on charge of calculating the relocation, the second phase uses the already existing bpf_core_patch_insn(). bpf_object__relocate_core() uses both of them and the BTFGen will use only bpf_core_calc_relo_insn(). Signed-off-by: Mauricio Vásquez Signed-off-by: Rafael David Tinoco Signed-off-by: Lorenzo Fontana Signed-off-by: Leonardo Di Donato Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220215225856.671072-2-mauricio@kinvolk.io --- kernel/bpf/btf.c | 13 +++++-- tools/lib/bpf/libbpf.c | 71 ++++++++++++++++++++--------------- tools/lib/bpf/relo_core.c | 79 ++++++++++++--------------------------- tools/lib/bpf/relo_core.h | 42 ++++++++++++++++++--- 4 files changed, 109 insertions(+), 96 deletions(-) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 11740b300de93..f1d3d2a2f5f6e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -7225,6 +7225,7 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, { bool need_cands = relo->kind != BPF_CORE_TYPE_ID_LOCAL; struct bpf_core_cand_list cands = {}; + struct bpf_core_relo_res targ_res; struct bpf_core_spec *specs; int err; @@ -7264,13 +7265,19 @@ int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo, cands.len = cc->cnt; /* cand_cache_mutex needs to span the cache lookup and * copy of btf pointer into bpf_core_cand_list, - * since module can be unloaded while bpf_core_apply_relo_insn + * since module can be unloaded while bpf_core_calc_relo_insn * is working with module's btf. */ } - err = bpf_core_apply_relo_insn((void *)ctx->log, insn, relo->insn_off / 8, - relo, relo_idx, ctx->btf, &cands, specs); + err = bpf_core_calc_relo_insn((void *)ctx->log, relo, relo_idx, ctx->btf, &cands, specs, + &targ_res); + if (err) + goto out; + + err = bpf_core_patch_insn((void *)ctx->log, insn, relo->insn_off / 8, relo, relo_idx, + &targ_res); + out: kfree(specs); if (need_cands) { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 2262bcdfee921..d3c457fb045e1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5530,11 +5530,12 @@ static int record_relo_core(struct bpf_program *prog, return 0; } -static int bpf_core_apply_relo(struct bpf_program *prog, - const struct bpf_core_relo *relo, - int relo_idx, - const struct btf *local_btf, - struct hashmap *cand_cache) +static int bpf_core_resolve_relo(struct bpf_program *prog, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + struct hashmap *cand_cache, + struct bpf_core_relo_res *targ_res) { struct bpf_core_spec specs_scratch[3] = {}; const void *type_key = u32_as_hash_key(relo->type_id); @@ -5543,20 +5544,7 @@ static int bpf_core_apply_relo(struct bpf_program *prog, const struct btf_type *local_type; const char *local_name; __u32 local_id = relo->type_id; - struct bpf_insn *insn; - int insn_idx, err; - - if (relo->insn_off % BPF_INSN_SZ) - return -EINVAL; - insn_idx = relo->insn_off / BPF_INSN_SZ; - /* adjust insn_idx from section frame of reference to the local - * program's frame of reference; (sub-)program code is not yet - * relocated, so it's enough to just subtract in-section offset - */ - insn_idx = insn_idx - prog->sec_insn_off; - if (insn_idx >= prog->insns_cnt) - return -EINVAL; - insn = &prog->insns[insn_idx]; + int err; local_type = btf__type_by_id(local_btf, local_id); if (!local_type) @@ -5566,15 +5554,6 @@ static int bpf_core_apply_relo(struct bpf_program *prog, if (!local_name) return -EINVAL; - if (prog->obj->gen_loader) { - const char *spec_str = btf__name_by_offset(local_btf, relo->access_str_off); - - pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n", - prog - prog->obj->programs, relo->insn_off / 8, - btf_kind_str(local_type), local_name, spec_str, insn_idx); - return record_relo_core(prog, relo, insn_idx); - } - if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && !hashmap__find(cand_cache, type_key, (void **)&cands)) { cands = bpf_core_find_cands(prog->obj, local_btf, local_id); @@ -5591,19 +5570,21 @@ static int bpf_core_apply_relo(struct bpf_program *prog, } } - return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, - relo_idx, local_btf, cands, specs_scratch); + return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch, + targ_res); } static int bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) { const struct btf_ext_info_sec *sec; + struct bpf_core_relo_res targ_res; const struct bpf_core_relo *rec; const struct btf_ext_info *seg; struct hashmap_entry *entry; struct hashmap *cand_cache = NULL; struct bpf_program *prog; + struct bpf_insn *insn; const char *sec_name; int i, err = 0, insn_idx, sec_idx; @@ -5654,6 +5635,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { + if (rec->insn_off % BPF_INSN_SZ) + return -EINVAL; insn_idx = rec->insn_off / BPF_INSN_SZ; prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx); if (!prog) { @@ -5668,12 +5651,38 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) if (!prog->load) continue; - err = bpf_core_apply_relo(prog, rec, i, obj->btf, cand_cache); + /* adjust insn_idx from section frame of reference to the local + * program's frame of reference; (sub-)program code is not yet + * relocated, so it's enough to just subtract in-section offset + */ + insn_idx = insn_idx - prog->sec_insn_off; + if (insn_idx >= prog->insns_cnt) + return -EINVAL; + insn = &prog->insns[insn_idx]; + + if (prog->obj->gen_loader) { + err = record_relo_core(prog, rec, insn_idx); + if (err) { + pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", + prog->name, i, err); + goto out; + } + continue; + } + + err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); if (err) { pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", prog->name, i, err); goto out; } + + err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); + if (err) { + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", + prog->name, i, insn_idx, err); + goto out; + } } } diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 910865e29edc6..f946f23eab20b 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -775,31 +775,6 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo, return 0; } -struct bpf_core_relo_res -{ - /* expected value in the instruction, unless validate == false */ - __u32 orig_val; - /* new value that needs to be patched up to */ - __u32 new_val; - /* relocation unsuccessful, poison instruction, but don't fail load */ - bool poison; - /* some relocations can't be validated against orig_val */ - bool validate; - /* for field byte offset relocations or the forms: - * *(T *)(rX + ) = rY - * rX = *(T *)(rY + ), - * we remember original and resolved field size to adjust direct - * memory loads of pointers and integers; this is necessary for 32-bit - * host kernel architectures, but also allows to automatically - * relocate fields that were resized from, e.g., u32 to u64, etc. - */ - bool fail_memsz_adjust; - __u32 orig_sz; - __u32 orig_type_id; - __u32 new_sz; - __u32 new_type_id; -}; - /* Calculate original and target relocation values, given local and target * specs and relocation kind. These values are calculated for each candidate. * If there are multiple candidates, resulting values should all be consistent @@ -951,9 +926,9 @@ static int insn_bytes_to_bpf_size(__u32 sz) * 5. *(T *)(rX + ) = rY, where T is one of {u8, u16, u32, u64}; * 6. *(T *)(rX + ) = , where T is one of {u8, u16, u32, u64}. */ -static int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, - int insn_idx, const struct bpf_core_relo *relo, - int relo_idx, const struct bpf_core_relo_res *res) +int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, const struct bpf_core_relo *relo, + int relo_idx, const struct bpf_core_relo_res *res) { __u32 orig_val, new_val; __u8 class; @@ -1128,7 +1103,7 @@ static void bpf_core_dump_spec(const char *prog_name, int level, const struct bp } /* - * CO-RE relocate single instruction. + * Calculate CO-RE relocation target result. * * The outline and important points of the algorithm: * 1. For given local type, find corresponding candidate target types. @@ -1177,18 +1152,18 @@ static void bpf_core_dump_spec(const char *prog_name, int level, const struct bp * between multiple relocations for the same type ID and is updated as some * of the candidates are pruned due to structural incompatibility. */ -int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, - int insn_idx, - const struct bpf_core_relo *relo, - int relo_idx, - const struct btf *local_btf, - struct bpf_core_cand_list *cands, - struct bpf_core_spec *specs_scratch) +int bpf_core_calc_relo_insn(const char *prog_name, + const struct bpf_core_relo *relo, + int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch, + struct bpf_core_relo_res *targ_res) { struct bpf_core_spec *local_spec = &specs_scratch[0]; struct bpf_core_spec *cand_spec = &specs_scratch[1]; struct bpf_core_spec *targ_spec = &specs_scratch[2]; - struct bpf_core_relo_res cand_res, targ_res; + struct bpf_core_relo_res cand_res; const struct btf_type *local_type; const char *local_name; __u32 local_id; @@ -1223,12 +1198,12 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, /* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { /* bpf_insn's imm value could get out of sync during linking */ - memset(&targ_res, 0, sizeof(targ_res)); - targ_res.validate = false; - targ_res.poison = false; - targ_res.orig_val = local_spec->root_type_id; - targ_res.new_val = local_spec->root_type_id; - goto patch_insn; + memset(targ_res, 0, sizeof(*targ_res)); + targ_res->validate = false; + targ_res->poison = false; + targ_res->orig_val = local_spec->root_type_id; + targ_res->new_val = local_spec->root_type_id; + return 0; } /* libbpf doesn't support candidate search for anonymous types */ @@ -1262,7 +1237,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, return err; if (j == 0) { - targ_res = cand_res; + *targ_res = cand_res; *targ_spec = *cand_spec; } else if (cand_spec->bit_offset != targ_spec->bit_offset) { /* if there are many field relo candidates, they @@ -1272,7 +1247,8 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, prog_name, relo_idx, cand_spec->bit_offset, targ_spec->bit_offset); return -EINVAL; - } else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) { + } else if (cand_res.poison != targ_res->poison || + cand_res.new_val != targ_res->new_val) { /* all candidates should result in the same relocation * decision and value, otherwise it's dangerous to * proceed due to ambiguity @@ -1280,7 +1256,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, pr_warn("prog '%s': relo #%d: relocation decision ambiguity: %s %u != %s %u\n", prog_name, relo_idx, cand_res.poison ? "failure" : "success", cand_res.new_val, - targ_res.poison ? "failure" : "success", targ_res.new_val); + targ_res->poison ? "failure" : "success", targ_res->new_val); return -EINVAL; } @@ -1314,19 +1290,10 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn, prog_name, relo_idx); /* calculate single target relo result explicitly */ - err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res); + err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, targ_res); if (err) return err; } -patch_insn: - /* bpf_core_patch_insn() should know how to handle missing targ_spec */ - err = bpf_core_patch_insn(prog_name, insn, insn_idx, relo, relo_idx, &targ_res); - if (err) { - pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", - prog_name, relo_idx, relo->insn_off / 8, err); - return -EINVAL; - } - return 0; } diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 17799819ad7ce..a28bf3711ce27 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -44,14 +44,44 @@ struct bpf_core_spec { __u32 bit_offset; }; -int bpf_core_apply_relo_insn(const char *prog_name, - struct bpf_insn *insn, int insn_idx, - const struct bpf_core_relo *relo, int relo_idx, - const struct btf *local_btf, - struct bpf_core_cand_list *cands, - struct bpf_core_spec *specs_scratch); +struct bpf_core_relo_res { + /* expected value in the instruction, unless validate == false */ + __u32 orig_val; + /* new value that needs to be patched up to */ + __u32 new_val; + /* relocation unsuccessful, poison instruction, but don't fail load */ + bool poison; + /* some relocations can't be validated against orig_val */ + bool validate; + /* for field byte offset relocations or the forms: + * *(T *)(rX + ) = rY + * rX = *(T *)(rY + ), + * we remember original and resolved field size to adjust direct + * memory loads of pointers and integers; this is necessary for 32-bit + * host kernel architectures, but also allows to automatically + * relocate fields that were resized from, e.g., u32 to u64, etc. + */ + bool fail_memsz_adjust; + __u32 orig_sz; + __u32 orig_type_id; + __u32 new_sz; + __u32 new_type_id; +}; + int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id, const struct btf *targ_btf, __u32 targ_id); size_t bpf_core_essential_name_len(const char *name); + +int bpf_core_calc_relo_insn(const char *prog_name, + const struct bpf_core_relo *relo, int relo_idx, + const struct btf *local_btf, + struct bpf_core_cand_list *cands, + struct bpf_core_spec *specs_scratch, + struct bpf_core_relo_res *targ_res); + +int bpf_core_patch_insn(const char *prog_name, struct bpf_insn *insn, + int insn_idx, const struct bpf_core_relo *relo, + int relo_idx, const struct bpf_core_relo_res *res); + #endif From 8de6cae40bce6e19f39de60056cad39a7274169d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mauricio=20V=C3=A1squez?= Date: Tue, 15 Feb 2022 17:58:51 -0500 Subject: [PATCH 18/29] libbpf: Expose bpf_core_{add,free}_cands() to bpftool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Expose bpf_core_add_cands() and bpf_core_free_cands() to handle candidates list. Signed-off-by: Mauricio Vásquez Signed-off-by: Rafael David Tinoco Signed-off-by: Lorenzo Fontana Signed-off-by: Leonardo Di Donato Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220215225856.671072-3-mauricio@kinvolk.io --- tools/lib/bpf/libbpf.c | 17 ++++++++++------- tools/lib/bpf/libbpf_internal.h | 9 +++++++++ 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d3c457fb045e1..ad43b6ce825e6 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -5192,18 +5192,21 @@ size_t bpf_core_essential_name_len(const char *name) return n; } -static void bpf_core_free_cands(struct bpf_core_cand_list *cands) +void bpf_core_free_cands(struct bpf_core_cand_list *cands) { + if (!cands) + return; + free(cands->cands); free(cands); } -static int bpf_core_add_cands(struct bpf_core_cand *local_cand, - size_t local_essent_len, - const struct btf *targ_btf, - const char *targ_btf_name, - int targ_start_id, - struct bpf_core_cand_list *cands) +int bpf_core_add_cands(struct bpf_core_cand *local_cand, + size_t local_essent_len, + const struct btf *targ_btf, + const char *targ_btf_name, + int targ_start_id, + struct bpf_core_cand_list *cands) { struct bpf_core_cand *new_cands, *cand; const struct btf_type *t, *local_t; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index bc86b82e90d1c..4fda8bdf0a0dc 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -529,4 +529,13 @@ static inline int ensure_good_fd(int fd) return fd; } +/* The following two functions are exposed to bpftool */ +int bpf_core_add_cands(struct bpf_core_cand *local_cand, + size_t local_essent_len, + const struct btf *targ_btf, + const char *targ_btf_name, + int targ_start_id, + struct bpf_core_cand_list *cands); +void bpf_core_free_cands(struct bpf_core_cand_list *cands); + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ From 0a9f4a20c6153d187c8ee58133357ac671372f5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mauricio=20V=C3=A1squez?= Date: Tue, 15 Feb 2022 17:58:52 -0500 Subject: [PATCH 19/29] bpftool: Add gen min_core_btf command MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This command is implemented under the "gen" command in bpftool and the syntax is the following: $ bpftool gen min_core_btf INPUT OUTPUT OBJECT [OBJECT...] INPUT is the file that contains all the BTF types for a kernel and OUTPUT is the path of the minimize BTF file that will be created with only the types needed by the objects. Signed-off-by: Mauricio Vásquez Signed-off-by: Rafael David Tinoco Signed-off-by: Lorenzo Fontana Signed-off-by: Leonardo Di Donato Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220215225856.671072-4-mauricio@kinvolk.io --- tools/bpf/bpftool/bash-completion/bpftool | 6 +++- tools/bpf/bpftool/gen.c | 42 +++++++++++++++++++++-- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 493753a4962e9..958e1fd71b5c5 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -1003,9 +1003,13 @@ _bpftool() ;; esac ;; + min_core_btf) + _filedir + return 0 + ;; *) [[ $prev == $object ]] && \ - COMPREPLY=( $( compgen -W 'object skeleton help' -- "$cur" ) ) + COMPREPLY=( $( compgen -W 'object skeleton help min_core_btf' -- "$cur" ) ) ;; esac ;; diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 022f30490567d..8e066c7476912 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -1108,6 +1108,7 @@ static int do_help(int argc, char **argv) fprintf(stderr, "Usage: %1$s %2$s object OUTPUT_FILE INPUT_FILE [INPUT_FILE...]\n" " %1$s %2$s skeleton FILE [name OBJECT_NAME]\n" + " %1$s %2$s min_core_btf INPUT OUTPUT OBJECT [OBJECT...]\n" " %1$s %2$s help\n" "\n" " " HELP_SPEC_OPTIONS " |\n" @@ -1118,10 +1119,45 @@ static int do_help(int argc, char **argv) return 0; } +/* Create minimized BTF file for a set of BPF objects */ +static int minimize_btf(const char *src_btf, const char *dst_btf, const char *objspaths[]) +{ + return -EOPNOTSUPP; +} + +static int do_min_core_btf(int argc, char **argv) +{ + const char *input, *output, **objs; + int i, err; + + if (!REQ_ARGS(3)) { + usage(); + return -1; + } + + input = GET_ARG(); + output = GET_ARG(); + + objs = (const char **) calloc(argc + 1, sizeof(*objs)); + if (!objs) { + p_err("failed to allocate array for object names"); + return -ENOMEM; + } + + i = 0; + while (argc) + objs[i++] = GET_ARG(); + + err = minimize_btf(input, output, objs); + free(objs); + return err; +} + static const struct cmd cmds[] = { - { "object", do_object }, - { "skeleton", do_skeleton }, - { "help", do_help }, + { "object", do_object }, + { "skeleton", do_skeleton }, + { "min_core_btf", do_min_core_btf}, + { "help", do_help }, { 0 } }; From a9caaba399f9cff34c6bffa1b1fd673d3d6043a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mauricio=20V=C3=A1squez?= Date: Tue, 15 Feb 2022 17:58:53 -0500 Subject: [PATCH 20/29] bpftool: Implement "gen min_core_btf" logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit implements the logic for the gen min_core_btf command. Specifically, it implements the following functions: - minimize_btf(): receives the path of a source and destination BTF files and a list of BPF objects. This function records the relocations for all objects and then generates the BTF file by calling btfgen_get_btf() (implemented in the following commit). - btfgen_record_obj(): loads the BTF and BTF.ext sections of the BPF objects and loops through all CO-RE relocations. It uses bpf_core_calc_relo_insn() from libbpf and passes the target spec to btfgen_record_reloc(), that calls one of the following functions depending on the relocation kind. - btfgen_record_field_relo(): uses the target specification to mark all the types that are involved in a field-based CO-RE relocation. In this case types resolved and marked recursively using btfgen_mark_type(). Only the struct and union members (and their types) involved in the relocation are marked to optimize the size of the generated BTF file. - btfgen_record_type_relo(): marks the types involved in a type-based CO-RE relocation. In this case no members for the struct and union types are marked as libbpf doesn't use them while performing this kind of relocation. Pointed types are marked as they are used by libbpf in this case. - btfgen_record_enumval_relo(): marks the whole enum type for enum-based relocations. Signed-off-by: Mauricio Vásquez Signed-off-by: Rafael David Tinoco Signed-off-by: Lorenzo Fontana Signed-off-by: Leonardo Di Donato Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220215225856.671072-5-mauricio@kinvolk.io --- tools/bpf/bpftool/Makefile | 8 +- tools/bpf/bpftool/gen.c | 455 ++++++++++++++++++++++++++++++++++++- 2 files changed, 457 insertions(+), 6 deletions(-) diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 94b2c2f4ad43a..a137db96bd56a 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -34,10 +34,10 @@ LIBBPF_BOOTSTRAP_INCLUDE := $(LIBBPF_BOOTSTRAP_DESTDIR)/include LIBBPF_BOOTSTRAP_HDRS_DIR := $(LIBBPF_BOOTSTRAP_INCLUDE)/bpf LIBBPF_BOOTSTRAP := $(LIBBPF_BOOTSTRAP_OUTPUT)libbpf.a -# We need to copy hashmap.h and nlattr.h which is not otherwise exported by -# libbpf, but still required by bpftool. -LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h) -LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h) +# We need to copy hashmap.h, nlattr.h, relo_core.h and libbpf_internal.h +# which are not otherwise exported by libbpf, but still required by bpftool. +LIBBPF_INTERNAL_HDRS := $(addprefix $(LIBBPF_HDRS_DIR)/,hashmap.h nlattr.h relo_core.h libbpf_internal.h) +LIBBPF_BOOTSTRAP_INTERNAL_HDRS := $(addprefix $(LIBBPF_BOOTSTRAP_HDRS_DIR)/,hashmap.h relo_core.h libbpf_internal.h) $(LIBBPF_OUTPUT) $(BOOTSTRAP_OUTPUT) $(LIBBPF_BOOTSTRAP_OUTPUT) $(LIBBPF_HDRS_DIR) $(LIBBPF_BOOTSTRAP_HDRS_DIR): $(QUIET_MKDIR)mkdir -p $@ diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 8e066c7476912..806001020841d 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -1119,10 +1120,460 @@ static int do_help(int argc, char **argv) return 0; } -/* Create minimized BTF file for a set of BPF objects */ +static int btf_save_raw(const struct btf *btf, const char *path) +{ + const void *data; + FILE *f = NULL; + __u32 data_sz; + int err = 0; + + data = btf__raw_data(btf, &data_sz); + if (!data) + return -ENOMEM; + + f = fopen(path, "wb"); + if (!f) + return -errno; + + if (fwrite(data, 1, data_sz, f) != data_sz) + err = -errno; + + fclose(f); + return err; +} + +struct btfgen_info { + struct btf *src_btf; + struct btf *marked_btf; /* btf structure used to mark used types */ +}; + +static size_t btfgen_hash_fn(const void *key, void *ctx) +{ + return (size_t)key; +} + +static bool btfgen_equal_fn(const void *k1, const void *k2, void *ctx) +{ + return k1 == k2; +} + +static void *u32_as_hash_key(__u32 x) +{ + return (void *)(uintptr_t)x; +} + +static void btfgen_free_info(struct btfgen_info *info) +{ + if (!info) + return; + + btf__free(info->src_btf); + btf__free(info->marked_btf); + + free(info); +} + +static struct btfgen_info * +btfgen_new_info(const char *targ_btf_path) +{ + struct btfgen_info *info; + int err; + + info = calloc(1, sizeof(*info)); + if (!info) + return NULL; + + info->src_btf = btf__parse(targ_btf_path, NULL); + if (!info->src_btf) { + err = -errno; + p_err("failed parsing '%s' BTF file: %s", targ_btf_path, strerror(errno)); + goto err_out; + } + + info->marked_btf = btf__parse(targ_btf_path, NULL); + if (!info->marked_btf) { + err = -errno; + p_err("failed parsing '%s' BTF file: %s", targ_btf_path, strerror(errno)); + goto err_out; + } + + return info; + +err_out: + btfgen_free_info(info); + errno = -err; + return NULL; +} + +#define MARKED UINT32_MAX + +static void btfgen_mark_member(struct btfgen_info *info, int type_id, int idx) +{ + const struct btf_type *t = btf__type_by_id(info->marked_btf, type_id); + struct btf_member *m = btf_members(t) + idx; + + m->name_off = MARKED; +} + +static int +btfgen_mark_type(struct btfgen_info *info, unsigned int type_id, bool follow_pointers) +{ + const struct btf_type *btf_type = btf__type_by_id(info->src_btf, type_id); + struct btf_type *cloned_type; + struct btf_param *param; + struct btf_array *array; + int err, i; + + if (type_id == 0) + return 0; + + /* mark type on cloned BTF as used */ + cloned_type = (struct btf_type *) btf__type_by_id(info->marked_btf, type_id); + cloned_type->name_off = MARKED; + + /* recursively mark other types needed by it */ + switch (btf_kind(btf_type)) { + case BTF_KIND_UNKN: + case BTF_KIND_INT: + case BTF_KIND_FLOAT: + case BTF_KIND_ENUM: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + break; + case BTF_KIND_PTR: + if (follow_pointers) { + err = btfgen_mark_type(info, btf_type->type, follow_pointers); + if (err) + return err; + } + break; + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_TYPEDEF: + err = btfgen_mark_type(info, btf_type->type, follow_pointers); + if (err) + return err; + break; + case BTF_KIND_ARRAY: + array = btf_array(btf_type); + + /* mark array type */ + err = btfgen_mark_type(info, array->type, follow_pointers); + /* mark array's index type */ + err = err ? : btfgen_mark_type(info, array->index_type, follow_pointers); + if (err) + return err; + break; + case BTF_KIND_FUNC_PROTO: + /* mark ret type */ + err = btfgen_mark_type(info, btf_type->type, follow_pointers); + if (err) + return err; + + /* mark parameters types */ + param = btf_params(btf_type); + for (i = 0; i < btf_vlen(btf_type); i++) { + err = btfgen_mark_type(info, param->type, follow_pointers); + if (err) + return err; + param++; + } + break; + /* tells if some other type needs to be handled */ + default: + p_err("unsupported kind: %s (%d)", btf_kind_str(btf_type), type_id); + return -EINVAL; + } + + return 0; +} + +static int btfgen_record_field_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec) +{ + struct btf *btf = info->src_btf; + const struct btf_type *btf_type; + struct btf_member *btf_member; + struct btf_array *array; + unsigned int type_id = targ_spec->root_type_id; + int idx, err; + + /* mark root type */ + btf_type = btf__type_by_id(btf, type_id); + err = btfgen_mark_type(info, type_id, false); + if (err) + return err; + + /* mark types for complex types (arrays, unions, structures) */ + for (int i = 1; i < targ_spec->raw_len; i++) { + /* skip typedefs and mods */ + while (btf_is_mod(btf_type) || btf_is_typedef(btf_type)) { + type_id = btf_type->type; + btf_type = btf__type_by_id(btf, type_id); + } + + switch (btf_kind(btf_type)) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + idx = targ_spec->raw_spec[i]; + btf_member = btf_members(btf_type) + idx; + + /* mark member */ + btfgen_mark_member(info, type_id, idx); + + /* mark member's type */ + type_id = btf_member->type; + btf_type = btf__type_by_id(btf, type_id); + err = btfgen_mark_type(info, type_id, false); + if (err) + return err; + break; + case BTF_KIND_ARRAY: + array = btf_array(btf_type); + type_id = array->type; + btf_type = btf__type_by_id(btf, type_id); + break; + default: + p_err("unsupported kind: %s (%d)", + btf_kind_str(btf_type), btf_type->type); + return -EINVAL; + } + } + + return 0; +} + +static int btfgen_record_type_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec) +{ + return btfgen_mark_type(info, targ_spec->root_type_id, true); +} + +static int btfgen_record_enumval_relo(struct btfgen_info *info, struct bpf_core_spec *targ_spec) +{ + return btfgen_mark_type(info, targ_spec->root_type_id, false); +} + +static int btfgen_record_reloc(struct btfgen_info *info, struct bpf_core_spec *res) +{ + switch (res->relo_kind) { + case BPF_CORE_FIELD_BYTE_OFFSET: + case BPF_CORE_FIELD_BYTE_SIZE: + case BPF_CORE_FIELD_EXISTS: + case BPF_CORE_FIELD_SIGNED: + case BPF_CORE_FIELD_LSHIFT_U64: + case BPF_CORE_FIELD_RSHIFT_U64: + return btfgen_record_field_relo(info, res); + case BPF_CORE_TYPE_ID_LOCAL: /* BPF_CORE_TYPE_ID_LOCAL doesn't require kernel BTF */ + return 0; + case BPF_CORE_TYPE_ID_TARGET: + case BPF_CORE_TYPE_EXISTS: + case BPF_CORE_TYPE_SIZE: + return btfgen_record_type_relo(info, res); + case BPF_CORE_ENUMVAL_EXISTS: + case BPF_CORE_ENUMVAL_VALUE: + return btfgen_record_enumval_relo(info, res); + default: + return -EINVAL; + } +} + +static struct bpf_core_cand_list * +btfgen_find_cands(const struct btf *local_btf, const struct btf *targ_btf, __u32 local_id) +{ + const struct btf_type *local_type; + struct bpf_core_cand_list *cands = NULL; + struct bpf_core_cand local_cand = {}; + size_t local_essent_len; + const char *local_name; + int err; + + local_cand.btf = local_btf; + local_cand.id = local_id; + + local_type = btf__type_by_id(local_btf, local_id); + if (!local_type) { + err = -EINVAL; + goto err_out; + } + + local_name = btf__name_by_offset(local_btf, local_type->name_off); + if (!local_name) { + err = -EINVAL; + goto err_out; + } + local_essent_len = bpf_core_essential_name_len(local_name); + + cands = calloc(1, sizeof(*cands)); + if (!cands) + return NULL; + + err = bpf_core_add_cands(&local_cand, local_essent_len, targ_btf, "vmlinux", 1, cands); + if (err) + goto err_out; + + return cands; + +err_out: + bpf_core_free_cands(cands); + errno = -err; + return NULL; +} + +/* Record relocation information for a single BPF object */ +static int btfgen_record_obj(struct btfgen_info *info, const char *obj_path) +{ + const struct btf_ext_info_sec *sec; + const struct bpf_core_relo *relo; + const struct btf_ext_info *seg; + struct hashmap_entry *entry; + struct hashmap *cand_cache = NULL; + struct btf_ext *btf_ext = NULL; + unsigned int relo_idx; + struct btf *btf = NULL; + size_t i; + int err; + + btf = btf__parse(obj_path, &btf_ext); + if (!btf) { + err = -errno; + p_err("failed to parse BPF object '%s': %s", obj_path, strerror(errno)); + return err; + } + + if (!btf_ext) { + p_err("failed to parse BPF object '%s': section %s not found", + obj_path, BTF_EXT_ELF_SEC); + err = -EINVAL; + goto out; + } + + if (btf_ext->core_relo_info.len == 0) { + err = 0; + goto out; + } + + cand_cache = hashmap__new(btfgen_hash_fn, btfgen_equal_fn, NULL); + if (IS_ERR(cand_cache)) { + err = PTR_ERR(cand_cache); + goto out; + } + + seg = &btf_ext->core_relo_info; + for_each_btf_ext_sec(seg, sec) { + for_each_btf_ext_rec(seg, sec, relo_idx, relo) { + struct bpf_core_spec specs_scratch[3] = {}; + struct bpf_core_relo_res targ_res = {}; + struct bpf_core_cand_list *cands = NULL; + const void *type_key = u32_as_hash_key(relo->type_id); + const char *sec_name = btf__name_by_offset(btf, sec->sec_name_off); + + if (relo->kind != BPF_CORE_TYPE_ID_LOCAL && + !hashmap__find(cand_cache, type_key, (void **)&cands)) { + cands = btfgen_find_cands(btf, info->src_btf, relo->type_id); + if (!cands) { + err = -errno; + goto out; + } + + err = hashmap__set(cand_cache, type_key, cands, NULL, NULL); + if (err) + goto out; + } + + err = bpf_core_calc_relo_insn(sec_name, relo, relo_idx, btf, cands, + specs_scratch, &targ_res); + if (err) + goto out; + + /* specs_scratch[2] is the target spec */ + err = btfgen_record_reloc(info, &specs_scratch[2]); + if (err) + goto out; + } + } + +out: + btf__free(btf); + btf_ext__free(btf_ext); + + if (!IS_ERR_OR_NULL(cand_cache)) { + hashmap__for_each_entry(cand_cache, entry, i) { + bpf_core_free_cands(entry->value); + } + hashmap__free(cand_cache); + } + + return err; +} + +/* Generate BTF from relocation information previously recorded */ +static struct btf *btfgen_get_btf(struct btfgen_info *info) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +/* Create minimized BTF file for a set of BPF objects. + * + * The BTFGen algorithm is divided in two main parts: (1) collect the + * BTF types that are involved in relocations and (2) generate the BTF + * object using the collected types. + * + * In order to collect the types involved in the relocations, we parse + * the BTF and BTF.ext sections of the BPF objects and use + * bpf_core_calc_relo_insn() to get the target specification, this + * indicates how the types and fields are used in a relocation. + * + * Types are recorded in different ways according to the kind of the + * relocation. For field-based relocations only the members that are + * actually used are saved in order to reduce the size of the generated + * BTF file. For type-based relocations empty struct / unions are + * generated and for enum-based relocations the whole type is saved. + * + * The second part of the algorithm generates the BTF object. It creates + * an empty BTF object and fills it with the types recorded in the + * previous step. This function takes care of only adding the structure + * and union members that were marked as used and it also fixes up the + * type IDs on the generated BTF object. + */ static int minimize_btf(const char *src_btf, const char *dst_btf, const char *objspaths[]) { - return -EOPNOTSUPP; + struct btfgen_info *info; + struct btf *btf_new = NULL; + int err, i; + + info = btfgen_new_info(src_btf); + if (!info) { + err = -errno; + p_err("failed to allocate info structure: %s", strerror(errno)); + goto out; + } + + for (i = 0; objspaths[i] != NULL; i++) { + err = btfgen_record_obj(info, objspaths[i]); + if (err) { + p_err("error recording relocations for %s: %s", objspaths[i], + strerror(errno)); + goto out; + } + } + + btf_new = btfgen_get_btf(info); + if (!btf_new) { + err = -errno; + p_err("error generating BTF: %s", strerror(errno)); + goto out; + } + + err = btf_save_raw(btf_new, dst_btf); + if (err) { + p_err("error saving btf file: %s", strerror(errno)); + goto out; + } + +out: + btf__free(btf_new); + btfgen_free_info(info); + + return err; } static int do_min_core_btf(int argc, char **argv) From dc695516b6f5cb322b95de7ef3a6ec1db707ff8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mauricio=20V=C3=A1squez?= Date: Tue, 15 Feb 2022 17:58:54 -0500 Subject: [PATCH 21/29] bpftool: Implement btfgen_get_btf() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last part of the BTFGen algorithm is to create a new BTF object with all the types that were recorded in the previous steps. This function performs two different steps: 1. Add the types to the new BTF object by using btf__add_type(). Some special logic around struct and unions is implemented to only add the members that are really used in the field-based relocations. The type ID on the new and old BTF objects is stored on a map. 2. Fix all the type IDs on the new BTF object by using the IDs saved in the previous step. Signed-off-by: Mauricio Vásquez Signed-off-by: Rafael David Tinoco Signed-off-by: Lorenzo Fontana Signed-off-by: Leonardo Di Donato Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220215225856.671072-6-mauricio@kinvolk.io --- tools/bpf/bpftool/gen.c | 100 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 99 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index 806001020841d..e461059a72ee0 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -1505,10 +1505,108 @@ static int btfgen_record_obj(struct btfgen_info *info, const char *obj_path) return err; } +static int btfgen_remap_id(__u32 *type_id, void *ctx) +{ + unsigned int *ids = ctx; + + *type_id = ids[*type_id]; + + return 0; +} + /* Generate BTF from relocation information previously recorded */ static struct btf *btfgen_get_btf(struct btfgen_info *info) { - return ERR_PTR(-EOPNOTSUPP); + struct btf *btf_new = NULL; + unsigned int *ids = NULL; + unsigned int i, n = btf__type_cnt(info->marked_btf); + int err = 0; + + btf_new = btf__new_empty(); + if (!btf_new) { + err = -errno; + goto err_out; + } + + ids = calloc(n, sizeof(*ids)); + if (!ids) { + err = -errno; + goto err_out; + } + + /* first pass: add all marked types to btf_new and add their new ids to the ids map */ + for (i = 1; i < n; i++) { + const struct btf_type *cloned_type, *type; + const char *name; + int new_id; + + cloned_type = btf__type_by_id(info->marked_btf, i); + + if (cloned_type->name_off != MARKED) + continue; + + type = btf__type_by_id(info->src_btf, i); + + /* add members for struct and union */ + if (btf_is_composite(type)) { + struct btf_member *cloned_m, *m; + unsigned short vlen; + int idx_src; + + name = btf__str_by_offset(info->src_btf, type->name_off); + + if (btf_is_struct(type)) + err = btf__add_struct(btf_new, name, type->size); + else + err = btf__add_union(btf_new, name, type->size); + + if (err < 0) + goto err_out; + new_id = err; + + cloned_m = btf_members(cloned_type); + m = btf_members(type); + vlen = btf_vlen(cloned_type); + for (idx_src = 0; idx_src < vlen; idx_src++, cloned_m++, m++) { + /* add only members that are marked as used */ + if (cloned_m->name_off != MARKED) + continue; + + name = btf__str_by_offset(info->src_btf, m->name_off); + err = btf__add_field(btf_new, name, m->type, + btf_member_bit_offset(cloned_type, idx_src), + btf_member_bitfield_size(cloned_type, idx_src)); + if (err < 0) + goto err_out; + } + } else { + err = btf__add_type(btf_new, info->src_btf, type); + if (err < 0) + goto err_out; + new_id = err; + } + + /* add ID mapping */ + ids[i] = new_id; + } + + /* second pass: fix up type ids */ + for (i = 1; i < btf__type_cnt(btf_new); i++) { + struct btf_type *btf_type = (struct btf_type *) btf__type_by_id(btf_new, i); + + err = btf_type_visit_type_ids(btf_type, btfgen_remap_id, ids); + if (err) + goto err_out; + } + + free(ids); + return btf_new; + +err_out: + btf__free(btf_new); + free(ids); + errno = -err; + return NULL; } /* Create minimized BTF file for a set of BPF objects. From 1d1ffbf7f0b261fd5dcac2cd40a92b9394df08f6 Mon Sep 17 00:00:00 2001 From: Rafael David Tinoco Date: Tue, 15 Feb 2022 17:58:55 -0500 Subject: [PATCH 22/29] bpftool: Gen min_core_btf explanation and examples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add "min_core_btf" feature explanation and one example of how to use it to bpftool-gen man page. Signed-off-by: Mauricio Vásquez Signed-off-by: Rafael David Tinoco Signed-off-by: Lorenzo Fontana Signed-off-by: Leonardo Di Donato Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220215225856.671072-7-mauricio@kinvolk.io --- .../bpf/bpftool/Documentation/bpftool-gen.rst | 90 +++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst index bc276388f4322..18d646b571ec7 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -25,6 +25,7 @@ GEN COMMANDS | **bpftool** **gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...] | **bpftool** **gen skeleton** *FILE* [**name** *OBJECT_NAME*] +| **bpftool** **gen min_core_btf** *INPUT* *OUTPUT* *OBJECT* [*OBJECT*...] | **bpftool** **gen help** DESCRIPTION @@ -149,6 +150,26 @@ DESCRIPTION (non-read-only) data from userspace, with same simplicity as for BPF side. + **bpftool** **gen min_core_btf** *INPUT* *OUTPUT* *OBJECT* [*OBJECT*...] + Generate a minimum BTF file as *OUTPUT*, derived from a given + *INPUT* BTF file, containing all needed BTF types so one, or + more, given eBPF objects CO-RE relocations may be satisfied. + + When kernels aren't compiled with CONFIG_DEBUG_INFO_BTF, + libbpf, when loading an eBPF object, has to rely on external + BTF files to be able to calculate CO-RE relocations. + + Usually, an external BTF file is built from existing kernel + DWARF data using pahole. It contains all the types used by + its respective kernel image and, because of that, is big. + + The min_core_btf feature builds smaller BTF files, customized + to one or multiple eBPF objects, so they can be distributed + together with an eBPF CO-RE based application, turning the + application portable to different kernel versions. + + Check examples bellow for more information how to use it. + **bpftool gen help** Print short help message. @@ -215,7 +236,9 @@ This is example BPF application with two BPF programs and a mix of BPF maps and global variables. Source code is split across two source code files. **$ clang -target bpf -g example1.bpf.c -o example1.bpf.o** + **$ clang -target bpf -g example2.bpf.c -o example2.bpf.o** + **$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o** This set of commands compiles *example1.bpf.c* and *example2.bpf.c* @@ -329,3 +352,70 @@ BPF ELF object file *example.bpf.o*. my_static_var: 7 This is a stripped-out version of skeleton generated for above example code. + +min_core_btf +------------ + +**$ bpftool btf dump file 5.4.0-example.btf format raw** + +:: + + [1] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none) + [2] CONST '(anon)' type_id=1 + [3] VOLATILE '(anon)' type_id=1 + [4] ARRAY '(anon)' type_id=1 index_type_id=21 nr_elems=2 + [5] PTR '(anon)' type_id=8 + [6] CONST '(anon)' type_id=5 + [7] INT 'char' size=1 bits_offset=0 nr_bits=8 encoding=(none) + [8] CONST '(anon)' type_id=7 + [9] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none) + + +**$ bpftool btf dump file one.bpf.o format raw** + +:: + + [1] PTR '(anon)' type_id=2 + [2] STRUCT 'trace_event_raw_sys_enter' size=64 vlen=4 + 'ent' type_id=3 bits_offset=0 + 'id' type_id=7 bits_offset=64 + 'args' type_id=9 bits_offset=128 + '__data' type_id=12 bits_offset=512 + [3] STRUCT 'trace_entry' size=8 vlen=4 + 'type' type_id=4 bits_offset=0 + 'flags' type_id=5 bits_offset=16 + 'preempt_count' type_id=5 bits_offset=24 + + +**$ bpftool gen min_core_btf 5.4.0-example.btf 5.4.0-smaller.btf one.bpf.o** + +**$ bpftool btf dump file 5.4.0-smaller.btf format raw** + +:: + + [1] TYPEDEF 'pid_t' type_id=6 + [2] STRUCT 'trace_event_raw_sys_enter' size=64 vlen=1 + 'args' type_id=4 bits_offset=128 + [3] STRUCT 'task_struct' size=9216 vlen=2 + 'pid' type_id=1 bits_offset=17920 + 'real_parent' type_id=7 bits_offset=18048 + [4] ARRAY '(anon)' type_id=5 index_type_id=8 nr_elems=6 + [5] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none) + [6] TYPEDEF '__kernel_pid_t' type_id=8 + [7] PTR '(anon)' type_id=3 + [8] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED + + +Now, the "5.4.0-smaller.btf" file may be used by libbpf as an external BTF file +when loading the "one.bpf.o" object into the "5.4.0-example" kernel. Note that +the generated BTF file won't allow other eBPF objects to be loaded, just the +ones given to min_core_btf. + +:: + + LIBBPF_OPTS(bpf_object_open_opts, opts, .btf_custom_path = "5.4.0-smaller.btf"); + struct bpf_object *obj; + + obj = bpf_object__open_file("one.bpf.o", &opts); + + ... From 704c91e59fe045708aa3f11d0c2bf9c83e39d24c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mauricio=20V=C3=A1squez?= Date: Tue, 15 Feb 2022 17:58:56 -0500 Subject: [PATCH 23/29] selftests/bpf: Test "bpftool gen min_core_btf" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit reuses the core_reloc test to check if the BTF files generated with "bpftool gen min_core_btf" are correct. This introduces test_core_btfgen() that runs all the core_reloc tests, but this time the source BTF files are generated by using "bpftool gen min_core_btf". The goal of this test is to check that the generated files are usable, and not to check if the algorithm is creating an optimized BTF file. Signed-off-by: Mauricio Vásquez Signed-off-by: Rafael David Tinoco Signed-off-by: Lorenzo Fontana Signed-off-by: Leonardo Di Donato Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220215225856.671072-8-mauricio@kinvolk.io --- .../selftests/bpf/prog_tests/core_reloc.c | 50 ++++++++++++++++++- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 68e4c8dafa001..baf53c23c08d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -2,6 +2,7 @@ #include #include "progs/core_reloc_types.h" #include "bpf_testmod/bpf_testmod.h" +#include #include #include #include @@ -836,13 +837,27 @@ static size_t roundup_page(size_t sz) return (sz + page_size - 1) / page_size * page_size; } -void test_core_reloc(void) +static int run_btfgen(const char *src_btf, const char *dst_btf, const char *objpath) +{ + char command[4096]; + int n; + + n = snprintf(command, sizeof(command), + "./tools/build/bpftool/bpftool gen min_core_btf %s %s %s", + src_btf, dst_btf, objpath); + if (n < 0 || n >= sizeof(command)) + return -1; + + return system(command); +} + +static void run_core_reloc_tests(bool use_btfgen) { const size_t mmap_sz = roundup_page(sizeof(struct data)); DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts); struct core_reloc_test_case *test_case; const char *tp_name, *probe_name; - int err, i, equal; + int err, i, equal, fd; struct bpf_link *link = NULL; struct bpf_map *data_map; struct bpf_program *prog; @@ -854,6 +869,7 @@ void test_core_reloc(void) my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32); for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + char btf_file[] = "/tmp/core_reloc.btf.XXXXXX"; test_case = &test_cases[i]; if (!test__start_subtest(test_case->case_name)) continue; @@ -863,6 +879,25 @@ void test_core_reloc(void) continue; } + /* generate a "minimal" BTF file and use it as source */ + if (use_btfgen) { + if (!test_case->btf_src_file || test_case->fails) { + test__skip(); + continue; + } + + fd = mkstemp(btf_file); + if (!ASSERT_GE(fd, 0, "btf_tmp")) + goto cleanup; + close(fd); /* we only need the path */ + err = run_btfgen(test_case->btf_src_file, btf_file, + test_case->bpf_obj_file); + if (!ASSERT_OK(err, "run_btfgen")) + goto cleanup; + + test_case->btf_src_file = btf_file; + } + if (test_case->setup) { err = test_case->setup(test_case); if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err)) @@ -954,8 +989,19 @@ void test_core_reloc(void) CHECK_FAIL(munmap(mmap_data, mmap_sz)); mmap_data = NULL; } + remove(btf_file); bpf_link__destroy(link); link = NULL; bpf_object__close(obj); } } + +void test_core_reloc(void) +{ + run_core_reloc_tests(false); +} + +void test_core_btfgen(void) +{ + run_core_reloc_tests(true); +} From f76d8507d23834f7e56b0fe95c82605e7d7e0efe Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 16 Feb 2022 10:21:02 +0100 Subject: [PATCH 24/29] bpftool: Fix pretty print dump for maps without BTF loaded The commit e5043894b21f ("bpftool: Use libbpf_get_error() to check error") fails to dump map without BTF loaded in pretty mode (-p option). Fixing this by making sure get_map_kv_btf won't fail in case there's no BTF available for the map. Fixes: e5043894b21f ("bpftool: Use libbpf_get_error() to check error") Suggested-by: Andrii Nakryiko Signed-off-by: Jiri Olsa Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220216092102.125448-1-jolsa@kernel.org --- tools/bpf/bpftool/map.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 7a341a472ea41..e746642de2928 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -805,29 +805,30 @@ static int maps_have_btf(int *fds, int nb_fds) static struct btf *btf_vmlinux; -static struct btf *get_map_kv_btf(const struct bpf_map_info *info) +static int get_map_kv_btf(const struct bpf_map_info *info, struct btf **btf) { - struct btf *btf = NULL; + int err = 0; if (info->btf_vmlinux_value_type_id) { if (!btf_vmlinux) { btf_vmlinux = libbpf_find_kernel_btf(); - if (libbpf_get_error(btf_vmlinux)) + err = libbpf_get_error(btf_vmlinux); + if (err) { p_err("failed to get kernel btf"); + return err; + } } - return btf_vmlinux; + *btf = btf_vmlinux; } else if (info->btf_value_type_id) { - int err; - - btf = btf__load_from_kernel_by_id(info->btf_id); - err = libbpf_get_error(btf); - if (err) { + *btf = btf__load_from_kernel_by_id(info->btf_id); + err = libbpf_get_error(*btf); + if (err) p_err("failed to get btf"); - btf = ERR_PTR(err); - } + } else { + *btf = NULL; } - return btf; + return err; } static void free_map_kv_btf(struct btf *btf) @@ -862,8 +863,7 @@ map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, prev_key = NULL; if (wtr) { - btf = get_map_kv_btf(info); - err = libbpf_get_error(btf); + err = get_map_kv_btf(info, &btf); if (err) { goto exit_free; } @@ -1054,8 +1054,7 @@ static void print_key_value(struct bpf_map_info *info, void *key, json_writer_t *btf_wtr; struct btf *btf; - btf = get_map_kv_btf(info); - if (libbpf_get_error(btf)) + if (get_map_kv_btf(info, &btf)) return; if (json_output) { From 9b6eb0478dfad3b0e7af6c73523d96826210f4fe Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 16 Feb 2022 15:35:40 -0800 Subject: [PATCH 25/29] bpftool: Fix C++ additions to skeleton Mark C++-specific T::open() and other methods as static inline to avoid symbol redefinition when multiple files use the same skeleton header in an application. Fixes: bb8ffe61ea45 ("bpftool: Add C++-specific open/load/etc skeleton wrappers") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220216233540.216642-1-andrii@kernel.org --- tools/bpf/bpftool/gen.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c index e461059a72ee0..f8c1413523a3e 100644 --- a/tools/bpf/bpftool/gen.c +++ b/tools/bpf/bpftool/gen.c @@ -834,13 +834,13 @@ static int do_skeleton(int argc, char **argv) \n\ \n\ #ifdef __cplusplus \n\ - static struct %1$s *open(const struct bpf_object_open_opts *opts = nullptr);\n\ - static struct %1$s *open_and_load(); \n\ - static int load(struct %1$s *skel); \n\ - static int attach(struct %1$s *skel); \n\ - static void detach(struct %1$s *skel); \n\ - static void destroy(struct %1$s *skel); \n\ - static const void *elf_bytes(size_t *sz); \n\ + static inline struct %1$s *open(const struct bpf_object_open_opts *opts = nullptr);\n\ + static inline struct %1$s *open_and_load(); \n\ + static inline int load(struct %1$s *skel); \n\ + static inline int attach(struct %1$s *skel); \n\ + static inline void detach(struct %1$s *skel); \n\ + static inline void destroy(struct %1$s *skel); \n\ + static inline const void *elf_bytes(size_t *sz); \n\ #endif /* __cplusplus */ \n\ }; \n\ \n\ From 1b8c924a05934d2e758ec7da7bd217ef8ebd80ce Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 16 Feb 2022 23:39:58 -0800 Subject: [PATCH 26/29] libbpf: Fix memleak in libbpf_netlink_recv() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure that libbpf_netlink_recv() frees dynamically allocated buffer in all code paths. Fixes: 9c3de619e13e ("libbpf: Use dynamically allocated buffer when receiving netlink messages") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20220217073958.276959-1-andrii@kernel.org --- tools/lib/bpf/netlink.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index a598061f6fea5..cbc8967d54021 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -176,7 +176,8 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, libbpf_nla_dump_errormsg(nh); goto done; case NLMSG_DONE: - return 0; + ret = 0; + goto done; default: break; } @@ -188,9 +189,10 @@ static int libbpf_netlink_recv(int sock, __u32 nl_pid, int seq, case NL_NEXT: goto start; case NL_DONE: - return 0; + ret = 0; + goto done; default: - return ret; + goto done; } } } From b38101c57acf9543ed7c4b0f43fd65ea27240772 Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Thu, 17 Feb 2022 07:52:12 -0800 Subject: [PATCH 27/29] selftests/bpf: Fix vmtest.sh to launch smp vm. Fix typo in vmtest.sh to make sure it launch proper vm with 8 cpus. Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220217155212.2309672-1-fallentree@fb.com --- tools/testing/selftests/bpf/vmtest.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index b3afd43549fae..e0bb04a97e107 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -241,7 +241,7 @@ EOF -nodefaults \ -display none \ -serial mon:stdio \ - "${qemu_flags[@]}" \ + "${QEMU_FLAGS[@]}" \ -enable-kvm \ -m 4G \ -drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \ From b75dacaac4650478ed5a9d33975b91b99016daff Mon Sep 17 00:00:00 2001 From: Yucong Sun Date: Thu, 17 Feb 2022 10:02:10 -0800 Subject: [PATCH 28/29] selftests/bpf: Fix crash in core_reloc when bpftool btfgen fails Avoid unnecessary goto cleanup, as there is nothing to clean up. Signed-off-by: Yucong Sun Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20220217180210.2981502-1-fallentree@fb.com --- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index baf53c23c08d8..8fbb40a832d57 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -888,12 +888,12 @@ static void run_core_reloc_tests(bool use_btfgen) fd = mkstemp(btf_file); if (!ASSERT_GE(fd, 0, "btf_tmp")) - goto cleanup; + continue; close(fd); /* we only need the path */ err = run_btfgen(test_case->btf_src_file, btf_file, test_case->bpf_obj_file); if (!ASSERT_OK(err, "run_btfgen")) - goto cleanup; + continue; test_case->btf_src_file = btf_file; } From d24d2a2b0a81dd5e9bb99aeb4559ec9734e1416f Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 17 Feb 2022 10:30:01 -0800 Subject: [PATCH 29/29] bpf: bpf_prog_pack: Set proper size before freeing ro_header bpf_prog_pack_free() uses header->size to decide whether the header should be freed with module_memfree() or the bpf_prog_pack logic. However, in kvmalloc() failure path of bpf_jit_binary_pack_alloc(), header->size is not set yet. As a result, bpf_prog_pack_free() may treat a slice of a pack as a standalone kvmalloc'd header and call module_memfree() on the whole pack. This in turn causes use-after-free by other users of the pack. Fix this by setting ro_header->size before freeing ro_header. Fixes: 33c9805860e5 ("bpf: Introduce bpf_jit_binary_pack_[alloc|finalize|free]") Reported-by: syzbot+2f649ec6d2eea1495a8f@syzkaller.appspotmail.com Reported-by: syzbot+ecb1e7e51c52f68f7481@syzkaller.appspotmail.com Reported-by: syzbot+87f65c75f4a72db05445@syzkaller.appspotmail.com Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20220217183001.1876034-1-song@kernel.org --- kernel/bpf/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 44623c9b5bb18..ebb0193d07f07 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1069,6 +1069,7 @@ bpf_jit_binary_pack_alloc(unsigned int proglen, u8 **image_ptr, *rw_header = kvmalloc(size, GFP_KERNEL); if (!*rw_header) { + bpf_arch_text_copy(&ro_header->size, &size, sizeof(size)); bpf_prog_pack_free(ro_header); bpf_jit_uncharge_modmem(size); return NULL;